+++ CCTV_감시구역.py
@@ -0,0 +1,42 @@
+import pandas as pd
+import geopandas as gpd
+from pyogrio import read_dataframe, write_dataframe
+from shapely.geometry import Point
+
+
+grid_df = read_dataframe('DATA/refined/geopackage/시군구별/중분류_37100_경산시.gpkg', encoding='utf-8')
+
+light_df = pd.read_csv("DATA/raw/보안등/경상북도_경산시_보안등정보_20230711.csv", encoding="ms949")
+
+def is_float(x):
+    try:
+        float(x)
+        return True
+    except ValueError:
+        return False
+
+# .copy() avoids SettingWithCopyWarning on the geometry assignment below
+valid_lightings_df = light_df[light_df['경도'].apply(is_float) & light_df['위도'].apply(is_float)].copy()
+
+valid_lightings_df['geometry'] = valid_lightings_df.apply(lambda row: Point(float(row['경도']), float(row['위도'])), axis=1)
+valid_lightings_df = gpd.GeoDataFrame(valid_lightings_df, geometry='geometry', crs="epsg:4326")
+
+# Buffering must happen in a metric CRS: a 50-unit buffer in EPSG:4326 would be
+# 50 degrees, not 50 metres. Project both layers to EPSG:5179 (metres) first.
+grid_df = grid_df.to_crs("epsg:5179")
+valid_lightings_df = valid_lightings_df.to_crs("epsg:5179")
+
+buffer_radius = 50  # metres
+
+points_buffered = valid_lightings_df.buffer(buffer_radius)
+
+def calculate_intersection_ratio(polygon, points_buffered):
+    # Note: overlapping buffers are double-counted, so the ratio can exceed 1
+    intersections = points_buffered.intersection(polygon)
+    intersection_area = intersections.area.sum()
+    return intersection_area / polygon.area
+
+# Apply the function to each polygon
+grid_df['intersection_ratio'] = grid_df.apply(
+    lambda row: calculate_intersection_ratio(row['geometry'], points_buffered),
+    axis=1
+)
+
+write_dataframe(grid_df, "감시비율(100m).gpkg")
\ No newline at end of file
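Reviewer note: because neighbouring buffers overlap, `calculate_intersection_ratio` double-counts covered area and can return values above 1. A minimal double-counting-free sketch, assuming the `grid_df` and `points_buffered` objects from this script (both in EPSG:5179):

```python
# Sketch: merge all buffers into one geometry, then intersect each grid cell once.
coverage = points_buffered.union_all()  # use points_buffered.unary_union on older GeoPandas

grid_df['covered_ratio'] = grid_df.geometry.intersection(coverage).area / grid_df.geometry.area
```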
+++ CCTV_감시구역_격자.gpkg
Binary file is not shown
+++ CCTV설치점수산출.py
@@ -0,0 +1,141 @@
+from pyogrio import read_dataframe, write_dataframe
+import glob
+import pandas as pd
+import numpy as np
+import json
+from joblib import Parallel, delayed
+
+
+GRID_INDEX = "GID"
+LOCALE_INDEX = "EMD_CD"
+
+SIG_CODE = [
+    ["경산", "47290"],
+    ["경주", "47130"],
+    ["구미", "47190"],
+    ["김천", "47150"],
+    ["안동", "47170"],
+    ["영주", "47210"],
+    ["영천", "47230"],
+    ["예천", "47900"],
+    ["칠곡", "47850"],
+    ["포항_남구", "47111"],
+    ["포항_북구", "47113"]
+]
+
+
+gpkg_datas = glob.glob("DATA/processed/점수산출_등급화전_전체데이터/v4/*.gpkg")
+
+house_ratio_grid_path = "DATA/processed/건물연면적/buildings_summary_per_grid.gpkg"
+house_ratio_grid_columns_to_extract = ["BLDG_CNT", "FLOOR_AREA_SUM", "HOUSE_FLOOR_AREA_SUM", "HOUSE_FLOOR_AREA_RATIO"]
+
+
+def range_creator(df, column, num_ranges):
+    """
+    Creates a list of tuples representing ranges based on percentiles.
+
+    Parameters:
+    df (pd.DataFrame): The input dataframe.
+    column (str): The column name to calculate the ranges for.
+    num_ranges (int): The number of ranges to create.
+
+    Returns:
+    list of tuples: Each tuple contains the start and end of a range.
+    """
+    percentiles = np.linspace(0, 100, num_ranges + 1)
+    values = np.percentile(df[column], percentiles)
+    ranges = [(values[i], values[i + 1]) for i in range(len(values) - 1)]
+    return ranges
+
+def map_value_to_range(df, column, ranges):
+    """
+    Maps values in a column to the specified ranges.
+
+    Parameters:
+    df (pd.DataFrame): The input dataframe.
+    column (str): The column name to map values for.
+    ranges (list of tuples): The list of ranges to map values to.
+
+    Returns:
+    pd.Series: A series of the same length as the input dataframe,
+               where each value is the index of the range it falls into.
+    """
+    # named differently from the enclosing function to avoid shadowing it
+    def map_single_value(value):
+        for i, (start, end) in enumerate(ranges):
+            if start <= value <= end:
+                return i + 1
+        return np.nan
+
+    return df[column].apply(map_single_value)
+
+def export_geojson_into_superset_compat_data(df, name):
+    print("폴리곤 데이터가 한 줄에 한 도형인지 다시 확인하세요! 별도의 예외처리가 없습니다!")
+    df = df.to_crs("epsg:4326")
+    df = df.fillna(0)
+    # Convert shapely geometries into GeoJSON-style coordinate lists for Superset.
+    # Only the first ring of the first polygon survives, so each row must hold a
+    # single simple polygon (hence the warning printed above).
+    df["geometry"] = df["geometry"].apply(lambda row: row.__geo_interface__["coordinates"][0][0])
+    # Mapbox rejects single-quoted JSON, so serialize with json.dumps
+    df["geometry"] = df["geometry"].apply(lambda row: json.dumps(row, indent=4))
+
+    df.to_csv(f"{name}.csv")
+
+def process_region(gpkg):
+    df = read_dataframe(gpkg)
+
+    df = df.fillna(0)
+
+    filename = gpkg.split('/')[-1].split('.')[0]
+
+    # exclude grid cells by rules
+    df = df[(df["총인구"] > 0) | (df["BLDG_CNT"] > 0)]
+    df = df[df["산지영역"] < 9000]
+    df = df[df["아파트단지영역"] < 1000]
+
+    # calculate the weighted component factors
+    df["감시취약지수"] = (10000 - df["CCTV_감시영역"]) * 0.00352
+    df["범죄특성요인"] = df["범죄취약점수"] * 17.6
+    df["범죄예방요인"] = df["area_ratio"] * 14.7
+    df["환경요인"] = df["IH_RATIO"] * 2.9
+    인구밀집요인등급화 = range_creator(df, "총인구", 10)
+    df["인구밀집요인"] = map_value_to_range(df, "총인구", 인구밀집요인등급화) * 1.76
+
+    취약인구비율_평균 = df["취약인구_비율"].mean()
+    단독주택연면적_평균 = df["HOUSE_FLOOR_AREA_RATIO"].mean()
+    df["가중치1"] = df.apply(lambda row: 1 if row["취약인구_비율"] > 취약인구비율_평균 else 0, axis=1)
+    df["가중치2"] = df.apply(lambda row: 1 if row["HOUSE_FLOOR_AREA_RATIO"] > 단독주택연면적_평균 else 0, axis=1)
+
+    def calculate_score(row):
+        # 'total' rather than 'sum', which would shadow the built-in
+        total = row["감시취약지수"] + row["범죄특성요인"] + row["범죄예방요인"] + row["환경요인"] + row["인구밀집요인"]
+        가중치 = 1
+        if row["가중치1"] == 1:
+            가중치 += 0.095
+        if row["가중치2"] == 1:
+            가중치 += 0.041
+        return total * 가중치
+
+    df["최종지수"] = df.apply(calculate_score, axis=1)
+    최종지수등급화 = range_creator(df, "최종지수", 100)
+    df["최종지수등급"] = map_value_to_range(df, "최종지수", 최종지수등급화)
+
+    save_loc = f"DATA/processed/최종데이터/{filename}_격자"
+    df = df.drop_duplicates(keep="first")
+    write_dataframe(df, f"{save_loc}.gpkg")
+    with open(f"{save_loc}_메타데이터.txt", 'w', encoding='utf-8') as file:
+        file.write(f"취약인구비율_평균 : {취약인구비율_평균}\n")
+        file.write(f"단독주택연면적_평균 : {단독주택연면적_평균}\n")
+        file.write(f"인구밀집요인등급화 : {인구밀집요인등급화}\n")
+        file.write(f"최종지수등급화 : {최종지수등급화}\n")
+        pd.set_option("display.max_rows", None)
+        file.write(f"등급별_격자수 : {df['최종지수등급'].value_counts().sort_index()}\n")
+    df = df.to_crs("epsg:4326")
+    df["centroid"] = df["geometry"].centroid
+    df.to_csv(f"{save_loc}_표준좌표계.csv")
+
+
+# one worker per region file (11 regions)
+Parallel(n_jobs=11)(delayed(process_region)(gpkg) for gpkg in gpkg_datas)
\ No newline at end of file
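Reviewer note: `range_creator` followed by `map_value_to_range` is percentile binning, which pandas ships directly. A sketch of the equivalent decile labelling with `pd.qcut`, assuming a dataframe with the same `총인구` column; `duplicates="drop"` also sidesteps errors when many grids share a value:

```python
import pandas as pd

# labels=False yields integer bin codes 0..9; +1 matches the 1..10 grades above
df["인구밀집요인_등급"] = pd.qcut(df["총인구"], q=10, labels=False, duplicates="drop") + 1
```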
+++ cctv_ratio.py
@@ -0,0 +1,19 @@
+import glob
+
+from pyogrio import read_dataframe, write_dataframe
+
+from tools.spatial_indexed_intersection import geom_overlay
+
+
+cctv_list_by_region = glob.glob("DATA/refined/geopackage/CCTV_감시영역/*.gpkg")
+
+grid = read_dataframe("DATA/refined/geopackage/100x100/100m격자총인구.gpkg")
+
+for region in cctv_list_by_region:
+    region_gpkg = read_dataframe(region)
+    region_gpkg = region_gpkg.to_crs(grid.crs)
+    result = geom_overlay(region_gpkg, grid)
+    result = result.dissolve(by="GID").reset_index()
+    result["CCTV_감시영역"] = result['geometry'].area
+    # split('.') (not ',') to strip the file extension from the region name
+    write_dataframe(result, f"DATA/refined/geopackage/CCTV_감시영역_격자/{region.split('/')[-1].split('.')[0]}_격자.gpkg")
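The extension-stripping split fixed above is simpler and safer with `pathlib`; a small sketch:

```python
from pathlib import Path

# Path(...).stem drops both the directory and the extension in one step.
name = Path(region).stem
out_path = f"DATA/refined/geopackage/CCTV_감시영역_격자/{name}_격자.gpkg"
```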
+++ crimin_area_cal.py
@@ -0,0 +1,16 @@
+from pyogrio import read_dataframe, write_dataframe
+import geopandas as gpd
+import pandas as pd
+from joblib import Parallel, delayed
+
+# the signature must match the two-argument call below
+def process_file(i, grid):
+    crimin_data = read_dataframe(f"DATA/refined/geopackage/범죄주의구역_가공/범죄주의구역-경북{i+1}.gpkg")
+    crimin_data = crimin_data.to_crs("EPSG:5179")
+    # overlay() builds its own spatial index, so no explicit sindex is needed here
+    crimin_overlap = grid.overlay(crimin_data, how="intersection")
+    write_dataframe(crimin_overlap, f"DATA/refined/geopackage/범죄주의구역_가공/범죄주의구역-경북-격자{i+1}.gpkg")
+
+grid = read_dataframe("DATA/refined/geopackage/시군구읍면동_경상북도_100x100.gpkg")
+print("grid_loaded!")
+Parallel(n_jobs=-1)(delayed(process_file)(i, grid) for i in range(10))
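Reviewer note: each worker gets the full grid, so overlay cost dominates. One optional speed-up is to pre-filter the grid to the layer's extent with the spatial index before the overlay; a sketch assuming the same `grid` and `crimin_data` frames:

```python
# Keep only grid cells that can intersect the layer at all.
extent = crimin_data.geometry.union_all()  # unary_union on older GeoPandas
candidate_idx = grid.sindex.query(extent, predicate="intersects")
crimin_overlap = grid.iloc[candidate_idx].overlay(crimin_data, how="intersection")
```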
+++ survillence_area.py
@@ -0,0 +1,50 @@
+from pyogrio import write_dataframe, read_dataframe
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import glob
+
+
+SIG_CODE = [
+    ["경산", "47290"],
+    ["경주", "47130"],
+    ["구미", "47190"],
+    ["김천", "47150"],
+    ["안동", "47170"],
+    ["영주", "47210"],
+    ["영천", "47230"],
+    ["예천", "47900"],
+    ["칠곡", "47850"],
+    ["포항_남구", "47111"],
+    ["포항_북구", "47113"]
+]
+
+border = read_dataframe("DATA/refined/geopackage/음영구역_격자/100m격자총인구.gpkg격자_경계.gpkg")
+print(border.columns)
+negative = read_dataframe("DATA/refined/geopackage/음영구역_격자/100m격자_음영구역_CPTED.gpkg")
+print(negative.columns)
+
+print("dataframe_loaded!")
+
+for SIG in SIG_CODE:
+    # startswith, not contains: the 시군구 code is the prefix of EMD_CD
+    border_SIG = border[border["EMD_CD"].str.startswith(SIG[1])]
+    negative_SIG = negative[negative["EMD_CD"].str.startswith(SIG[1])]
+    border_SIG = border_SIG.dissolve(by="GID")
+    negative_SIG = negative_SIG.dissolve(by="GID")
+    border_SIG["border_area"] = border_SIG["geometry"].area
+    negative_SIG["negative_area"] = negative_SIG["geometry"].area
+
+    border_SIG = border_SIG.rename(columns={"geometry": "border_geometry"})
+    negative_SIG = negative_SIG.rename(columns={"geometry": "negative_geometry"})
+
+    # after dissolve(by="GID"), GID is the index, so merge on the index
+    merged = pd.merge(border_SIG, negative_SIG, left_index=True, right_index=True).reset_index()
+
+    merged["area_ratio"] = merged["negative_area"] / merged["border_area"]
+    merged = merged.set_geometry("negative_geometry")
+    merged = merged.drop(columns="border_geometry")
+
+    write_dataframe(merged, f"DATA/refined/geopackage/100x100음영구역_비율/음영구역비율_{SIG[0]}.gpkg")
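Why `startswith` rather than `contains` for the district filter: a 시군구 code can also appear in the middle of an unrelated code. A toy illustration with hypothetical `EMD_CD` values:

```python
import pandas as pd

codes = pd.Series(["4729012345", "1147290123"])  # hypothetical codes
print(codes.str.contains("47290").tolist())    # [True, True]  -- false positive
print(codes.str.startswith("47290").tolist())  # [True, False] -- prefix match only
```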
+++ tools/__pycache__/spatial_indexed_intersection.cpython-311.pyc
Binary file is not shown
+++ tools/equation.py
@@ -0,0 +1,2 @@
+import numpy as np
+import pandas as pd
+++ tools/spatial_indexed_intersection.py
@@ -0,0 +1,111 @@
+import geopandas as gpd
+import pandas as pd
+# only the commented-out parallel implementation below needs these:
+from concurrent.futures import ProcessPoolExecutor, as_completed
+import os
+
+
+# Earlier ProcessPoolExecutor-based implementation, kept for reference:
+#
+# def prepare_tasks(under, over, task_que):
+#     task_list = []
+#     # We want to ensure minimal data is being moved around, so we prepare data slices here
+#     # that will be passed directly to each worker process
+#     for under_index, over_indices in task_que.items():
+#         # Extract only the necessary rows from 'under' and 'over' and create lightweight copies
+#         under_slice = under.iloc[[under_index]].copy()  # Copy to ensure continuity outside the loop
+#         over_slice = over.iloc[over_indices].copy()  # Copy to ensure continuity outside the loop
+#         task_list.append({
+#             'under_data': under_slice,
+#             'over_data': over_slice
+#         })
+#     return task_list
+#
+#
+# def process_intersection(task, under_data, over_data, how="intersection"):
+#     # Extract the relevant portions of the dataframes
+#     ref_poly = under_data.iloc[[task['under_index']]]
+#     comp_poly = over_data.iloc[task['over_indices']]
+#     # Perform the overlay operation
+#     intersection = ref_poly.overlay(comp_poly, how=how, keep_geom_type=False)
+#     return intersection
+#
+#
+# def geom_overlay(under, over, how="intersection"):
+#     if under.crs != over.crs:
+#         print(f"Error: CRS mismatch \nUnder CRS: {under.crs}, \nOver CRS: {over.crs}")
+#         return None
+#     print(f"under : {len(under)}, over : {len(over)}")
+#     if len(under) > len(over):
+#         under, over = over, under
+#
+#     print("creating spatial index queries...")
+#     sindex_query_result = under.sindex.query(over["geometry"], predicate="intersects")
+#     task_que = {}
+#     for key, value in zip(sindex_query_result[1], sindex_query_result[0]):
+#         if key in task_que:
+#             task_que[key].append(value)
+#         else:
+#             task_que[key] = [value]
+#
+#     print("preparing the task...")
+#     print(len(task_que))
+#     task_list = prepare_tasks(under, over, task_que)
+#
+#     print("executing...")
+#     with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
+#         future_to_task = {executor.submit(process_intersection, task, how): task for task in task_list}
+#         total_tasks = len(future_to_task)
+#         completed_tasks = 0
+#
+#         for future in as_completed(future_to_task):
+#             completed_tasks += 1
+#             print(f"Completed {completed_tasks} of {total_tasks} tasks")
+#             result = future.result()  # Collect the result from the future
+#
+#         resulting_intersection_geom = pd.concat([future.result() for future in future_to_task])
+#     return resulting_intersection_geom
+
+
+def geom_overlay(under, over, how="intersection"):
+    under_crs = under.crs
+    over_crs = over.crs
+
+    if over_crs != under_crs:
+        raise ValueError(f"CRS mismatch:\n under: {under_crs}\n over: {over_crs}")
+
+    # swap so that 'under' is the smaller layer; using the layer with fewer
+    # polygons as the reference saves spatial-index search time
+    if len(under) > len(over):
+        under, over = over, under
+
+    print("creating spatial index...")
+    sindex_query_result = under.sindex.query(over["geometry"], predicate="intersects")
+
+    # key is the reference (under) polygon index, value is the list of
+    # comparative (over) polygon indices that intersect it
+    task_que = {}
+    for key, value in zip(sindex_query_result[1], sindex_query_result[0]):
+        if key in task_que:
+            task_que[key].append(value)
+        else:
+            task_que[key] = [value]
+
+    intersections = []
+    print("executing...")
+    for i, key in enumerate(task_que):
+        # double brackets keep the row as a one-row DataFrame rather than a Series
+        ref_poly = under.iloc[[key]]
+        ref_poly = gpd.GeoDataFrame(ref_poly, crs=under_crs)
+        comp_poly = over.iloc[task_que[key]]
+        intersection = ref_poly.overlay(comp_poly, how=how, keep_geom_type=False)
+        intersections.append(intersection)
+        print(f"{i + 1}/{len(task_que)}")
+
+    if len(intersections) > 0:
+        resulting_intersection_geom = pd.concat(intersections)
+    else:
+        # an empty GeoDataFrame keeps the return type consistent for callers
+        resulting_intersection_geom = gpd.GeoDataFrame(geometry=[], crs=under_crs)
+    return resulting_intersection_geom
\ No newline at end of file
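A usage sketch for `geom_overlay`, with a hypothetical input file name; note that `gpd.overlay` also builds a spatial index internally and returns substantially the same rows, so the helper's main addition is per-polygon progress output:

```python
import geopandas as gpd
from pyogrio import read_dataframe

from tools.spatial_indexed_intersection import geom_overlay

layer = read_dataframe("DATA/refined/geopackage/CCTV_감시영역/경산.gpkg")  # hypothetical file
grid = read_dataframe("DATA/refined/geopackage/100x100/100m격자총인구.gpkg")

result = geom_overlay(layer.to_crs(grid.crs), grid)
# one-call equivalent, for comparison:
reference = gpd.overlay(layer.to_crs(grid.crs), grid, how="intersection", keep_geom_type=False)
```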
+++ 건축물대장.py
@@ -0,0 +1,33 @@
+import pandas as pd
+
+debug = False
+
+if __name__ == "__main__":
+    df = pd.read_csv("DATA/refined/건축물대장/건축물대장_전국_필터.csv")
+    print(len(df))
+    if debug:
+        df = df.iloc[:7000]
+
+    df["address"] = (df["시도"] + "_" + df["시군구"]).replace(" ", "_", regex=True)
+
+    # collect one summary row per municipality, then build the frame once
+    rows = []
+    for addr, df_gr in df.groupby("address"):
+        print(len(df_gr))
+        연면적 = df_gr["연면적(㎡)"].sum()
+        연면적_단독주택 = df_gr[df_gr["주용도"] == "단독주택"]["연면적(㎡)"].sum()
+        # guard against division by zero for municipalities with no floor area
+        비율 = 연면적_단독주택 / 연면적 if 연면적 else 0
+        rows.append({
+            "기초지자체": addr,
+            "연면적합": 연면적,
+            "연면적_단독주택합": 연면적_단독주택,
+            "비율": 비율
+        })
+    out = pd.DataFrame(rows, columns=["기초지자체", "연면적합", "연면적_단독주택합", "비율"])
+    out.to_csv("DATA/processed/전국_기초지자체_단독주택연면적_비율.csv", index=False)
\ No newline at end of file
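The per-group loop can also be written as one vectorized pass; a sketch assuming the same `df` columns as above:

```python
totals = df.groupby("address")["연면적(㎡)"].sum()
house = df[df["주용도"] == "단독주택"].groupby("address")["연면적(㎡)"].sum()

out = pd.DataFrame({"연면적합": totals, "연면적_단독주택합": house}).fillna(0)
# 0/0 cases become NaN, so zero them as the loop's guard does
out["비율"] = (out["연면적_단독주택합"] / out["연면적합"]).fillna(0)
out = out.rename_axis("기초지자체").reset_index()
```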
+++ 건축물대장2grid.py
@@ -0,0 +1,65 @@
+import pandas as pd
+import geopandas as gpd
+from pyogrio import read_dataframe, write_dataframe
+from shapely.geometry import Point
+
+
+buildings_df = pd.read_csv("DATA/refined/건축물대장/concat.csv", low_memory=False)
+
+grid_df = read_dataframe('DATA/refined/geopackage/시군구읍면동_경상북도_100x100.gpkg', encoding='utf-8')
+
+grid_df = grid_df.to_crs("epsg:4326")
+
+def is_float(x):
+    try:
+        float(x)
+        return True
+    except ValueError:
+        return False
+
+# .copy() avoids SettingWithCopyWarning on the geometry assignment below
+valid_buildings_df = buildings_df[buildings_df['longitude'].apply(is_float) & buildings_df['latitude'].apply(is_float)].copy()
+
+valid_buildings_df['geometry'] = valid_buildings_df.apply(lambda row: Point(float(row['longitude']), float(row['latitude'])), axis=1)
+
+buildings_gdf = gpd.GeoDataFrame(valid_buildings_df, geometry='geometry')
+
+buildings_gdf.set_crs(grid_df.crs, inplace=True)
+
+# note: newer GeoPandas renames the 'op' keyword to 'predicate'
+joined_df = gpd.sjoin(buildings_gdf, grid_df, how='inner', op='within')
+
+# groupby().size() returns a Series, so there are no 'columns', but it can be named
+buildings_count = joined_df.groupby('SPO_NO_CD').size()
+buildings_count.name = '건물_수'
+
+buildings_area_sum = joined_df.groupby('SPO_NO_CD')['연면적(㎡)'].sum()
+buildings_area_sum.name = '연면적(㎡)합'
+
+buildings_area_sum_house = joined_df[joined_df['주용도'] == '단독주택'].groupby('SPO_NO_CD')['연면적(㎡)'].sum()
+buildings_area_sum_house.name = '단독주택_연면적(㎡)합'
+
+buildings_house_ratio = buildings_area_sum_house / buildings_area_sum
+buildings_house_ratio.name = '단독주택_연면적(㎡)_비율'
+
+buildings_count.to_csv('building_counts_per_grid.csv')
+
+buildings_area_sum.to_csv('building_area_sums_per_grid.csv')
+
+buildings_area_sum_house.to_csv('houses_area_sums_per_grid.csv')
+
+buildings_house_ratio.to_csv('house_ratio_per_grid.csv')
+
+
+summary_df = pd.DataFrame({'BLDG_CNT': buildings_count, 'FLOOR_AREA_SUM': buildings_area_sum, 'HOUSE_FLOOR_AREA_SUM': buildings_area_sum_house, 'HOUSE_FLOOR_AREA_RATIO': buildings_house_ratio}).reset_index()
+
+# fillna() is not in-place; assign the result back
+summary_df = summary_df.fillna(0)
+
+summary_gdf = grid_df.merge(summary_df, on='SPO_NO_CD')
+
+# Export to GeoPackage
+write_dataframe(summary_gdf, 'DATA/processed/건물연면적/buildings_summary_per_grid.gpkg')
+
+# Do not use geopandas' to_file for saving: it can corrupt non-Latin characters
+# by assigning the wrong encoding (always Latin-1).
+# summary_gdf.to_file('DATA/processed/건물연면적/buildings_summary_per_grid.shp', encoding='utf-8')
\ No newline at end of file
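A note on the point construction: `gpd.points_from_xy` builds the geometry column in one vectorized call and lets the CRS be declared at construction time; a sketch over the same columns:

```python
buildings_gdf = gpd.GeoDataFrame(
    valid_buildings_df,
    geometry=gpd.points_from_xy(
        valid_buildings_df['longitude'].astype(float),
        valid_buildings_df['latitude'].astype(float),
    ),
    crs="epsg:4326",  # lon/lat inputs, matching grid_df after its to_crs above
)
```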
+++ 건축물대장_geocoding.py
@@ -0,0 +1,66 @@
+import pandas as pd
+import glob
+import os
+from tools.geocode.naver_geocode import Naver_Map
+from time import sleep
+
+
+if __name__ == "__main__":
+    all_files = glob.glob("DATA/refined/건축물대장/지오코딩전_파일_분리/*.csv")
+    completed_files = glob.glob("DATA/refined/건축물대장/지오코드/*.csv")
+
+    def adjust_filename(filename):
+        # Remove directory and extension, then strip '_geocoded'
+        base_name = os.path.basename(filename)
+        return base_name.replace('_geocoded', '')
+    completed_files_set = set(adjust_filename(f) for f in completed_files)
+    remaining_files = [f for f in all_files if os.path.basename(f) not in completed_files_set]
+
+    naver_geocode = Naver_Map()
+    for csv in remaining_files:
+        df = pd.read_csv(csv)
+
+        latitudes = []
+        longitudes = []
+
+        df["address"] = df.apply(
+            lambda row:
+            f"{row['시도']} {row['시군구']} {row['법정동']}" +
+            ("" if row['번'] == 0 or row['번'] == "" else f" {pd.to_numeric(str(row['번']), errors='ignore', downcast='integer')}") +
+            ("" if row['지'] == 0 or row['지'] == "" else f"-{pd.to_numeric(str(row['지']), errors='ignore', downcast='integer')}"),
+            axis=1
+        )
+
+        previous_addr = None
+        for i, addr in enumerate(df["address"]):
+            print(f"{i}/{len(df)} : {addr}")
+            # Skip the API call for consecutive duplicates. These occur when several
+            # buildings (e.g., apartment blocks) share the same lot address.
+            if addr == previous_addr:
+                latitudes.append(latitudes[-1])
+                longitudes.append(longitudes[-1])
+            else:
+                response = naver_geocode.geocoding(addr)
+                if response:
+                    latitudes.append(response[0])
+                    longitudes.append(response[1])
+                else:
+                    latitudes.append("INVALID")
+                    longitudes.append("INVALID")
+                sleep(0.5)  # rate-limit the API calls
+            previous_addr = addr
+        df['latitude'] = latitudes
+        df['longitude'] = longitudes
+
+        df.to_csv(csv.replace(".csv", "_geocoded.csv").replace("지오코딩전_파일_분리", "지오코드"), index=False)
\ No newline at end of file
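Skipping consecutive duplicates only helps when identical addresses are adjacent; a dictionary cache catches repeats anywhere in the file. A sketch, assuming `Naver_Map.geocoding(addr)` returns a `(lat, lon)` pair or a falsy value as in the script above:

```python
from time import sleep

cache = {}  # address -> (lat, lon) or ("INVALID", "INVALID")

def geocode_cached(addr):
    if addr not in cache:
        response = naver_geocode.geocoding(addr)
        cache[addr] = (response[0], response[1]) if response else ("INVALID", "INVALID")
        sleep(0.5)  # rate-limit only the real API calls
    return cache[addr]

latitudes, longitudes = zip(*(geocode_cached(a) for a in df["address"]))
```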
+++ 범죄주의구역_점수산출.py
@@ -0,0 +1,24 @@
+import glob
+
+from pyogrio import read_dataframe, write_dataframe
+
+
+# glob.glob(), not a literal pattern string: each entry must be a real file path
+road_list_by_region = glob.glob("DATA/refined/geopackage/범죄주의구역_격자/*.gpkg")
+
+base_road = read_dataframe("DATA/refined/geopackage/도로명주소/실폭도로_생활안전도로_0등급_dissolve.gpkg")
+
+for region in road_list_by_region:
+    region_gpkg = read_dataframe(region)
+    region_gpkg = region_gpkg.to_crs(base_road.crs)
+
+    # reset_index() preserves GID, the geometry index provided in the original base_road file
+    result = region_gpkg.dissolve(by="GID").reset_index()
+    result["area"] = result['geometry'].area
+    # NOTE: 'result' is computed here but never written out
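If the intent was one output file per grade, a sketch of the missing write step inside the loop; the output directory here is a hypothetical name, not one taken from the repo:

```python
from pathlib import Path

out_dir = Path("DATA/refined/geopackage/범죄주의구역_영역")  # hypothetical location
out_dir.mkdir(parents=True, exist_ok=True)
write_dataframe(result, str(out_dir / f"{Path(region).stem}_영역.gpkg"))
```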
+++ 상가2gird.py
@@ -0,0 +1,64 @@
+import pandas as pd
+import geopandas as gpd
+from pyogrio import read_dataframe, write_dataframe
+from shapely.geometry import Point
+
+
+stores_df = pd.read_csv("DATA/raw/상가정보/소상공인시장진흥공단_상가(상권)정보_경북_202403.csv", low_memory=False)
+
+grid_df = read_dataframe('DATA/refined/geopackage/시군구읍면동_경상북도_100x100.gpkg', encoding='utf-8')
+
+grid_df = grid_df.to_crs("epsg:4326")
+
+def is_float(x):
+    try:
+        float(x)
+        return True
+    except ValueError:
+        return False
+
+# .copy() avoids SettingWithCopyWarning on the geometry assignment below
+valid_stores_df = stores_df[stores_df['경도'].apply(is_float) & stores_df['위도'].apply(is_float)].copy()
+
+valid_stores_df['geometry'] = valid_stores_df.apply(lambda row: Point(float(row['경도']), float(row['위도'])), axis=1)
+
+stores_gdf = gpd.GeoDataFrame(valid_stores_df, geometry='geometry')
+
+stores_gdf.set_crs(grid_df.crs, inplace=True)
+
+# note: newer GeoPandas renames the 'op' keyword to 'predicate'
+joined_df = gpd.sjoin(stores_gdf, grid_df, how='inner', op='within')
+
+# groupby().size() returns a Series, so there are no 'columns', but it can be named
+stores = joined_df.groupby('SPO_NO_CD').size()
+stores.name = '상가_수'
+
+selection_slice = (joined_df["상권업종중분류명"] == '일반 숙박') | (joined_df["상권업종중분류명"] == "주점")
+
+inn_and_hedonic = joined_df[selection_slice].groupby('SPO_NO_CD').size()
+inn_and_hedonic.name = '숙박 및 유해업소수'
+
+# align to the full store index first so grids with zero such stores get 0 instead
+# of NaN; grids with fewer than 5 stores are zeroed to suppress noisy ratios
+ratio_of_inn_and_hedonic = inn_and_hedonic.reindex(stores.index, fill_value=0).divide(stores).where(stores >= 5, 0)
+ratio_of_inn_and_hedonic.name = '숙박 및 유해업소 비율'
+
+stores.to_csv('store_counts.csv')
+
+inn_and_hedonic.to_csv('inn_and_hedonic.csv')
+
+ratio_of_inn_and_hedonic.to_csv('inn_and_hedonic_ratio.csv')
+
+summary_df = pd.DataFrame({'STORE_CNT': stores, 'IH_CNT': inn_and_hedonic, 'IH_RATIO': ratio_of_inn_and_hedonic}).reset_index()
+
+# fillna() is not in-place; assign the result back
+summary_df = summary_df.fillna(0)
+
+summary_gdf = grid_df.merge(summary_df, on='SPO_NO_CD')
+
+# Export to GeoPackage
+write_dataframe(summary_gdf, 'DATA/processed/유흥_숙박업소/inn_and_hedonic.gpkg')
+
+# Do not use geopandas' to_file for saving: it can corrupt non-Latin characters
+# by assigning the wrong encoding (always Latin-1).
+# summary_gdf.to_file('DATA/processed/건물연면적/buildings_summary_per_grid.shp', encoding='utf-8')
\ No newline at end of file
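A small readability note: the chained equality checks for the category filter can be replaced with `isin`, which stays short as categories are added:

```python
selection_slice = joined_df["상권업종중분류명"].isin(['일반 숙박', '주점'])
```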
+++ 생활안전지도.py
@@ -0,0 +1,41 @@
+import glob
+
+from pyogrio import read_dataframe, write_dataframe
+from joblib import Parallel, delayed
+
+from tools.spatial_indexed_intersection import geom_overlay
+
+
+regions = glob.glob("DATA/refined/geopackage/100x100/100m격자총인구.gpkg")
+
+border = read_dataframe("DATA/refined/geopackage/2024_05_경북_도로명지도/경북_읍면동_경계.gpkg")
+
+blind_spot = read_dataframe("DATA/refined/geopackage/경북읍면동-행정동_음영구역.gpkg")
+
+def process_region(under, upper, postfix=""):
+    print(postfix)
+    under_name = under
+    under = read_dataframe(under)
+    # a single reprojection is enough to put both layers in the same CRS
+    under = under.to_crs(upper.crs)
+    print(under.crs)
+    blind_area_by_grid = geom_overlay(upper, under)
+    write_dataframe(blind_area_by_grid, f"DATA/refined/geopackage/음영구역_격자/{under_name.split('/')[-1]}{postfix}.gpkg")
+
+
+process_region(regions[0], blind_spot, "격자")
+process_region(regions[0], border, "격자_경계")
+#
+# # Execute the processing in parallel
+# results = Parallel(n_jobs=1)(delayed(process_region)(region, blind_spot) for region in regions)
+# print("!!!")
+# results = Parallel(n_jobs=1)(delayed(process_region)(region, border, "경계") for region in regions)
\ No newline at end of file
+++ 생활안전지도_범죄주의구역_영역계산.py
@@ -0,0 +1,44 @@
+import glob
+
+from pyogrio import read_dataframe, write_dataframe
+
+# sort so the last ten files line up with grades 1-10 in order
+범죄주의구역_list_by_score = glob.glob("DATA/refined/geopackage/범죄주의구역_격자/*.gpkg")
+범죄주의구역_list_by_score = sorted(범죄주의구역_list_by_score)
+범죄주의구역_list_by_score = 범죄주의구역_list_by_score[-10:]
+print(범죄주의구역_list_by_score)
+
+
+도로실폭_격자_데이터 = read_dataframe("DATA/refined/geopackage/도로명주소/실폭도로_생활안전도로_0등급_dissolve.gpkg")
+도로실폭_격자_데이터["road_area"] = 도로실폭_격자_데이터["geometry"].area
+도로실폭_격자_데이터 = 도로실폭_격자_데이터.set_index("GID")
+
+
+격자_데이터 = read_dataframe("/home/juni/문서/경상북도_CCTV_설치_분석/DATA/refined/geopackage/100x100/100m격자총인구.gpkg")
+격자_데이터 = 격자_데이터.set_index("GID")
+격자_데이터 = 격자_데이터.merge(도로실폭_격자_데이터["road_area"], left_index=True, right_index=True, how="left")
+
+
+for i, criminal in enumerate(범죄주의구역_list_by_score):
+    crimin_area = read_dataframe(criminal)
+    crimin_area = crimin_area.set_index("GID")
+    col_index = f"{i+1}등급영역"
+    crimin_area[col_index] = crimin_area["geometry"].area
+    격자_데이터 = 격자_데이터.merge(crimin_area[col_index], left_index=True, right_index=True, how="left")
+
+def score_calc(row):
+    # grids without any road area get a score of 0
+    if row["road_area"] == 0:
+        return 0
+    ret = 0
+    for i in range(10):
+        ret += row[f"{i + 1}등급영역"]
+    ret = ret * 0.1
+    ret = ret / row["road_area"]
+    return ret
+
+격자_데이터 = 격자_데이터.fillna(0)
+격자_데이터["범죄취약점수"] = 격자_데이터.apply(score_calc, axis=1)
+격자_데이터 = 격자_데이터.reset_index()
+write_dataframe(격자_데이터, "DATA/processed/범죄취약지수/범죄취약등급.gpkg")
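The row-wise `apply` can be replaced with column arithmetic, which is considerably faster on large grids; a sketch over the same columns:

```python
grade_cols = [f"{i + 1}등급영역" for i in range(10)]
score = 격자_데이터[grade_cols].sum(axis=1) * 0.1 / 격자_데이터["road_area"]
# road_area == 0 would divide to inf/NaN, so mask those grids to 0 as score_calc does
격자_데이터["범죄취약점수"] = score.where(격자_데이터["road_area"] != 0, 0)
```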
+++ 아파트구획_추출.py
@@ -0,0 +1,64 @@
+import re
+import pyogrio
+import pandas as pd
+import geopandas as gpd
+import multiprocessing as mp
+import fiona
+from tqdm import tqdm
+
+
+def filter_rows_by_regex(file_path, pattern, chunk_size, chunk_num, queue):
+    gdf_chunk = pyogrio.read_dataframe(file_path, skip_features=chunk_num * chunk_size, max_features=chunk_size)
+    regex = re.compile(pattern)
+    filtered_chunk = gdf_chunk[gdf_chunk['A8'].str.contains(regex, na=False)]
+
+    queue.put(1)  # Indicate progress
+    return filtered_chunk
+
+def parallel_process(file_path, pattern, chunk_size):
+    # Get the number of features (rows) using fiona
+    with fiona.open(file_path) as src:
+        num_rows = len(src)
+
+    # ceiling division, so an exact multiple of chunk_size adds no empty chunk
+    num_chunks = -(-num_rows // chunk_size)
+
+    # Create a multiprocessing pool
+    manager = mp.Manager()
+    queue = manager.Queue()
+    pool = mp.Pool(mp.cpu_count())
+
+    # Create a progress bar
+    pbar = tqdm(total=num_chunks, desc="Processing Chunks")
+
+    # Process each chunk in parallel and track progress
+    results = []
+    for i in range(num_chunks):
+        result = pool.apply_async(filter_rows_by_regex, args=(file_path, pattern, chunk_size, i, queue))
+        results.append(result)
+
+    # Close the pool
+    pool.close()
+
+    # Update progress bar based on queue
+    for _ in range(num_chunks):
+        queue.get()
+        pbar.update(1)
+
+    # Wait for all processes to finish
+    pool.join()
+    pbar.close()
+
+    # Combine the results
+    filtered_chunks = [result.get() for result in results]
+    filtered_data = gpd.GeoDataFrame(pd.concat(filtered_chunks, ignore_index=True))
+
+    return filtered_data
+
+
+# Example usage
+file_path = 'DATA/refined/geopackage/토지이용계획정보_국토교통부_경북_20240406.gpkg'
+pattern = r'아파트|공동주택'
+filtered_data = parallel_process(file_path, pattern, 8128)
+
+# Save or further process the resulting GeoDataFrame
+pyogrio.write_dataframe(filtered_data, "아파트구획_경북.gpkg")
\ No newline at end of file
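If only the `A8` attribute drives the filter, pyogrio can push some of the work into the read itself. A sketch; the `columns` and `where` keywords exist in recent pyogrio, but attribute-filter SQL support depends on the GDAL build, so treat this as an assumption to verify:

```python
import pyogrio

gdf = pyogrio.read_dataframe(
    'DATA/refined/geopackage/토지이용계획정보_국토교통부_경북_20240406.gpkg',
    columns=['A8'],  # read only the filter column (plus geometry)
    where="A8 LIKE '%아파트%' OR A8 LIKE '%공동주택%'",
)
```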