윤영준 08-01
Hello Yona
@383523f02308c447f28798fafd505963c20effec
 
CCTV_감시구역.py (added)
+++ CCTV_감시구역.py
@@ -0,0 +1,42 @@
+import pandas as pd
+import geopandas as gpd
+from pyogrio import read_dataframe, write_dataframe
+from shapely.geometry import Point
+
+
+grid_df = read_dataframe('DATA/refined/geopackage/시군구별/중분류_37100_경산시.gpkg', encoding='utf-8')
+
+light_df = pd.read_csv("DATA/raw/보안등/경상북도_경산시_보안등정보_20230711.csv", encoding="ms949")
+
+# Reproject to a metric CRS before buffering; in EPSG:4326 the buffer radius
+# below would mean 50 degrees, not 50 meters. EPSG:5179 is assumed here, in
+# line with the other scripts in this commit.
+grid_df = grid_df.to_crs("epsg:5179")
+
+def is_float(x):
+    try:
+        float(x)
+        return True
+    except ValueError:
+        return False
+
+# .copy() avoids SettingWithCopyWarning when the geometry column is added below
+valid_lightings_df = light_df[light_df['경도'].apply(is_float) & light_df['위도'].apply(is_float)].copy()
+
+valid_lightings_df['geometry'] = valid_lightings_df.apply(lambda row: Point(float(row['경도']), float(row['위도'])), axis=1)
+# The CSV holds lon/lat, so the points start in EPSG:4326 and are reprojected
+# to the grid's metric CRS before buffering.
+valid_lightings_df = gpd.GeoDataFrame(valid_lightings_df, geometry='geometry', crs="epsg:4326")
+valid_lightings_df = valid_lightings_df.to_crs(grid_df.crs)
+
+
+buffer_radius = 50  # meters
+
+points_buffered = valid_lightings_df.buffer(buffer_radius)
+
+def calculate_intersection_ratio(polygon, points_buffered):
+    # Overlapping buffers are each counted, so the ratio can exceed 1; take
+    # the unary_union of the buffers first if strict coverage is needed.
+    intersections = points_buffered.intersection(polygon)
+    intersection_area = intersections.area.sum()
+    return intersection_area / polygon.area
+
+# Apply the function to each polygon
+grid_df['intersection_ratio'] = grid_df.apply(
+    lambda row: calculate_intersection_ratio(row['geometry'], points_buffered),
+    axis=1
+)
+
+write_dataframe(grid_df, "감시비율(100m).gpkg")
\ No newline at end of file
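
A quick sanity check for the buffering step above; the coordinates and CRS choice are illustrative, not taken from the data.

    import geopandas as gpd
    from shapely.geometry import Point

    # one hypothetical streetlight near 경산시 (lon/lat)
    pt = gpd.GeoDataFrame(geometry=[Point(128.74, 35.83)], crs="epsg:4326")
    # buffer in a metric CRS so that 50 means 50 meters
    disc = pt.to_crs("epsg:5179").buffer(50)
    print(disc.area.iloc[0])  # ~7.8e3 m^2, close to pi * 50^2 (slightly less due to segment approximation)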
 
CCTV_감시구역_격자.gpkg (Binary) (added)
+++ CCTV_감시구역_격자.gpkg
Binary file is not shown
 
CCTV설치점수산출.py (added)
+++ CCTV설치점수산출.py
@@ -0,0 +1,141 @@
+from pyogrio import read_dataframe, write_dataframe
+import glob
+import pandas as pd
+import numpy as np
+import geopandas as gpd
+import json
+from joblib import Parallel, delayed
+
+from tools.spatial_indexed_intersection import geom_overlay
+
+
+GRID_INDEX = "GID"
+LOCALE_INDEX = "EMD_CD"
+
+SIG_CODE = [
+    ["경산", "47290"],
+    ["경주", "47130"],
+    ["구미", "47190"],
+    ["김천", "47150"],
+    ["안동", "47170"],
+    ["영주", "47210"],
+    ["영천", "47230"],
+    ["예천", "47900"],
+    ["칠곡", "47850"],
+    ["포항_남구", "47111"],
+    ["포항_북구", "47113"]
+]
+
+
+gpkg_datas = glob.glob("DATA/processed/점수산출_등급화전_전체데이터/v4/*.gpkg")
+
+house_ratio_grid_path = "DATA/processed/건물연면적/buildings_summary_per_grid.gpkg"
+house_ratio_grid_columns_to_extract = ["BLDG_CNT", "FLOOR_AREA_SUM","HOUSE_FLOOR_AREA_SUM", "HOUSE_FLOOR_AREA_RATIO"]
+
+
+def range_creator(df, column, num_ranges):
+    """
+    Creates a list of tuples representing ranges based on percentiles.
+
+    Parameters:
+    df (pd.DataFrame): The input dataframe.
+    column (str): The column name to calculate the ranges for.
+    num_ranges (int): The number of ranges to create.
+
+    Returns:
+    list of tuples: Each tuple contains the start and end of a range.
+    """
+    percentiles = np.linspace(0, 100, num_ranges + 1)
+    values = np.percentile(df[column], percentiles)
+    ranges = [(values[i], values[i + 1]) for i in range(len(values) - 1)]
+    return ranges
+
+def map_value_to_range(df, column, ranges):
+    """
+    Maps values in a column to the specified ranges.
+
+    Parameters:
+    df (pd.DataFrame): The input dataframe.
+    column (str): The column name to map values for.
+    ranges (list of tuples): The list of ranges to map values to.
+
+    Returns:
+    pd.Series: A series of the same length as the input dataframe,
+               where each value is the index of the range it falls into.
+    """
+    def _map_single_value(value):
+        # a distinct name avoids shadowing the enclosing function
+        for i, (start, end) in enumerate(ranges):
+            if start <= value <= end:
+                return i + 1
+        return np.nan
+
+    return df[column].apply(_map_single_value)
+
+def export_geojson_into_superset_compat_data(df, name):
+    print("폴리곤 데이터가 한 줄에 한 도형인지 다시 확인하세요! 별도의 예외처리가 없습니다!")
+    df = df.to_crs("epsg:4326")
+    df = df.fillna(0)
+    # converting shapely format into geojson for SUPERSET visualization use
+    df["geometry"] = df["geometry"].apply(lambda row : row.__geo_interface__["coordinates"][0][0])
+    #... mapbox don't like single quote...
+    df["geometry"] = df["geometry"].apply(lambda row : json.dumps(row, indent=4))
+
+    df.to_csv(f"{name}.csv")
+    # write_dataframe(df, f"DATA/processed/점수산출_등급화전_전체데이터/{i.split('/')[-1].split('.')[0]}.csv")
+
+def process_region(gpkg):
+    df = read_dataframe(gpkg)
+
+    df = df.fillna(0)
+
+    filename = gpkg.split('/')[-1].split('.')[0]
+
+    # exclude by rules
+    df = df[(df["총인구"] > 0) | (df["BLDG_CNT"] > 0)]
+    df = df[df["산지영역"] < 9000]
+    df = df[df["아파트단지영역"] < 1000]
+
+    # calculate
+    df["감시취약지수"] = (10000 - df["CCTV_감시영역"]) * 0.00352
+    df["범죄특성요인"] = df["범죄취약점수"] * 17.6
+    df["범죄예방요인"] = df["area_ratio"] * 14.7
+    df["환경요인"] = df["IH_RATIO"] * 2.9
+    인구밀집요인등급화 = range_creator(df, "총인구", 10)
+    df["인구밀집요인"] = map_value_to_range(df, "총인구", 인구밀집요인등급화) * 1.76
+
+    취약인구비율_평균 = df["취약인구_비율"].mean()
+    단독주택연면적_평균 = df["HOUSE_FLOOR_AREA_RATIO"].mean()
+    df["가중치1"] = df.apply(lambda row: 1 if row["취약인구_비율"] > 취약인구비율_평균 else 0, axis=1)
+    df["가중치2"] = df.apply(lambda row: 1 if row["HOUSE_FLOOR_AREA_RATIO"] > 단독주택연면적_평균 else 0, axis=1)
+
+
+    def calculate_score(row):
+        # 'total' avoids shadowing the built-in sum()
+        total = (row["감시취약지수"] + row["범죄특성요인"] + row["범죄예방요인"]
+                 + row["환경요인"] + row["인구밀집요인"])
+        가중치 = 1
+        if row["가중치1"] == 1:
+            가중치 += 0.095
+        if row["가중치2"] == 1:
+            가중치 += 0.041
+        return total * 가중치
+
+    df["최종지수"] = df.apply(calculate_score, axis=1)
+    # print(df["최종지수"])
+    최종지수등급화 = range_creator(df, "최종지수", 100)
+    df["최종지수등급"] = map_value_to_range(df, "최종지수", 최종지수등급화)
+
+    save_loc = f"DATA/processed/최종데이터/{gpkg.split('/')[-1].split('.')[0]}_격자"
+    df = df.drop_duplicates(keep="first")
+    write_dataframe(df, f"{save_loc}.gpkg")
+    with open(f"{save_loc}_메타데이터.txt", 'w', encoding='utf-8') as file:
+        file.write(f"취약인구비율_평균 : {취약인구비율_평균}\n")
+        file.write(f"단독주택연면적_평균 : {단독주택연면적_평균}\n")
+        file.write(f"인구밀집요인등급화 : {인구밀집요인등급화}\n")
+        file.write(f"최종지수등급화 : {최종지수등급화}\n")
+        pd.set_option("display.max_rows", None)
+        file.write(f"등급별_격자수 : {df['최종지수등급'].value_counts().sort_index()}\n")
+    df = df.to_crs("epsg:4326")
+    df["centroid"] = df["geometry"].centroid
+    df.to_csv(f"{save_loc}_표준좌표계.csv")
+
+
+Parallel(n_jobs=11)(delayed(process_region)(gpkg) for gpkg in gpkg_datas)
\ No newline at end of file
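
A hedged toy run of the two binning helpers defined above (range_creator / map_value_to_range); the numbers are made up.

    import pandas as pd

    toy = pd.DataFrame({"총인구": [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]})
    ranges = range_creator(toy, "총인구", 10)   # decile boundaries: (0, 10), (10, 20), ...
    grades = map_value_to_range(toy, "총인구", ranges)
    print(grades.tolist())                      # [1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

Boundary values land in the lower bin because the ranges are scanned in order.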
 
cctv_ratio.py (added)
+++ cctv_ratio.py
@@ -0,0 +1,19 @@
+import glob
+
+from pyogrio import read_dataframe, write_dataframe
+
+from tools.spatial_indexed_intersection import geom_overlay
+
+
+cctv_list_by_region = glob.glob("DATA/refined/geopackage/CCTV_감시영역/*.gpkg")
+
+grid = read_dataframe("DATA/refined/geopackage/100x100/100m격자총인구.gpkg")
+
+for region in cctv_list_by_region:
+    region_gpkg = read_dataframe(region)
+    region_gpkg = region_gpkg.to_crs(grid.crs)
+    result = geom_overlay(region_gpkg, grid)
+    result = result.dissolve(by="GID").reset_index()
+    result["CCTV_감시영역"] = result['geometry'].area
+    write_dataframe(result, f"DATA/refined/geopackage/CCTV_감시영역_격자/{region.split('/')[-1].split(',')[0]}_격자.gpkg")
+
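For reference, a hedged miniature of the dissolve-then-measure step above (toy squares, hypothetical GIDs):

    import geopandas as gpd
    from shapely.geometry import box

    frags = gpd.GeoDataFrame(
        {"GID": ["A", "A", "B"]},
        geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1), box(5, 5, 6, 6)],
        crs="epsg:5179",
    )
    merged = frags.dissolve(by="GID").reset_index()
    print(merged["geometry"].area.tolist())  # [2.0, 1.0] -- one area per GID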
 
crimin_area_cal.py (added)
+++ crimin_area_cal.py
@@ -0,0 +1,16 @@
+from pyogrio import read_dataframe, write_dataframe
+import geopandas as gpd
+import pandas as pd
+from joblib import Parallel, delayed
+
+def process_file(i, grid):
+    crimin_data = read_dataframe(f"DATA/refined/geopackage/범죄주의구역_가공/범죄주의구역-경북{i+1}.gpkg")
+    crimin_data = crimin_data.to_crs("EPSG:5179")
+    # overlay() builds its own spatial index internally, so no explicit sindex is kept around
+    crimin_overlap = grid.overlay(crimin_data, how="intersection")
+    write_dataframe(crimin_overlap, f"DATA/refined/geopackage/범죄주의구역_가공/범죄주의구역-경북-격자{i+1}.gpkg")
+
+grid = read_dataframe("DATA/refined/geopackage/시군구읍면동_경상북도_100x100.gpkg")
+print("grid_loaded!")
+Parallel(n_jobs=-1)(delayed(process_file)(i, grid) for i in range(10))
 
survillence_area.py (added)
+++ survillence_area.py
@@ -0,0 +1,50 @@
+from pyogrio import write_dataframe, read_dataframe
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import glob
+
+
+SIG_CODE = [
+    ["경산", "47290"],
+    ["경주", "47130"],
+    ["구미", "47190"],
+    ["김천", "47150"],
+    ["안동", "47170"],
+    ["영주", "47210"],
+    ["영천", "47230"],
+    ["예천", "47900"],
+    ["칠곡", "47850"],
+    ["포항_남구", "47111"],
+    ["포항_북구", "47113"]
+]
+
+# origin = read_dataframe("DATA/refined/geopackage/100x100/100m격자총인구.gpkg")
+# print(origin.columns)
+boarder = read_dataframe("DATA/refined/geopackage/음영구역_격자/100m격자총인구.gpkg격자_경계.gpkg")
+print(boarder.columns)
+negative = read_dataframe("DATA/refined/geopackage/음영구역_격자/100m격자_음영구역_CPTED.gpkg")
+print(negative.columns)
+
+print("dataframe_loaded!")
+
+for SIG in SIG_CODE:
+    # EMD_CD begins with the 5-digit SIG code; startswith avoids accidental
+    # matches deeper inside the code
+    boarder_SIG = boarder[boarder["EMD_CD"].str.startswith(SIG[1])]
+    negative_SIG = negative[negative["EMD_CD"].str.startswith(SIG[1])]
+    # origin_SIG = origin[origin["EMD_CD"].str.contains(SIG[1])]
+    boarder_SIG = boarder_SIG.dissolve(by="GID")
+    negative_SIG = negative_SIG.dissolve(by="GID")
+    boarder_SIG["boarder_area"] = boarder_SIG["geometry"].area
+    negative_SIG["negative_area"] = negative_SIG["geometry"].area
+
+    boarder_SIG = boarder_SIG.rename(columns={"geometry" : "boarder_geometry"})
+    negative_SIG = negative_SIG.rename(columns={"geometry": "negative_geometry"})
+
+    merged = pd.merge(boarder_SIG, negative_SIG, on="GID").reset_index()
+
+    merged["area_ratio"] = merged["negative_area"] / merged["boarder_area"]
+    merged = merged.set_geometry("negative_geometry")
+    merged = merged.drop(columns="boarder_geometry")
+
+
+    write_dataframe(merged, f"DATA/refined/geopackage/100x100음영구역_비율/음영구역비율_{SIG[0]}.gpkg")
 
tools/__pycache__/spatial_indexed_intersection.cpython-311.pyc (Binary) (added)
+++ tools/__pycache__/spatial_indexed_intersection.cpython-311.pyc
Binary file is not shown
 
tools/equation.py (added)
+++ tools/equation.py
@@ -0,0 +1,2 @@
+import numpy as np
+import pandas as pd
 
tools/geocode/__pycache__/naver_geocode.cpython-311.pyc (Binary) (added)
+++ tools/geocode/__pycache__/naver_geocode.cpython-311.pyc
Binary file is not shown
 
tools/geocode/naver_geocode.py (added)
+++ tools/geocode/naver_geocode.py
@@ -0,0 +1,47 @@
+import os
+
+import pandas as pd
+import requests
+
+class Naver_Map():
+    def __init__(self):
+        # Read credentials from the environment instead of hardcoding them in
+        # the repository; NCP_CLIENT_ID / NCP_CLIENT_SECRET are assumed names.
+        self.Client_ID = os.environ["NCP_CLIENT_ID"]
+        self.Client_Secret = os.environ["NCP_CLIENT_SECRET"]
+        self.api_base_url = 'https://naveropenapi.apigw.ntruss.com/map-geocode/v2/geocode?'
+        self.headers = {
+            'X-NCP-APIGW-API-KEY-ID': self.Client_ID,
+            'X-NCP-APIGW-API-KEY': self.Client_Secret
+        }
+
+    def geocoding(self, addr):
+        api_url = self.api_base_url + f"query={addr}"
+        r = requests.get(api_url, headers=self.headers)
+        http_normal = r.status_code == 200
+
+        data = r.json()
+        if http_normal and data['status'] != 'INVALID_REQUEST' and data['meta']['count'] != 0:
+            lat = data['addresses'][0]['y']
+            long = data['addresses'][0]['x']
+            return [lat, long, data]
+        else:
+            print(data)
+            if http_normal:
+                print(data['status'])
+            return False
+
+def extract(lst, i):
+    return [item[i] for item in lst]
+
+
+if __name__ == "__main__" :
+    import argparse
+    args = argparse.ArgumentParser()
+    args.add_argument("--address", type=str, help="위경도를 조회할 지번/도로명 주소")
+    args = args.parse_args()
+    addr = args.address
+    naver_geocode = Naver_Map()
+    result = naver_geocode.geocoding(addr)
+    print(result)(파일 끝에 줄바꿈 문자 없음)
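
A minimal import-side usage sketch; the address is hypothetical, and the credentials must be present in the environment as noted above.

    from tools.geocode.naver_geocode import Naver_Map

    geocoder = Naver_Map()
    coord = geocoder.geocoding("경상북도 경산시 중앙로 1")  # hypothetical address
    if coord:
        lat, lon, raw = coord
        print(lat, lon)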
 
tools/spatial_indexed_intersection.py (added)
+++ tools/spatial_indexed_intersection.py
@@ -0,0 +1,111 @@
+import geopandas as gpd
+import pandas as pd
+
+# the imports below are only used by the commented-out parallel prototype
+from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
+import os
+
+
+# def prepare_tasks(under, over, task_que):
+#     task_list = []
+#     # We want to ensure minimal data is being moved around, so we prepare data slices here
+#     # that will be passed directly to each worker process
+#     for under_index, over_indices in task_que.items():
+#         # Extract only the necessary rows from 'under' and 'over' and create lightweight copies
+#         under_slice = under.iloc[[under_index]].copy()  # Copy to ensure continuity outside the loop
+#         over_slice = over.iloc[over_indices].copy()  # Copy to ensure continuity outside the loop
+#         task_list.append({
+#             'under_data': under_slice,
+#             'over_data': over_slice
+#         })
+#     return task_list
+#
+#
+# def process_intersection(task, under_data, over_data, how="intersection"):
+#     # Extract the relevant portions of the dataframes
+#     ref_poly = under_data.iloc[[task['under_index']]]
+#     comp_poly = over_data.iloc[task['over_indices']]
+#     # Perform the overlay operation
+#     intersection = ref_poly.overlay(comp_poly, how=how, keep_geom_type=False)
+#     return intersection
+#
+#
+# def geom_overlay(under, over, how="intersection"):
+#     if under.crs != over.crs:
+#         print(f"Error: CRS mismatch \nUnder CRS: {under.crs}, \nOver CRS: {over.crs}")
+#         return None
+#     print(f"under : {len(under)}, over : {len(over)}")
+#     if len(under) > len(over):
+#         under, over = over, under
+#
+#     print("creating spatial index queries...")
+#     sindex_query_result = under.sindex.query(over["geometry"], predicate="intersects")
+#     task_que = {}
+#     for key, value in zip(sindex_query_result[1], sindex_query_result[0]):
+#         if key in task_que:
+#             task_que[key].append(value)
+#         else:
+#             task_que[key] = [value]
+#
+#     print("preparing the task...")
+#     print(len(task_que))
+#     task_list = prepare_tasks(under, over, task_que)
+#
+#     print("executing...")
+#     with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
+#         future_to_task = {executor.submit(process_intersection, task, how): task for task in task_list}
+#         total_tasks = len(future_to_task)
+#         completed_tasks = 0
+#
+#         for future in as_completed(future_to_task):
+#             completed_tasks += 1
+#             print(f"Completed {completed_tasks} of {total_tasks} tasks")
+#             result = future.result()  # Collect the result from the future
+#
+#     resulting_intersection_geom = pd.concat([future.result() for future in future_to_task])
+#     return resulting_intersection_geom
+
+
+def geom_overlay(under, over, how="intersection"):
+    # capture each layer's CRS up front, before any swap below
+    under_crs = under.crs
+    over_crs = over.crs
+
+    if over_crs != under_crs:
+        raise ValueError(f"crs_mismatch \n under : {under_crs},\n over: {over_crs}")
+
+    # swap so 'under' is the smaller layer; fewer reference polygons means
+    # fewer spatial-index probes
+    if len(under) > len(over):
+        under, over = over, under
+
+    print("creating_spatial index...")
+    sindex_query_result = under.sindex.query(over["geometry"], predicate="intersects")
+
+    # key is the reference (under) polygon index, value the comparative (over)
+    # polygon indices; using the layer with fewer polygons as the reference
+    # saves search time
+    task_que = {}
+    for key, value in zip(sindex_query_result[1], sindex_query_result[0]):
+        if key in task_que:
+            task_que[key].append(value)
+        else:
+            task_que[key] = [value]
+
+    intersections = []
+    print("executing...")
+    for i, key in enumerate(task_que):
+        # double brackets keep iloc returning a one-row DataFrame rather than a Series
+        ref_poly = under.iloc[[key]]
+        ref_poly = gpd.GeoDataFrame(ref_poly, crs=over_crs)
+        comp_poly = over.iloc[task_que[key]]
+        intersection = ref_poly.overlay(comp_poly, how=how, keep_geom_type=False)
+        intersections.append(intersection)
+        print(f"{i}/{len(task_que)}")
+
+    if len(intersections) > 0:
+        return pd.concat(intersections)
+    # an empty GeoDataFrame keeps the return type consistent for callers
+    return gpd.GeoDataFrame(geometry=[], crs=under_crs)
\ No newline at end of file
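
A small hedged check of geom_overlay with two single-polygon toy layers in a shared CRS:

    import geopandas as gpd
    from shapely.geometry import box
    from tools.spatial_indexed_intersection import geom_overlay

    under = gpd.GeoDataFrame({"GID": ["g1"]}, geometry=[box(0, 0, 2, 2)], crs="epsg:5179")
    over = gpd.GeoDataFrame({"name": ["p1"]}, geometry=[box(1, 1, 3, 3)], crs="epsg:5179")
    result = geom_overlay(under, over)
    print(result["geometry"].area.tolist())  # [1.0] -- the 1x1 overlap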
 
건축물대장.py (added)
+++ 건축물대장.py
@@ -0,0 +1,33 @@
+import pandas as pd
+import numpy as np
+
+debug = False
+
+if __name__ == "__main__":
+    df = pd.read_csv("DATA/refined/건축물대장/건축물대장_전국_필터.csv")
+    print(len(df))
+    if debug:
+        df = df.iloc[:7000]
+
+    out = pd.DataFrame(columns=["기초지자체","연면적합", "연면적_단독주택합", "비율"])
+
+    df["address"] = (df["시도"] +"_"+ df["시군구"]).replace(" ", "_", regex=True)
+    groups = df.groupby("address")
+    for group in groups:
+        addr = group[0]
+        df_gr = group[1]
+        print(len(df_gr))
+        연면적 = df_gr["연면적(㎡)"].sum()
+        연면적_단독주택 = df_gr[df_gr["주용도"] == "단독주택"]["연면적(㎡)"].sum()
+        비율 =  연면적_단독주택 / 연면적
+        row = {
+            "기초지자체" : [addr],
+            "연면적합": [연면적],
+            "연면적_단독주택합" : [연면적_단독주택],
+            "비율" : [비율]
+        }
+        row = pd.DataFrame(row)
+        out = pd.concat((out, row))
+        pass
+    out.to_csv("DATA/processed/전국_기초지자체_단독주택연면적_비율.csv", index=False)
+    pass(파일 끝에 줄바꿈 문자 없음)
 
건축물대장2grid.py (added)
+++ 건축물대장2grid.py
@@ -0,0 +1,65 @@
+import pandas as pd
+import geopandas as gpd
+from pyogrio import read_dataframe, write_dataframe
+from shapely.geometry import Point
+
+
+buildings_df = pd.read_csv("DATA/refined/건축물대장/concat.csv", low_memory=False)
+
+grid_df = read_dataframe('DATA/refined/geopackage/시군구읍면동_경상북도_100x100.gpkg', encoding='utf-8')
+
+grid_df = grid_df.to_crs("epsg:4326")
+
+def is_float(x):
+    try:
+        float(x)
+        return True
+    except ValueError:
+        return False
+
+# .copy() avoids SettingWithCopyWarning when the geometry column is added below
+valid_buildings_df = buildings_df[buildings_df['longitude'].apply(is_float) & buildings_df['latitude'].apply(is_float)].copy()
+
+valid_buildings_df['geometry'] = valid_buildings_df.apply(lambda row: Point(float(row['longitude']), float(row['latitude'])), axis=1)
+
+buildings_gdf = gpd.GeoDataFrame(valid_buildings_df, geometry='geometry')
+
+buildings_gdf.set_crs(grid_df.crs, inplace=True)
+
+# 'op' was renamed to 'predicate' in current geopandas releases
+joined_df = gpd.sjoin(buildings_gdf, grid_df, how='inner', predicate='within')
+
+# groupby().size() returns a Series, so there are no 'columns', but it can be named
+buildings_count = joined_df.groupby('SPO_NO_CD').size()
+buildings_count.name = '건물_수'
+
+buildings_area_sum = joined_df.groupby('SPO_NO_CD')['연면적(㎡)'].sum()
+buildings_area_sum.name = '연면적(㎡)합'
+
+buildings_area_sum_house = joined_df[joined_df['주용도'] == '단독주택'].groupby('SPO_NO_CD')['연면적(㎡)'].sum()
+buildings_area_sum_house.name = '단독주택_연면적(㎡)합'
+
+buildings_house_ratio = buildings_area_sum_house / buildings_area_sum
+buildings_house_ratio.name = '단독주택_연면적(㎡)_비율'
+
+buildings_count.to_csv('building_counts_per_grid.csv')
+
+buildings_area_sum.to_csv('building_area_sums_per_grid.csv')
+
+buildings_area_sum_house.to_csv('houses_area_sums_per_grid.csv')
+
+buildings_house_ratio.to_csv('house_ratio_per_grid.csv')
+
+
+# Drop duplicates to avoid multiple entries of the same grid square
+final_gdf = joined_df.drop_duplicates(subset=['SPO_NO_CD'])
+
+summary_df = pd.DataFrame({'BLDG_CNT': buildings_count, 'FLOOR_AREA_SUM': buildings_area_sum, 'HOUSE_FLOOR_AREA_SUM': buildings_area_sum_house, 'HOUSE_FLOOR_AREA_RATIO': buildings_house_ratio}).reset_index()
+
+summary_df = summary_df.fillna(0)
+
+summary_gdf = grid_df.merge(summary_df, on='SPO_NO_CD')
+
+# Export to GeoPackage
+write_dataframe(summary_gdf, 'DATA/processed/건물연면적/buildings_summary_per_grid.gpkg')
+
+# Do not use geopandas' to_file for saving: it can corrupt non-latin characters
+# by assigning the wrong encoding (always latin-1).
+# summary_gdf.to_file('DATA/processed/건물연면적/buildings_summary_per_grid.shp', encoding='utf-8')
\ No newline at end of file
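
A hedged miniature of the join-then-aggregate pattern above, with two toy points falling in one grid cell:

    import geopandas as gpd
    from shapely.geometry import Point, box

    grid = gpd.GeoDataFrame({"SPO_NO_CD": ["c1"]}, geometry=[box(0, 0, 1, 1)], crs="epsg:4326")
    pts = gpd.GeoDataFrame({"연면적(㎡)": [10.0, 20.0]},
                           geometry=[Point(0.5, 0.5), Point(0.2, 0.8)], crs="epsg:4326")
    joined = gpd.sjoin(pts, grid, how="inner", predicate="within")
    print(joined.groupby("SPO_NO_CD")["연면적(㎡)"].sum())  # c1 -> 30.0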
 
건축물대장_geocoding.py (added)
+++ 건축물대장_geocoding.py
@@ -0,0 +1,66 @@
+import pandas as pd
+import glob
+import os
+from tools.geocode.naver_geocode import Naver_Map
+from time import sleep
+
+
+if __name__ == "__main__":
+    all_files = glob.glob("DATA/refined/건축물대장/지오코딩전_파일_분리/*.csv")
+    completed_files = glob.glob("DATA/refined/건축물대장/지오코드/*.csv")
+
+    def adjust_filename(filename):
+        # Remove directory and extension, then strip '_geocoded'
+        base_name = os.path.basename(filename)
+        return base_name.replace('_geocoded', '')
+    completed_files_set = set(adjust_filename(f) for f in completed_files)
+    remaining_files = [f for f in all_files if os.path.basename(f) not in completed_files_set]
+
+    naver_geocode = Naver_Map()
+    for csv in remaining_files:
+        df = pd.read_csv(csv)
+
+        latitudes = []
+        longitudes = []
+
+        df["address"] = df.apply(
+            lambda row:
+            f"{row['시도']} {row['시군구']} {row['법정동']}" +
+            ("" if row['번'] == 0 or row['번'] == "" else f" {pd.to_numeric(str(row['번']), errors = 'ignore', downcast='integer')}") +
+            ("" if row['지'] == 0 or row['지'] == "" else f"-{pd.to_numeric(str(row['지']), errors = 'ignore', downcast='integer')}"),
+            axis=1
+        )
+
+        previous_addr = None
+        lat = lon = "INVALID"
+        for i, addr in enumerate(df["address"]):
+            print(f"{i}/{len(df)} : {addr}")
+            # Skip the API call when the address repeats the previous row;
+            # this happens when several buildings share one address, e.g.
+            # apartment complexes built block by block.
+            if addr != previous_addr:
+                response = naver_geocode.geocoding(addr)
+                if response:
+                    lat, lon = response[0], response[1]
+                else:
+                    lat = lon = "INVALID"
+                sleep(0.5)
+            latitudes.append(lat)
+            longitudes.append(lon)
+            previous_addr = addr
+        df['latitude'] = latitudes
+        df['longitude'] = longitudes
+
+        df.to_csv(csv.replace(".csv", "_geocoded.csv").replace("지오코딩전_파일_분리", "지오코드"), index=False)
\ No newline at end of file
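
The previous-address check above only deduplicates adjacent rows; a dict cache (hypothetical helper, sketched below) would also catch repeats scattered through the file:

    _geocode_cache = {}

    def geocode_cached(geocoder, addr):
        # cache both successes and failures so each address hits the API once
        if addr not in _geocode_cache:
            _geocode_cache[addr] = geocoder.geocoding(addr)
        return _geocode_cache[addr]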
 
범죄주의구역_점수산출.py (added)
+++ 범죄주의구역_점수산출.py
@@ -0,0 +1,24 @@
+import glob
+
+import geopandas as gpd
+import pandas as pd
+from pyogrio import read_dataframe, write_dataframe
+import numpy as np
+from tqdm import tqdm
+from shapely.geometry import box, MultiPolygon
+from joblib import Parallel, delayed
+
+from tools.spatial_indexed_intersection import geom_overlay
+
+
+road_list_by_region = ["DATA/refined/geopackage/범죄주의구역_격자/*.gpkg"]
+
+base_road = read_dataframe("DATA/refined/geopackage/도로명주소/실폭도로_생활안전도로_0등급_dissolve.gpkg")
+
+for region in road_list_by_region:
+    region_gpkg = read_dataframe(region)
+    region_gpkg = region_gpkg.to_crs(base_road.crs)
+
+    # reset_index() is to preserve GID, the geometry index provided in the original base_road file
+    result = region_gpkg.dissolve(by="GID").reset_index()
+    result["area"] = result['geometry'].area
+    # assumed output location (placeholder); the computed 'result' is not used otherwise
+    write_dataframe(result, f"DATA/processed/범죄주의구역_점수/{region.split('/')[-1]}")
 
상가2gird.py (added)
+++ 상가2gird.py
@@ -0,0 +1,64 @@
+import pandas as pd
+import geopandas as gpd
+from pyogrio import read_dataframe, write_dataframe
+from shapely.geometry import Point
+
+
+buildings_df = pd.read_csv("DATA/raw/상가정보/소상공인시장진흥공단_상가(상권)정보_경북_202403.csv", low_memory=False)
+
+grid_df = read_dataframe('DATA/refined/geopackage/시군구읍면동_경상북도_100x100.gpkg', encoding='utf-8')
+
+grid_df = grid_df.to_crs("epsg:4326")
+
+def is_float(x):
+    try:
+        float(x)
+        return True
+    except ValueError:
+        return False
+
+# .copy() avoids SettingWithCopyWarning when the geometry column is added below
+valid_buildings_df = buildings_df[buildings_df['경도'].apply(is_float) & buildings_df['위도'].apply(is_float)].copy()
+
+valid_buildings_df['geometry'] = valid_buildings_df.apply(lambda row: Point(float(row['경도']), float(row['위도'])), axis=1)
+
+buildings_gdf = gpd.GeoDataFrame(valid_buildings_df, geometry='geometry')
+
+buildings_gdf.set_crs(grid_df.crs, inplace=True)
+
+# 'op' was renamed to 'predicate' in current geopandas releases
+joined_df = gpd.sjoin(buildings_gdf, grid_df, how='inner', predicate='within')
+
+# groupby().size() returns a Series, so there are no 'columns', but it can be named
+stores = joined_df.groupby('SPO_NO_CD').size()
+stores.name = '상가_수'
+
+# buildings_area_sum = joined_df.groupby('SPO_NO_CD')['연면적(㎡)'].sum()
+# buildings_area_sum.name = '연면적(㎡)합'
+
+selection_slice = (joined_df["상권업종중분류명"] == '일반 숙박') | (joined_df["상권업종중분류명"] == "주점")
+
+inn_and_hedonic = joined_df[selection_slice].groupby('SPO_NO_CD').size()
+inn_and_hedonic.name = '숙박 및 유해업소수'
+
+# ratios are zeroed for grids with fewer than 5 stores, presumably to suppress small-sample noise
+ratio_of_inn_and_hedonic = inn_and_hedonic.divide(stores).where(stores >= 5, 0)
+ratio_of_inn_and_hedonic.name = '숙박및 유해업소 비율'
+
+stores.to_csv('store_counts.csv')
+
+inn_and_hedonic.to_csv('inn_and_hedonic.csv')
+
+ratio_of_inn_and_hedonic.to_csv('inn_and_hedonic_ratio.csv')
+
+# Drop duplicates to avoid multiple entries of the same grid square
+final_gdf = joined_df.drop_duplicates(subset=['SPO_NO_CD'])
+
+summary_df = pd.DataFrame({'STORE_CNT': stores, 'IH_CNT': inn_and_hedonic, 'IH_RATIO': ratio_of_inn_and_hedonic}).reset_index()
+
+summary_df = summary_df.fillna(0)
+
+summary_gdf = grid_df.merge(summary_df, on='SPO_NO_CD')
+
+# Export to GeoPackage
+write_dataframe(summary_gdf, 'DATA/processed/유흥_숙박업소/inn_and_hedonic.gpkg')
+
+# Do not use geopandas' to_file for saving: it can corrupt non-latin characters
+# by assigning the wrong encoding (always latin-1).
+# summary_gdf.to_file('DATA/processed/건물연면적/buildings_summary_per_grid.shp', encoding='utf-8')
\ No newline at end of file
 
생활안전지도.py (added)
+++ 생활안전지도.py
@@ -0,0 +1,41 @@
+import glob
+
+import geopandas as gpd
+import pandas as pd
+from pyogrio import read_dataframe, write_dataframe
+import numpy as np
+from tqdm import tqdm
+from shapely.geometry import box, MultiPolygon
+from joblib import Parallel, delayed
+
+from tools.spatial_indexed_intersection import geom_overlay
+
+
+regions = glob.glob("DATA/refined/geopackage/100x100/100m격자총인구.gpkg")
+
+boarder = "DATA/refined/geopackage/2024_05_경북_도로명지도/경북_읍면동_경계.gpkg"
+
+boarder = read_dataframe(boarder)
+
+blind_spot = ("DATA/refined/geopackage/경북읍면동-행정동_음영구역.gpkg")
+
+blind_spot = read_dataframe(blind_spot)
+
+def process_region(under, upper, postfix=""):
+    print(postfix)
+    under_name = under
+    under = read_dataframe(under)
+    # align both layers on one CRS before the overlay
+    under = under.to_crs(upper.crs)
+    print(under.crs)
+    blind_area_by_grid = geom_overlay(upper, under)
+    write_dataframe(blind_area_by_grid, f"DATA/refined/geopackage/음영구역_격자/{under_name.split('/')[-1]}{postfix}.gpkg")
+
+
+process_region(regions[0], blind_spot, "격자")
+process_region(regions[0], boarder, "격자_경계")
+#
+# # Execute the processing in parallel
+# results = Parallel(n_jobs=1)(delayed(process_region)(region, blind_spot) for region in regions)
+# print("!!!")
+# results = Parallel(n_jobs=1)(delayed(process_region)(region, boarder, "경계") for region in regions)
\ No newline at end of file
 
생활안전지도_범죄주의구역_영역계산.py (added)
+++ 생활안전지도_범죄주의구역_영역계산.py
@@ -0,0 +1,44 @@
+import glob
+
+from pyogrio import read_dataframe, write_dataframe
+
+범죄주의구역_list_by_score = glob.glob("DATA/refined/geopackage/범죄주의구역_격자/*.gpkg")
+범죄주의구역_list_by_score = sorted(범죄주의구역_list_by_score)
+범죄주의구역_list_by_score = 범죄주의구역_list_by_score[-10:]
+print(범죄주의구역_list_by_score)
+
+
+도로실폭_격자_데이터 = read_dataframe("DATA/refined/geopackage/도로명주소/실폭도로_생활안전도로_0등급_dissolve.gpkg")
+도로실폭_격자_데이터["road_area"]=도로실폭_격자_데이터["geometry"].area
+도로실폭_격자_데이터 = 도로실폭_격자_데이터.set_index("GID")
+
+
+격자_데이터 = read_dataframe("/home/juni/문서/경상북도_CCTV_설치_분석/DATA/refined/geopackage/100x100/100m격자총인구.gpkg")
+격자_데이터 = 격자_데이터.set_index("GID")
+격자_데이터 = 격자_데이터.merge(도로실폭_격자_데이터["road_area"], left_index=True, right_index=True, how="left")
+
+
+for i, criminal in enumerate(범죄주의구역_list_by_score):
+    crimin_area = read_dataframe(criminal)
+    crimin_area = crimin_area.set_index("GID")
+    col_index = f"{i+1}등급영역"
+    crimin_area[col_index] = crimin_area["geometry"].area
+    격자_데이터 = 격자_데이터.merge(crimin_area[col_index], left_index=True, right_index=True, how="left")
+
+def score_calc(row):
+    # mean of the ten grade areas, normalised by the road area in the grid
+    if row["road_area"] == 0:
+        return 0
+    total = sum(row[f"{i + 1}등급영역"] for i in range(10))
+    return total * 0.1 / row["road_area"]
+
+격자_데이터 = 격자_데이터.fillna(0)
+격자_데이터["범죄취약점수"] = 격자_데이터.apply(lambda row : score_calc(row), axis=1)
+격자_데이터 = 격자_데이터.reset_index()
+write_dataframe(격자_데이터, "DATA/processed/범죄취약지수/범죄취약등급.gpkg")
+
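A quick arithmetic check of score_calc on a hand-made row (toy values, plain dict):

    row = {"road_area": 200.0}
    row.update({f"{i + 1}등급영역": 100.0 for i in range(10)})
    # ten grade areas of 100 -> mean 100; divided by road_area 200 -> 0.5
    print(score_calc(row))  # 0.5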
 
아파트구획_추출.py (added)
+++ 아파트구획_추출.py
@@ -0,0 +1,64 @@
+import re
+import pyogrio
+import pandas as pd
+import geopandas as gpd
+import multiprocessing as mp
+import fiona
+from tqdm import tqdm
+
+
+def filter_rows_by_regex(file_path, pattern, chunk_size, chunk_num, queue):
+    gdf_chunk = pyogrio.read_dataframe(file_path, skip_features=chunk_num * chunk_size, max_features=chunk_size)
+    regex = re.compile(pattern)
+    filtered_chunk = gdf_chunk[gdf_chunk['A8'].str.contains(regex, na=False)]
+
+    queue.put(1)  # Indicate progress
+    return filtered_chunk
+
+def parallel_process(file_path, pattern, chunk_size):
+    # Get the number of features (rows) using fiona
+    with fiona.open(file_path) as src:
+        num_rows = len(src)
+
+    num_chunks = (num_rows // chunk_size) + 1
+
+    # Create a multiprocessing pool
+    manager = mp.Manager()
+    queue = manager.Queue()
+    pool = mp.Pool(mp.cpu_count())
+
+    # Create a progress bar
+    pbar = tqdm(total=num_chunks, desc="Processing Chunks")
+
+    # Process each chunk in parallel and track progress
+    results = []
+    for i in range(num_chunks):
+        result = pool.apply_async(filter_rows_by_regex, args=(file_path, pattern, chunk_size, i, queue))
+        results.append(result)
+
+    # Close the pool
+    pool.close()
+
+    # Update progress bar based on queue
+    for _ in range(num_chunks):
+        queue.get()
+        pbar.update(1)
+
+    # Wait for all processes to finish
+    pool.join()
+    pbar.close()
+
+    # Combine the results
+    filtered_chunks = [result.get() for result in results]
+    filtered_data = gpd.GeoDataFrame(pd.concat(filtered_chunks, ignore_index=True))
+
+    return filtered_data
+
+
+# Example usage
+file_path = 'DATA/refined/geopackage/토지이용계획정보_국토교통부_경북_20240406.gpkg'
+pattern = r'아파트|공동주택'
+filtered_data = parallel_process(file_path, pattern, 8128)
+
+# Save or further process the resulting GeoDataFrame
+pyogrio.write_dataframe(filtered_data, "아파트구획_경북.gpkg")
\ No newline at end of file
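
For reference, the paging primitive used by filter_rows_by_regex: pyogrio reads a row window with skip_features / max_features, so each chunk can be handled independently.

    import pyogrio

    chunk = pyogrio.read_dataframe(file_path, skip_features=0, max_features=8128)
    print(len(chunk))  # at most 8128 rows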