from pyogrio import read_dataframe, write_dataframe
import glob
import pandas as pd
import numpy as np
import geopandas as gpd
import json
from joblib import Parallel, delayed
from tools.spatial_indexed_intersection import geom_overlay

# Unique-row identifier columns for the grid and locale layers.
GRID_INDEX = "GID"
LOCALE_INDEX = "EMD_CD"

# [region name, SIG administrative-district code] pairs.
SIG_CODE = [
    ["경산", "47290"],
    ["경주", "47130"],
    ["구미", "47190"],
    ["김천", "47150"],
    ["안동", "47170"],
    ["영주", "47210"],
    ["영천", "47230"],
    ["예천", "47900"],
    ["칠곡", "47850"],
    ["포항_남구", "47111"],
    ["포항_북구", "47113"],
]

# Input GeoPackages (one per region) produced by the previous pipeline stage.
gpkg_datas = glob.glob("DATA/processed/점수산출_등급화전_전체데이터/v4/*.gpkg")

# Per-grid building summary layer and the columns extracted from it.
house_ratio_grid_path = "DATA/processed/건물연면적/buildings_summary_per_grid.gpkg"
house_ratio_grid_columns_to_extract = [
    "BLDG_CNT",
    "FLOOR_AREA_SUM",
    "HOUSE_FLOOR_AREA_SUM",
    "HOUSE_FLOOR_AREA_RATIO",
]


def range_creator(df, column, num_ranges):
    """
    Creates a list of tuples representing ranges based on percentiles.

    Parameters:
    df (pd.DataFrame): The input dataframe.
    column (str): The column name to calculate the ranges for.
    num_ranges (int): The number of ranges to create.

    Returns:
    list of tuples: Each tuple contains the start and end of a range.
    """
    percentiles = np.linspace(0, 100, num_ranges + 1)
    values = np.percentile(df[column], percentiles)
    ranges = [(values[i], values[i + 1]) for i in range(len(values) - 1)]
    return ranges


def map_value_to_range(df, column, ranges):
    """
    Maps values in a column to the specified ranges.

    Parameters:
    df (pd.DataFrame): The input dataframe.
    column (str): The column name to map values for.
    ranges (list of tuples): The list of ranges to map values to.

    Returns:
    pd.Series: A series of the same length as the input dataframe, where
    each value is the 1-based index of the range it falls into, or NaN
    when the value lies outside every range.
    """
    # Renamed from the enclosing function's name to avoid shadowing it.
    def _assign_range(value):
        # First matching range wins; shared boundaries belong to the lower range.
        for i, (start, end) in enumerate(ranges):
            if start <= value <= end:
                return i + 1
        return np.nan

    return df[column].apply(_assign_range)


def export_geojson_into_superset_compat_data(df, name):
    """
    Export a GeoDataFrame as a Superset-compatible CSV with inline GeoJSON
    coordinate rings in the `geometry` column.

    Parameters:
    df (gpd.GeoDataFrame): The input geodataframe (any CRS; reprojected to WGS84).
    name (str): Output path prefix; the file is written to `{name}.csv`.
    """
    # Original warning (kept verbatim): "Re-check that the polygon data has one
    # shape per row! There is no separate error handling!"
    print("폴리곤 데이터가 한 줄에 한 도형인지 다시 확인하세요! \n별도의 예외처리가 없습니다!")
    df = df.to_crs("epsg:4326")
    df = df.fillna(0)
    # Convert shapely geometry into a bare GeoJSON coordinate ring for Superset.
    # NOTE(review): only the first ring of the first polygon is kept — extra
    # rings/parts are silently dropped, hence the warning printed above.
    df["geometry"] = df["geometry"].apply(lambda row: row.__geo_interface__["coordinates"][0][0])
    # Mapbox rejects single-quoted output, so serialize through json.dumps.
    df["geometry"] = df["geometry"].apply(lambda row: json.dumps(row, indent=4))
    df.to_csv(f"{name}.csv")


def process_region(gpkg):
    """
    Score one region's grid layer and write the final grid, metadata, and a
    WGS84 CSV export.

    Parameters:
    gpkg (str): Path to the region's input GeoPackage.

    Side effects:
    Writes `{save_loc}.gpkg`, `{save_loc}_메타데이터.txt`, and
    `{save_loc}_표준좌표계.csv` under DATA/processed/최종데이터/.
    """
    df = read_dataframe(gpkg)
    df = df.fillna(0)
    filename = gpkg.split('/')[-1].split('.')[0]

    # Exclusion rules: keep only cells with population or buildings, and drop
    # cells dominated by mountain area or apartment complexes.
    df = df[(df["총인구"] > 0) | (df["BLDG_CNT"] > 0)]
    df = df[df["산지영역"] < 9000]
    df = df[df["아파트단지영역"] < 1000]

    # Factor scores (fixed weights from the scoring model).
    df["감시취약지수"] = (10000 - df["CCTV_감시영역"]) * 0.00352  # CCTV surveillance gap
    df["범죄특성요인"] = df["범죄취약점수"] * 17.6               # crime-characteristic factor
    df["범죄예방요인"] = df["area_ratio"] * 14.7                 # crime-prevention factor
    df["환경요인"] = df["IH_RATIO"] * 2.9                        # environment factor

    # Population-density factor: decile rank of total population.
    인구밀집요인등급화 = range_creator(df, "총인구", 10)
    df["인구밀집요인"] = map_value_to_range(df, "총인구", 인구밀집요인등급화) * 1.76

    # Binary weights: 1 when above the regional mean, else 0.
    취약인구비율_평균 = df["취약인구_비율"].mean()
    단독주택연면적_평균 = df["HOUSE_FLOOR_AREA_RATIO"].mean()
    df["가중치1"] = (df["취약인구_비율"] > 취약인구비율_평균).astype(int)
    df["가중치2"] = (df["HOUSE_FLOOR_AREA_RATIO"] > 단독주택연면적_평균).astype(int)

    def calculate_score(row):
        """Weighted sum of the five factor scores for one grid cell."""
        # `total` (not `sum`) so the builtin is not shadowed.
        total = (
            row["감시취약지수"]
            + row["범죄특성요인"]
            + row["범죄예방요인"]
            + row["환경요인"]
            + row["인구밀집요인"]
        )
        가중치 = 1
        if row["가중치1"] == 1:
            가중치 += 0.095
        if row["가중치2"] == 1:
            가중치 += 0.041
        return total * 가중치

    df["최종지수"] = df.apply(calculate_score, axis=1)

    # Final index percentile-ranked into 100 grades.
    최종지수등급화 = range_creator(df, "최종지수", 100)
    df["최종지수등급"] = map_value_to_range(df, "최종지수", 최종지수등급화)

    # Reuse the filename stem computed above instead of re-splitting the path.
    save_loc = f"DATA/processed/최종데이터/{filename}_격자"
    df = df.drop_duplicates(keep="first")
    write_dataframe(df, f"{save_loc}.gpkg")

    with open(f"{save_loc}_메타데이터.txt", 'w', encoding='utf-8') as file:
        file.write(f"취약인구비율_평균 : {취약인구비율_평균}\n")
        file.write(f"단독주택연면적_평균 : {단독주택연면적_평균}\n")
        file.write(f"인구밀집요인등급화 : {인구밀집요인등급화}\n")
        file.write(f"최종지수등급화 : {최종지수등급화}\n")
        # Show every row of the grade counts in the metadata dump. This is a
        # global pandas setting, but process-local inside each joblib worker.
        pd.set_option("display.max_rows", None)
        file.write(f"등급별_격자수 : {df['최종지수등급'].value_counts().sort_index()}\n")

    df = df.to_crs("epsg:4326")
    # NOTE(review): centroids computed in a geographic CRS are approximate;
    # compute in a projected CRS first if positional accuracy matters.
    df["centroid"] = df["geometry"].centroid
    df.to_csv(f"{save_loc}_표준좌표계.csv")


if __name__ == "__main__":
    # Main guard is required by joblib's process-based backends, which
    # re-import this module inside worker processes.
    Parallel(n_jobs=11)(delayed(process_region)(gpkg) for gpkg in gpkg_datas)