File name
Commit message
Commit date
from pyogrio import read_dataframe, write_dataframe
import glob
import pandas as pd
import numpy as np
import geopandas as gpd
import json
from joblib import Parallel, delayed
from tools.spatial_indexed_intersection import geom_overlay
# String constants; presumably the grid-id and locale-code column names.
# NOTE(review): neither is referenced in the visible part of this file —
# confirm where they are used before changing or removing them.
GRID_INDEX = "GID"
LOCALE_INDEX = "EMD_CD"
# [region name, five-digit code string] pairs, one entry per region.
# NOTE(review): presumably administrative district codes — TODO confirm.
# Not referenced in the visible part of this file.
SIG_CODE = [
["경산", "47290"],
["경주", "47130"],
["구미", "47190"],
["김천", "47150"],
["안동", "47170"],
["영주", "47210"],
["영천", "47230"],
["예천", "47900"],
["칠곡", "47850"],
["포항_남구", "47111"],
["포항_북구", "47113"]
]
# Paths of every .gpkg file found in the v4 input directory; each path is
# passed to process_region by the Parallel call at the end of this file.
gpkg_datas = glob.glob("DATA/processed/점수산출_등급화전_전체데이터/v4/*.gpkg")
# Path of a per-grid building summary GeoPackage and a list of column names.
# NOTE(review): neither is used in the visible part of this file; presumably
# the columns are the ones to pull from that layer — TODO confirm.
house_ratio_grid_path = "DATA/processed/건물연면적/buildings_summary_per_grid.gpkg"
house_ratio_grid_columns_to_extract = ["BLDG_CNT", "FLOOR_AREA_SUM","HOUSE_FLOOR_AREA_SUM", "HOUSE_FLOOR_AREA_RATIO"]
def range_creator(df, column, num_ranges):
    """
    Split a column's values into consecutive percentile bands.

    Parameters:
    df (pd.DataFrame): Frame holding the data.
    column (str): Name of the column whose values are binned.
    num_ranges (int): How many bands to produce.

    Returns:
    list of tuples: ``num_ranges`` pairs ``(lower, upper)``. The bounds are
    the column's percentiles at evenly spaced steps from 0 to 100, so each
    band's upper bound is the next band's lower bound.
    """
    cut_points = np.percentile(df[column], np.linspace(0, 100, num_ranges + 1))
    # Pair every cut point with its successor to form the bands.
    return list(zip(cut_points[:-1], cut_points[1:]))
def map_value_to_range(df, column, ranges):
    """
    Assign every value of a column the 1-based position of its range.

    Parameters:
    df (pd.DataFrame): Frame holding the data.
    column (str): Name of the column to classify.
    ranges (list of tuples): ``(start, end)`` pairs, both ends inclusive.

    Returns:
    pd.Series: One entry per row of ``df``: the 1-based index of the first
    range that contains the value, or NaN when no range contains it.
    """
    def first_matching_range(value):
        # Ranges are closed on both ends, so a value sitting on a boundary
        # shared by two ranges is assigned to the earlier one.
        matches = (
            position
            for position, (start, end) in enumerate(ranges, start=1)
            if start <= value <= end
        )
        return next(matches, np.nan)

    return df[column].apply(first_matching_range)
def export_geojson_into_superset_compat_data(df, name):
    """
    Write a geo dataframe to ``{name}.csv`` with geometries as JSON text.

    The frame is reprojected to EPSG:4326 and missing values are replaced
    with 0. Every geometry is then reduced to ``coordinates[0][0]`` of its
    ``__geo_interface__`` mapping and serialised with ``json.dumps``.

    Parameters:
    df: Frame supporting ``to_crs`` with a "geometry" column whose values
        expose ``__geo_interface__`` (presumably a geopandas GeoDataFrame —
        TODO confirm).
    name (str): Output path without the ".csv" extension.

    Returns:
    None
    """
    def first_ring(shape):
        # Only coordinates[0][0] is kept and nothing validates the geometry.
        # NOTE(review): presumably each row is expected to be a single-part
        # multipolygon, for which this is the first polygon's first ring —
        # verify against the input data.
        return shape.__geo_interface__["coordinates"][0][0]

    def ring_as_json(ring):
        # json.dumps emits double-quoted JSON, unlike Python's repr.
        return json.dumps(ring, indent=4)

    # The message warns the operator that every row must hold exactly one
    # shape, because no exception handling is done here.
    print("폴리곤 데이터가 한 줄에 한 도형인지 다시 확인하세요! 별도의 예외처리가 없습니다!")
    converted = df.to_crs("epsg:4326").fillna(0)
    converted["geometry"] = converted["geometry"].apply(first_ring)
    converted["geometry"] = converted["geometry"].apply(ring_as_json)
    converted.to_csv(f"{name}.csv")
def process_region(gpkg):
    """
    Score the grid cells of one region file and write the result files.

    Reads the GeoPackage at ``gpkg``, drops rows by three filter rules,
    derives the factor columns and a weighted final index ("최종지수"),
    grades that index into 100 percentile ranges (grades 1-100) and writes,
    with ``save_loc = DATA/processed/최종데이터/<file name up to the first
    dot>_격자``:

    * ``<save_loc>.gpkg`` - the scored, de-duplicated grid,
    * ``<save_loc>_메타데이터.txt`` - the two averages, both range lists and
      the number of cells per grade,
    * ``<save_loc>_표준좌표계.csv`` - the grid reprojected to EPSG:4326 with
      an extra "centroid" column.

    Parameters:
    gpkg (str): Path of the input GeoPackage file.

    Returns:
    None
    """
    # Local import keeps this block self-contained.
    from pathlib import Path

    df = read_dataframe(gpkg)
    df = df.fillna(0)
    # Path.name handles the platform's path separator (glob returns
    # backslash-separated matches on Windows); splitting on "/" did not.
    filename = Path(gpkg).name.split('.')[0]

    # exclude by rules
    # Keep cells with population ("총인구") or at least one building.
    df = df[(df["총인구"] > 0) | (df["BLDG_CNT"] > 0)]
    # NOTE(review): the thresholds below (and the 10000 further down) look
    # like areas within a 10000-unit cell — TODO confirm units.
    df = df[df["산지영역"] < 9000]
    df = df[df["아파트단지영역"] < 1000]

    # calculate
    df["감시취약지수"] = (10000 - df["CCTV_감시영역"]) * 0.00352
    df["범죄특성요인"] = df["범죄취약점수"] * 17.6
    df["범죄예방요인"] = df["area_ratio"] * 14.7
    df["환경요인"] = df["IH_RATIO"] * 2.9
    인구밀집요인등급화 = range_creator(df, "총인구", 10)
    df["인구밀집요인"] = map_value_to_range(df, "총인구", 인구밀집요인등급화) * 1.76

    # Flag rows that lie above the region-wide mean of each ratio.
    취약인구비율_평균 = df["취약인구_비율"].mean()
    단독주택연면적_평균 = df["HOUSE_FLOOR_AREA_RATIO"].mean()
    df["가중치1"] = (df["취약인구_비율"] > 취약인구비율_평균).astype("int64")
    df["가중치2"] = (df["HOUSE_FLOOR_AREA_RATIO"] > 단독주택연면적_평균).astype("int64")

    # Final index = sum of the five factors, boosted by 9.5 % and/or 4.1 %
    # when the corresponding flag is set. Column arithmetic replaces the
    # former row-wise apply and keeps the same evaluation order.
    요인합계 = (
        df["감시취약지수"]
        + df["범죄특성요인"]
        + df["범죄예방요인"]
        + df["환경요인"]
        + df["인구밀집요인"]
    )
    가중치 = 1 + df["가중치1"] * 0.095 + df["가중치2"] * 0.041
    df["최종지수"] = 요인합계 * 가중치

    최종지수등급화 = range_creator(df, "최종지수", 100)
    df["최종지수등급"] = map_value_to_range(df, "최종지수", 최종지수등급화)

    save_loc = f"DATA/processed/최종데이터/{filename}_격자"
    # NOTE(review): duplicates are dropped only after the percentile ranges
    # were computed, so duplicated rows still influence the grade
    # boundaries — confirm this ordering is intended.
    df = df.drop_duplicates(keep="first")
    write_dataframe(df, f"{save_loc}.gpkg")

    # Render the full per-grade count table without truncation, but scope
    # the display option to this statement instead of changing it globally.
    with pd.option_context("display.max_rows", None):
        등급별_격자수 = f"{df['최종지수등급'].value_counts().sort_index()}"

    with open(f"{save_loc}_메타데이터.txt", 'w', encoding='utf-8') as file:
        file.write(f"취약인구비율_평균 : {취약인구비율_평균}\n")
        file.write(f"단독주택연면적_평균 : {단독주택연면적_평균}\n")
        file.write(f"인구밀집요인등급화 : {인구밀집요인등급화}\n")
        file.write(f"최종지수등급화 : {최종지수등급화}\n")
        file.write(f"등급별_격자수 : {등급별_격자수}\n")

    # Take the centroids in the stored CRS and reproject the points
    # afterwards: geopandas warns that centroids computed directly on a
    # geographic CRS such as EPSG:4326 are likely incorrect.
    # NOTE(review): assumes the stored CRS is projected — TODO confirm.
    centroid = df["geometry"].centroid
    df = df.to_crs("epsg:4326")
    df["centroid"] = centroid.to_crs("epsg:4326")
    df.to_csv(f"{save_loc}_표준좌표계.csv")
# Guard the driver so that importing this module (or a worker process
# re-importing it) does not start the whole batch again.
if __name__ == "__main__":
    # One task per input GeoPackage, run on up to 11 joblib workers.
    Parallel(n_jobs=11)(delayed(process_region)(gpkg) for gpkg in gpkg_datas)