File name
Commit message
Commit date
import pandas as pd
import geopandas as gpd
from pyogrio import read_dataframe, write_dataframe
from shapely.geometry import Point
# Read the raw CSV listing into a plain pandas DataFrame.
buildings_df = pd.read_csv(
    "DATA/raw/상가정보/소상공인시장진흥공단_상가(상권)정보_경북_202403.csv",
    low_memory=False,
)
# Read the grid GeoPackage and reproject it to EPSG:4326 in one step.
grid_df = read_dataframe(
    'DATA/refined/geopackage/시군구읍면동_경상북도_100x100.gpkg',
    encoding='utf-8',
).to_crs("epsg:4326")
def is_float(x):
    """Return True if ``x`` can be converted with ``float()``, else False.

    Values that ``float()`` rejects with ``ValueError`` (e.g. non-numeric
    strings), ``TypeError`` (e.g. ``None`` or a list) or ``OverflowError``
    (an int too large for a float) are reported as False instead of raising.

    Note that NaN and infinity convert successfully, so they return True.
    """
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
# Keep only the rows whose longitude ('경도') and latitude ('위도') both pass
# is_float(). The .copy() makes the result an independent frame, so adding the
# geometry column below does not write into a slice of buildings_df
# (which would trigger pandas' SettingWithCopyWarning).
valid_buildings_df = buildings_df[
    buildings_df['경도'].apply(is_float) & buildings_df['위도'].apply(is_float)
].copy()
# Build the point geometries in one vectorized call (x = longitude,
# y = latitude) instead of constructing a Point per row.
valid_buildings_df['geometry'] = gpd.points_from_xy(
    valid_buildings_df['경도'].astype(float),
    valid_buildings_df['위도'].astype(float),
)
# The points take the grid's CRS so both layers of the join agree.
buildings_gdf = gpd.GeoDataFrame(
    valid_buildings_df, geometry='geometry', crs=grid_df.crs
)
# Attach to every point the attributes of the grid cell that contains it.
# 'predicate' replaces the deprecated 'op' keyword of gpd.sjoin.
joined_df = gpd.sjoin(buildings_gdf, grid_df, how='inner', predicate='within')
# groupby(...).size() produces a Series, not a DataFrame, so there are no
# columns to label; the Series itself is given a name instead.
stores = joined_df.groupby('SPO_NO_CD').size().rename('상가_수')
# buildings_area_sum = joined_df.groupby('SPO_NO_CD')['연면적(㎡)'].sum()
# buildings_area_sum.name = '연면적(㎡)합'

# Boolean mask of the rows whose '상권업종중분류명' is one of the two
# selected categories.
selection_slice = joined_df["상권업종중분류명"].isin(['일반 숙박', "주점"])
inn_and_hedonic = (
    joined_df.loc[selection_slice]
    .groupby('SPO_NO_CD')
    .size()
    .rename('숙박 및 유해업소수')
)

# Share of the selected categories per grid cell; cells with fewer than
# five joined rows are forced to 0.
ratio_of_inn_and_hedonic = (
    (inn_and_hedonic / stores)
    .where(stores >= 5, other=0)
    .rename('숙박및 유해업소 비율')
)

# Write each intermediate Series to its own CSV file.
for series, csv_path in (
    (stores, 'store_counts.csv'),
    (inn_and_hedonic, 'inn_and_hedonic.csv'),
    (ratio_of_inn_and_hedonic, 'inn_and_hedonic_ratio.csv'),
):
    series.to_csv(csv_path)
# Drop duplicates so each grid cell (SPO_NO_CD) appears only once.
# NOTE(review): final_gdf is not used again in the visible part of this script.
final_gdf = joined_df.drop_duplicates(subset=['SPO_NO_CD'])
# Combine the three per-cell Series into one table keyed by SPO_NO_CD.
summary_df = pd.DataFrame({
    'STORE_CNT': stores,
    'IH_CNT': inn_and_hedonic,
    'IH_RATIO': ratio_of_inn_and_hedonic,
}).reset_index()
# fillna() returns a new frame rather than modifying in place; assign it back
# so cells missing from inn_and_hedonic get 0 instead of NaN.
summary_df = summary_df.fillna(0)
# Attach the summary columns to the grid geometries.
summary_gdf = grid_df.merge(summary_df, on='SPO_NO_CD')
# Export to GeoPackage
write_dataframe(summary_gdf, 'DATA/processed/유흥_숙박업소/inn_and_hedonic.gpkg')
# Do not use geopandas to save files: per the original author's note, it
# corrupts non-Latin characters by assigning the wrong encoding (always latin-1).
# summary_gdf.to_file('DATA/processed/건물연면적/buildings_summary_per_grid.shp', encoding='utf-8')