"""Aggregate store counts and lodging/bar counts per grid cell.

Reads the store (commercial district) CSV and the grid GeoPackage, assigns
each store with usable coordinates to the grid cell that contains it
(cells are identified by ``SPO_NO_CD``), and writes:

* ``store_counts.csv``            - number of stores per cell
* ``inn_and_hedonic.csv``         - number of lodging/bar stores per cell
* ``inn_and_hedonic_ratio.csv``   - lodging/bar share per cell
* ``DATA/processed/유흥_숙박업소/inn_and_hedonic.gpkg`` - grid geometry joined
  with the three metrics above
"""
import math

import geopandas as gpd
import pandas as pd
from pyogrio import read_dataframe, write_dataframe
from shapely.geometry import Point  # noqa: F401  (kept: name may be used elsewhere)

buildings_df = pd.read_csv(
    "DATA/raw/상가정보/소상공인시장진흥공단_상가(상권)정보_경북_202403.csv",
    low_memory=False,
)
grid_df = read_dataframe(
    'DATA/refined/geopackage/시군구읍면동_경상북도_100x100.gpkg',
    encoding='utf-8',
)
# The store coordinates are longitude/latitude columns, so bring the grid
# into the same geographic CRS before the spatial join.
grid_df = grid_df.to_crs("epsg:4326")


def is_float(x):
    """Return True if ``x`` can be converted to a finite float.

    Missing CSV cells arrive as NaN, which ``float()`` accepts without
    raising; they are rejected here so they never become ``Point(nan, nan)``.
    ``None`` and other non-numeric objects return False instead of raising.
    """
    try:
        return math.isfinite(float(x))
    except (TypeError, ValueError, OverflowError):
        return False


has_valid_coords = (
    buildings_df['경도'].apply(is_float) & buildings_df['위도'].apply(is_float)
)
# Copy so that later column assignments never write into a view of
# buildings_df (avoids SettingWithCopyWarning).
valid_buildings_df = buildings_df[has_valid_coords].copy()

# Build all point geometries in one vectorised call instead of row by row.
valid_buildings_df['geometry'] = gpd.points_from_xy(
    valid_buildings_df['경도'].astype(float),
    valid_buildings_df['위도'].astype(float),
)
buildings_gdf = gpd.GeoDataFrame(
    valid_buildings_df, geometry='geometry', crs=grid_df.crs
)

# `predicate` replaces the `op` keyword, which was deprecated in
# GeoPandas 0.10 and removed in 1.0.
joined_df = gpd.sjoin(buildings_gdf, grid_df, how='inner', predicate='within')

# groupby(...).size() returns a Series, so it has no column labels; give the
# Series a name so the CSV gets a meaningful header.
stores = joined_df.groupby('SPO_NO_CD').size()
stores.name = '상가_수'

selection_slice = joined_df["상권업종중분류명"].isin(['일반 숙박', '주점'])
inn_and_hedonic = joined_df[selection_slice].groupby('SPO_NO_CD').size()
inn_and_hedonic.name = '숙박 및 유해업소수'

# Cells with no lodging/bar store are absent from `inn_and_hedonic`;
# fill_value=0 makes their ratio 0 instead of NaN. Cells with fewer than
# 5 stores are forced to 0.
ratio_of_inn_and_hedonic = (
    inn_and_hedonic.divide(stores, fill_value=0).where(stores >= 5, 0)
)
ratio_of_inn_and_hedonic.name = '숙박및 유해업소 비율'

stores.to_csv('store_counts.csv')
inn_and_hedonic.to_csv('inn_and_hedonic.csv')
ratio_of_inn_and_hedonic.to_csv('inn_and_hedonic_ratio.csv')

# One joined row per grid cell.
# NOTE(review): `final_gdf` is not used below; kept in case later code reads it.
final_gdf = joined_df.drop_duplicates(subset=['SPO_NO_CD'])

summary_df = pd.DataFrame({
    'STORE_CNT': stores,
    'IH_CNT': inn_and_hedonic,
    'IH_RATIO': ratio_of_inn_and_hedonic,
}).reset_index()
# fillna returns a new frame; the result must be assigned back, otherwise
# the NaN counts for cells without lodging/bar stores reach the output file.
summary_df = summary_df.fillna(0)

summary_gdf = grid_df.merge(summary_df, on='SPO_NO_CD')

# Export to GeoPackage via pyogrio.
# NOTE: the original author reports that saving through GeoDataFrame.to_file
# corrupted non-Latin (Korean) text by writing it as latin-1, so
# pyogrio.write_dataframe is used here instead.
write_dataframe(
    summary_gdf,
    'DATA/processed/유흥_숙박업소/inn_and_hedonic.gpkg',
)