"""Aggregate building-register records onto a 100 m x 100 m grid.

Reads the concatenated building register CSV and the grid GeoPackage,
assigns each building with usable coordinates to the grid cell that contains
it, and writes per-cell building counts, total floor area, detached-house
floor area and the detached-house share of floor area.
"""

import pandas as pd
import geopandas as gpd
from pyogrio import read_dataframe, write_dataframe
from shapely.geometry import Point


def is_float(x):
    """Return True if ``x`` can be converted with ``float()``, else False."""
    try:
        float(x)
    except (TypeError, ValueError):
        # TypeError covers None and other non-numeric objects, which would
        # otherwise abort the whole run instead of just skipping the row.
        return False
    return True


buildings_df = pd.read_csv("DATA/refined/건축물대장/concat.csv", low_memory=False)
grid_df = read_dataframe(
    'DATA/refined/geopackage/시군구읍면동_경상북도_100x100.gpkg',
    encoding='utf-8',
)
grid_df = grid_df.to_crs("epsg:4326")

# Keep only rows whose coordinates can be parsed as floats.
parsable = (
    buildings_df['longitude'].apply(is_float)
    & buildings_df['latitude'].apply(is_float)
)
candidates = buildings_df[parsable]
longitudes = candidates['longitude'].map(float)
latitudes = candidates['latitude'].map(float)

# Missing values parse as NaN; a NaN point can never fall within a grid cell,
# so drop those rows before building geometries.
located = longitudes.notna() & latitudes.notna()

# Explicit copy: the GeoDataFrame must not be built on a view of buildings_df.
valid_buildings_df = candidates[located].copy()

# The coordinates are treated as being in the same CRS the grid was
# reprojected to above (as the original script did via set_crs).
buildings_gdf = gpd.GeoDataFrame(
    valid_buildings_df,
    geometry=gpd.points_from_xy(longitudes[located], latitudes[located]),
    crs=grid_df.crs,
)

# `predicate` replaces the `op` keyword, which was deprecated in
# GeoPandas 0.10 and removed in 1.0.
joined_df = gpd.sjoin(buildings_gdf, grid_df, how='inner', predicate='within')

# Each aggregate below is a Series indexed by SPO_NO_CD; a Series has no
# column labels, so it is given a name that becomes the CSV header.
buildings_count = joined_df.groupby('SPO_NO_CD').size()
buildings_count.name = '건물_수'

buildings_area_sum = joined_df.groupby('SPO_NO_CD')['연면적(㎡)'].sum()
buildings_area_sum.name = '연면적(㎡)합'

buildings_area_sum_house = (
    joined_df[joined_df['주용도'] == '단독주택']
    .groupby('SPO_NO_CD')['연면적(㎡)']
    .sum()
)
buildings_area_sum_house.name = '단독주택_연면적(㎡)합'

# Index-aligned division: cells without any detached house yield NaN here.
buildings_house_ratio = buildings_area_sum_house / buildings_area_sum
buildings_house_ratio.name = '단독주택_연면적(㎡)_비율'

buildings_count.to_csv('building_counts_per_grid.csv')
buildings_area_sum.to_csv('building_area_sums_per_grid.csv')
buildings_area_sum_house.to_csv('houses_area_sums_per_grid.csv')
buildings_house_ratio.to_csv('house_ratio_per_grid.csv')

summary_df = pd.DataFrame({
    'BLDG_CNT': buildings_count,
    'FLOOR_AREA_SUM': buildings_area_sum,
    'HOUSE_FLOOR_AREA_SUM': buildings_area_sum_house,
    'HOUSE_FLOOR_AREA_RATIO': buildings_house_ratio,
}).reset_index()
# fillna returns a new frame; assign it back so that cells without detached
# houses carry 0 instead of NaN in the exported layer.
summary_df = summary_df.fillna(0)

# Default inner merge: only grid cells that contain at least one building
# appear in the output.
summary_gdf = grid_df.merge(summary_df, on='SPO_NO_CD')

# Export to GeoPackage via pyogrio.
# NOTE: the original author found that saving with GeoDataFrame.to_file(...)
# to a Shapefile corrupted non-Latin characters (the encoding ended up as
# latin-1 even with encoding='utf-8'), hence write_dataframe is used instead.
write_dataframe(summary_gdf, 'DATA/processed/건물연면적/buildings_summary_per_grid.gpkg')