# File name
# Commit message
# Commit date
import pandas as pd
import geopandas as gpd
from pyogrio import read_dataframe, write_dataframe
from shapely.geometry import Point
# Building records table (path suggests a building-register extract -- TODO confirm).
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk.
buildings_df = pd.read_csv("DATA/refined/건축물대장/concat.csv", low_memory=False)

# Grid polygons read through pyogrio, then reprojected to EPSG:4326 so they
# share a CRS with the longitude/latitude columns of the building table.
grid_df = read_dataframe(
    'DATA/refined/geopackage/시군구읍면동_경상북도_100x100.gpkg',
    encoding='utf-8',
).to_crs("epsg:4326")
def is_float(x):
    """Return True when ``x`` converts to a finite float, otherwise False.

    Used to screen coordinate values before they are turned into points.

    Args:
        x: Any value (string, number, None, ...).

    Returns:
        bool: True only if ``float(x)`` succeeds and the result is neither
        NaN nor +/-infinity.
    """
    import math  # local import: the block stays usable without touching file-level imports

    try:
        # NaN/inf convert without error but are useless as coordinates
        # (pandas represents missing CSV cells as NaN), so reject them too.
        return math.isfinite(float(x))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and other non-numeric objects; ValueError: text that
        # is not a number; OverflowError: integers too large for a float.
        return False
# Keep only the rows whose longitude and latitude both pass is_float.
# .copy() makes the result an independent frame, so adding the geometry column
# below does not write into a slice of buildings_df (SettingWithCopyWarning).
has_valid_coords = buildings_df['longitude'].apply(is_float) & buildings_df['latitude'].apply(is_float)
valid_buildings_df = buildings_df[has_valid_coords].copy()

# Build one point per building in (x, y) = (longitude, latitude) order.
# points_from_xy is vectorised, unlike a row-wise apply that creates Point
# objects one at a time.
valid_buildings_df['geometry'] = gpd.points_from_xy(
    valid_buildings_df['longitude'].astype(float),
    valid_buildings_df['latitude'].astype(float),
)

# The points are tagged with the grid's CRS (no reprojection happens here), so
# the coordinates are assumed to already be expressed in that CRS.
buildings_gdf = gpd.GeoDataFrame(valid_buildings_df, geometry='geometry', crs=grid_df.crs)

# Attach to every building the attributes of the grid cell that contains it;
# buildings outside every cell are dropped by the inner join.
# `predicate=` replaces the `op=` keyword, which geopandas deprecated in 0.10
# and removed in 1.0.
joined_df = gpd.sjoin(buildings_gdf, grid_df, how='inner', predicate='within')
# Per-grid aggregates keyed by the grid identifier SPO_NO_CD. Every result is a
# Series rather than a DataFrame, so it carries a single name instead of
# column labels.

# Number of joined building rows per grid cell.
buildings_count = joined_df.groupby('SPO_NO_CD').size().rename('건물_수')

# Sum of the '연면적(㎡)' column per grid cell.
buildings_area_sum = (
    joined_df.groupby('SPO_NO_CD')['연면적(㎡)']
    .sum()
    .rename('연면적(㎡)합')
)

# Same sum restricted to rows whose '주용도' value is '단독주택'.
is_detached_house = joined_df['주용도'] == '단독주택'
buildings_area_sum_house = (
    joined_df.loc[is_detached_house]
    .groupby('SPO_NO_CD')['연면적(㎡)']
    .sum()
    .rename('단독주택_연면적(㎡)합')
)

# Share of the '단독주택' sum in the total, aligned on SPO_NO_CD; grid cells
# missing from the numerator come out as NaN.
buildings_house_ratio = (
    buildings_area_sum_house.div(buildings_area_sum)
    .rename('단독주택_연면적(㎡)_비율')
)
# Write each per-grid Series to its own CSV file (paths are relative to the
# current working directory), in the same order as before.
for per_grid_series, csv_name in (
    (buildings_count, 'building_counts_per_grid.csv'),
    (buildings_area_sum, 'building_area_sums_per_grid.csv'),
    (buildings_area_sum_house, 'houses_area_sums_per_grid.csv'),
    (buildings_house_ratio, 'house_ratio_per_grid.csv'),
):
    per_grid_series.to_csv(csv_name)
# One joined row per grid cell (the first occurrence of each SPO_NO_CD).
# NOTE(review): final_gdf is not referenced by any later line visible here;
# kept in case other code relies on it -- candidate for removal.
final_gdf = joined_df.drop_duplicates(subset=['SPO_NO_CD'])

# Combine the per-grid Series into one table; reset_index turns the grid key
# from the index back into a regular column so it can be merged on.
summary_df = pd.DataFrame({
    'BLDG_CNT': buildings_count,
    'FLOOR_AREA_SUM': buildings_area_sum,
    'HOUSE_FLOOR_AREA_SUM': buildings_area_sum_house,
    'HOUSE_FLOOR_AREA_RATIO': buildings_house_ratio,
}).reset_index()

# fillna returns a new frame, so the result must be rebound; the bare call
# used previously left the NaNs in place and they reached the output file.
summary_df = summary_df.fillna(0)

# Attach the statistics to the grid geometries. merge defaults to an inner
# join, so only grid cells present in summary_df are kept.
summary_gdf = grid_df.merge(summary_df, on='SPO_NO_CD')
# Export the per-grid summary as a GeoPackage through pyogrio.
# Author's note: saving through geopandas (GeoDataFrame.to_file) corrupted
# non-Latin characters because the text was encoded as latin-1, so the call
# below is deliberately not used:
# summary_gdf.to_file('DATA/processed/건물연면적/buildings_summary_per_grid.shp', encoding='utf-8')
summary_gpkg_path = 'DATA/processed/건물연면적/buildings_summary_per_grid.gpkg'
write_dataframe(summary_gdf, summary_gpkg_path)