File name
Commit message
Commit date
import pandas as pd
import glob
import os
from tools.geocode.naver_geocode import Naver_Map
from time import sleep
# Directory holding the pre-geocoding CSV chunks, and the directory that
# receives the geocoded results.
INPUT_DIR = "DATA/refined/건축물대장/지오코딩전_파일_분리"
OUTPUT_DIR = "DATA/refined/건축물대장/지오코드"

# Suffix inserted before the extension of every geocoded output file.
GEOCODED_SUFFIX = "_geocoded"

# Value stored in the latitude/longitude columns when geocoding fails.
INVALID_COORD = "INVALID"

# Pause after every geocoding API call, in seconds.
API_DELAY_SECONDS = 0.5


def adjust_filename(filename):
    """Map a geocoded output path back to the name of its input file.

    Drops the directory part and the trailing ``_geocoded`` suffix of the
    stem; the extension is kept, so ``out/a_geocoded.csv`` -> ``a.csv``.
    Names without the suffix are returned as their plain basename.
    """
    stem, ext = os.path.splitext(os.path.basename(filename))
    if stem.endswith(GEOCODED_SUFFIX):
        stem = stem[:-len(GEOCODED_SUFFIX)]
    return stem + ext


def geocoded_path(csv_path, output_dir=OUTPUT_DIR):
    """Return the output path (inside *output_dir*) for an input CSV path."""
    stem, ext = os.path.splitext(os.path.basename(csv_path))
    return os.path.join(output_dir, f"{stem}{GEOCODED_SUFFIX}{ext}")


def _format_lot_part(value):
    """Render one lot-number component ('번' or '지') as address text.

    Returns "" when the component is absent: NaN/None (how ``read_csv``
    represents an empty cell), a blank string, or a numeric zero.
    Whole numbers are rendered without a decimal part (``12.0`` -> "12");
    anything that is not numeric is passed through unchanged.
    """
    if pd.isna(value):
        return ""
    text = str(value)
    if not text.strip():
        return ""
    try:
        number = float(text)
    except ValueError:
        # Not a number (e.g. a prefixed lot such as "산12"): keep as written.
        return text
    if pd.isna(number) or number == 0:
        return ""
    return str(int(number)) if number.is_integer() else text


def build_address(row):
    """Build the query address "<시도> <시군구> <법정동>[ <번>][-<지>]".

    *row* is any mapping (dict or pandas Series) providing the keys
    '시도', '시군구', '법정동', '번' and '지'.
    """
    address = f"{row['시도']} {row['시군구']} {row['법정동']}"
    main_number = _format_lot_part(row['번'])
    sub_number = _format_lot_part(row['지'])
    if main_number:
        address += f" {main_number}"
    if sub_number:
        address += f"-{sub_number}"
    return address


def geocode_addresses(addresses, geocoder, delay=API_DELAY_SECONDS):
    """Geocode *addresses* in order and return ``(latitudes, longitudes)``.

    *geocoder* must expose ``geocoding(address)``; a truthy result is
    indexed as ``[0]`` (latitude) and ``[1]`` (longitude), a falsy result
    is recorded as ``INVALID_COORD`` for both coordinates.

    An address equal to the one immediately before it reuses the previous
    result without calling the API. This happens when an apartment complex
    or block-level development puts several buildings on the same address.
    The function sleeps *delay* seconds after every API call.
    """
    addresses = list(addresses)
    total = len(addresses)
    latitudes = []
    longitudes = []
    previous_addr = None
    lat = lon = INVALID_COORD
    for i, addr in enumerate(addresses):
        print(f"{i}/{total} : {addr}")
        if i == 0 or addr != previous_addr:
            response = geocoder.geocoding(addr)
            if response:
                lat, lon = response[0], response[1]
            else:
                lat = lon = INVALID_COORD
            sleep(delay)
        latitudes.append(lat)
        longitudes.append(lon)
        previous_addr = addr
    return latitudes, longitudes


def main():
    """Geocode every input CSV that has no geocoded counterpart yet."""
    all_files = glob.glob(f"{INPUT_DIR}/*.csv")
    completed_files = glob.glob(f"{OUTPUT_DIR}/*.csv")

    completed_names = {adjust_filename(path) for path in completed_files}
    remaining_files = [
        path for path in all_files
        if os.path.basename(path) not in completed_names
    ]

    naver_geocode = Naver_Map()
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for csv_path in remaining_files:
        df = pd.read_csv(csv_path)
        # Built from records rather than df.apply(axis=1) so that an empty
        # file yields an empty column instead of failing on assignment.
        df["address"] = [build_address(rec) for rec in df.to_dict("records")]

        latitudes, longitudes = geocode_addresses(df["address"], naver_geocode)
        df['latitude'] = latitudes
        df['longitude'] = longitudes
        df.to_csv(geocoded_path(csv_path), index=False)


if __name__ == "__main__":
    main()