"""Geocode the split building-register CSV files.

For every CSV under ``지오코딩전_파일_분리`` that does not yet have a
``*_geocoded.csv`` counterpart under ``지오코드``, build a street-lot address
from the ``시도`` / ``시군구`` / ``법정동`` / ``번`` / ``지`` columns, look up its
coordinates, and write the rows back out with ``address``, ``latitude`` and
``longitude`` columns added.
"""
import glob
import os
from time import sleep

import pandas as pd

# Pause after every real API request (seconds).
REQUEST_DELAY_SECONDS = 0.5
# Placeholder stored in both coordinate columns when the lookup yields nothing.
INVALID_COORDINATE = "INVALID"


def adjust_filename(filename):
    """Map an output path back to the base name of its source file.

    Drops the directory part and the ``_geocoded`` suffix; the ``.csv``
    extension is kept so the result can be compared with
    ``os.path.basename`` of a source path.
    """
    base_name = os.path.basename(filename)
    return base_name.replace('_geocoded', '')


def format_lot_number(value):
    """Return the printable form of a lot-number cell, or ``""`` to omit it.

    Missing values (``None`` / NaN, which is what ``pd.read_csv`` produces for
    empty cells), blank strings and zero all yield ``""``. Whole numbers are
    rendered without a trailing ``.0`` (``736.0`` -> ``"736"``). Anything that
    is not numeric is returned as its original text.
    """
    if pd.isna(value):
        return ""
    text = str(value)
    if not text.strip():
        return ""
    try:
        number = float(text)
    except ValueError:
        # Not a number: pass the text through unchanged.
        return text
    if pd.isna(number) or number == 0:
        return ""
    if number.is_integer():
        return str(int(number))
    return text


def build_address(sido, sigungu, dong, bun, ji):
    """Compose the geocoding query ``"<시도> <시군구> <법정동> <번>-<지>"``.

    The main lot number (``bun``) and sub lot number (``ji``) are each left
    out when they are missing or zero.
    """
    address = f"{sido} {sigungu} {dong}"
    main_lot = format_lot_number(bun)
    sub_lot = format_lot_number(ji)
    if main_lot:
        address += f" {main_lot}"
    if sub_lot:
        address += f"-{sub_lot}"
    return address


def geocode_addresses(addresses, geocode, delay=REQUEST_DELAY_SECONDS):
    """Look up coordinates for every address, querying each distinct one once.

    Args:
        addresses: Sized iterable of address strings.
        geocode: Callable taking an address; a truthy result is indexed as
            ``result[0]`` (latitude) and ``result[1]`` (longitude), a falsy
            result marks the address as invalid.
        delay: Seconds to sleep after each real call to ``geocode``.

    Returns:
        ``(latitudes, longitudes)`` lists aligned with ``addresses``; entries
        are ``"INVALID"`` where the lookup returned nothing.
    """
    # Several buildings of one apartment complex or block development share
    # an address, so results are cached to avoid repeating the API call.
    cache = {}
    latitudes = []
    longitudes = []
    total = len(addresses)
    for i, addr in enumerate(addresses):
        print(f"{i}/{total} : {addr}")
        if addr not in cache:
            response = geocode(addr)
            if response:
                cache[addr] = (response[0], response[1])
            else:
                cache[addr] = (INVALID_COORDINATE, INVALID_COORDINATE)
            sleep(delay)
        lat, lon = cache[addr]
        latitudes.append(lat)
        longitudes.append(lon)
    return latitudes, longitudes


def geocode_file(csv_path, geocode):
    """Geocode one source CSV and write its ``*_geocoded.csv`` counterpart."""
    output_path = csv_path.replace(".csv", "_geocoded.csv").replace("지오코딩전_파일_분리", "지오코드")
    # Create the destination up front so a missing folder cannot discard the
    # results after all the API calls have been made.
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

    df = pd.read_csv(csv_path)
    # Built with zip rather than DataFrame.apply so an empty file simply
    # produces an empty column.
    df["address"] = [
        build_address(sido, sigungu, dong, bun, ji)
        for sido, sigungu, dong, bun, ji in zip(
            df["시도"], df["시군구"], df["법정동"], df["번"], df["지"]
        )
    ]

    latitudes, longitudes = geocode_addresses(df["address"], geocode)
    df['latitude'] = latitudes
    df['longitude'] = longitudes
    df.to_csv(output_path, index=False)


def main():
    """Geocode every source CSV that has no geocoded output yet."""
    # Imported here so the helpers above stay importable without the
    # project-local geocoding client.
    from tools.geocode.naver_geocode import Naver_Map

    all_files = glob.glob("DATA/refined/건축물대장/지오코딩전_파일_분리/*.csv")
    completed_files = glob.glob("DATA/refined/건축물대장/지오코드/*.csv")

    completed_files_set = {adjust_filename(f) for f in completed_files}
    remaining_files = [
        f for f in all_files if os.path.basename(f) not in completed_files_set
    ]

    naver_geocode = Naver_Map()
    for csv_path in remaining_files:
        geocode_file(csv_path, naver_geocode.geocoding)


if __name__ == "__main__":
    main()