File name
Commit message
Commit date
File name
Commit message
Commit date
File name
Commit message
Commit date
import pandas as pd
import numpy as np
import plotly.express as px
def filter(df, col, col_key, is_in=True):
    """Select rows of ``df`` by comparing one column against a single value.

    :param df: DataFrame to select rows from.
    :param col: label of the column to compare.
    :param col_key: value each entry of ``col`` is compared with.
    :param is_in: when truthy keep rows where ``col == col_key``;
        otherwise keep rows where ``col != col_key``.
    :return: the selected rows of ``df``.
    """
    column = df.loc[:, col]
    mask = (column == col_key) if is_in else (column != col_key)
    return df[mask]
def _apply_conditions(df, conditions):
    """Return the rows of ``df`` that satisfy every ``(col, col_key, is_in)`` condition."""
    for col, col_key, is_in in conditions:
        column = df.loc[:, col]
        df = df[column == col_key] if is_in else df[column != col_key]
    return df


def _count_businesses(first, second):
    """Count distinct businesses in the union of two frames that have a non-null '위도'."""
    merged = pd.concat([first, second])
    # A business is identified by (store number, store name, business subcategory).
    # count() only counts non-null cells, so "> 0" keeps groups with a latitude.
    per_business = merged.groupby(['상가업소번호', '상호명', '상권업종소분류명'])['위도'].count()
    return int((per_business > 0).sum())


def show(starting_year, ending_year, city='영천시', extrafilter=None):
    '''
    Compare two store-listing snapshots for Gyeongsangbuk-do and for one city.

    For each scope, the rows of ``starting_year`` are counted and divided by
    the number of distinct businesses found in ``starting_year`` and
    ``ending_year`` combined. Gas stations ('주유소') are always excluded.
    NOTE(review): the original variable names suggest this is meant as a
    survival metric -- confirm that this ratio is the intended definition.

    Both results are printed (only when their denominator is non-zero) and
    returned.

    :param starting_year: DataFrame holding the earlier snapshot. Must contain
        the columns '시도명', '시군구명', '상권업종소분류명', '상가업소번호',
        '상호명' and '위도'.
    :param ending_year: DataFrame holding the later snapshot, same columns.
    :param city: value of '시군구명' selecting the city-level scope.
    :param extrafilter: list of list [col, col_key, is_in] are accepted input,
        filters elements in df; every entry is applied to both scopes and
        both snapshots.
    :return: tuple ``(gb, yc)`` -- the province-level and the city-level
        ratio; a ratio is 0 when its denominator would be zero.
    '''
    extra = [(key[0], key[1], key[2]) for key in extrafilter] if extrafilter else []
    no_gas_station = ('상권업종소분류명', '주유소', False)
    province_conditions = [('시도명', '경상북도', True), no_gas_station] + extra
    city_conditions = [('시군구명', city, True), no_gas_station] + extra

    starting_year_all = _apply_conditions(starting_year, province_conditions)
    ending_year_all = _apply_conditions(ending_year, province_conditions)
    starting_year_city = _apply_conditions(starting_year, city_conditions)
    ending_year_city = _apply_conditions(ending_year, city_conditions)

    # Each denominator is computed once and reused for the ratio and the print guard.
    gb_businesses = _count_businesses(starting_year_all, ending_year_all)
    city_businesses = _count_businesses(starting_year_city, ending_year_city)

    gb = len(starting_year_all) / gb_businesses if gb_businesses else 0
    yc = len(starting_year_city) / city_businesses if city_businesses else 0

    if gb_businesses:
        print('경북', gb)
    if city_businesses:
        # Label with the requested city instead of a fixed '영천', which was
        # wrong for any other value of ``city``.
        print(city, yc)
    return gb, yc
# df_ycity_survive.groupby('상권업종소분류명').sum().to_csv(f'영천_{filename}.csv')
# df_gb_survive.groupby('상권업종소분류명').sum().to_csv(f'경북_{filename}.csv')
# df_2018=pd.read_csv('2019년/소상공인시장진흥공단_상가업소정보_201812_4.csv',encoding='euc-kr')
# Load the yearly store-listing CSVs. The 2019 and 2020 files are
# pipe-delimited; the 2021 and 2022 files use read_csv's default separator.
df_2019 = pd.read_csv('2019년/상가업소정보_201912_04.csv', sep='|')
df_2020 = pd.read_csv('소상공인시장진흥공단_상가(상권)정보_경북_202012.csv', sep='|')
df_2021 = pd.read_csv('소상공인시장진흥공단_상가(상권)정보_경북_202112.csv')
df_2022 = pd.read_csv('소상공인시장진흥공단_상가(상권)정보_경북_202212.csv')

# Filter specs in show()'s ``extrafilter`` format ([col, col_key, is_in]).
# In the visible code they are only referenced by the disabled calls below.
extra_filter = [['상권업종소분류명', '편의점', True]]
extra_filter2 = [['상권업종소분류명', '편의점', False]]

# Sorted list of district names; not referenced by the visible code.
region = sorted([
    "금호읍",
    "동부동",
    "남부동",
    "서부동",
    "고경면",
    "완산동",
    "청통면",
    "중앙동",
    "북안면",
    "신녕면",
    "대창면",
    "임고면",
    "화산면",
    "화남면",
    "자양면",
    "화북면",
])

# Every distinct '상권업종중분류명' value in the 2019 data becomes one output row.
sector = df_2019.loc[:, "상권업종중분류명"].unique()
csv_out_out = pd.DataFrame(index=sector)

# Compare the 2019 data against each later year; each year adds one column.
for year in (df_2020, df_2021, df_2022):
    print("전체")
    show(df_2019, year)
    # Disabled variants of the comparison using extra_filter / extra_filter2:
    # print("편의점")
    # show(df_2019, year, '영천시', extra_filter)
    # print("편의점X")
    # show(df_2019, year, '영천시', extra_filter2)
    csv_out = []
    for emd in sector:
        print(emd)
        exfil = [
            ['상권업종대분류명', '소매', True],
            ['상권업종중분류명', emd, True],
            ['행정동명', '서부동', True],
        ]
        # Only the city-level value returned by show() is recorded.
        csv_out.append(show(df_2019, year, '영천시', exfil)[1])
    csv_out = pd.DataFrame(csv_out, index=sector)
    csv_out_out = pd.concat((csv_out_out, csv_out), axis=1)

csv_out_out.to_csv("_업종_중분류_서부동.csv")