data:image/s3,"s3://crabby-images/77fc1/77fc1ecd598263bdfa1d6248fbe60b3bfc41f6f8" alt=""
data:image/s3,"s3://crabby-images/aba99/aba9923901faa38de43ebb6f042a7cbd5b98cedb" alt=""
missing files
@a566adae933eb7d1530197df8ed1578d5102ed64
+++ choropleth.py
... | ... | @@ -0,0 +1,311 @@ |
1 | +import os | |
2 | +import io | |
3 | +from PIL import Image | |
4 | +import numpy as np | |
5 | +import plotly.express as px | |
6 | +import plotly.graph_objects as go | |
7 | +import cv2 | |
8 | +from itertools import compress | |
9 | +from geopandas import points_from_xy | |
10 | +from scipy.spatial import Voronoi, voronoi_plot_2d | |
11 | +from scipy.spatial.distance import cdist | |
12 | +# import matplotlib.pyplot as plt # debugging | |
13 | + | |
14 | +sep = os.path.sep | |
15 | + | |
16 | + | |
def extract(lst, i):
    """Collect element ``i`` of every item in ``lst`` into a new list."""
    picked = []
    for entry in lst:
        picked.append(entry[i])
    return picked
19 | + | |
20 | + | |
def extract_coord(lst):
    """Return, for every item in ``lst``, a full slice of its ``coords``."""
    return list(map(lambda geom: geom.coords[:], lst))
23 | + | |
24 | + | |
def coord_lister(geom):
    """Return the coordinates of ``geom.exterior`` as a plain list."""
    ring = geom.exterior
    return [*ring.coords]
28 | + | |
29 | + | |
30 | +# def vor_ver(furthest_pnt, i): | |
31 | +# return furthest_pnt[i].vertices | |
32 | + | |
def get_plotting_zoom_level_and_center_coordinates_from_lonlat_tuples(longitudes=None, latitudes=None):
    """Estimate a plotly-mapbox zoom level and center for a set of coordinates.

    Basic framework adopted from Krichardson under the following thread:
    https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7

    NOTE: this is a temporary solution until the Dash team implements dynamic
    zoom in their plotly functions associated with mapbox, such as
    go.Densitymapbox() etc.

    Args:
        longitudes: array-like of longitudes (list, NumPy array, Series, ...).
        latitudes: array-like of latitudes, same length as ``longitudes``.

    Returns:
        A ``(zoom, (center_lon, center_lat))`` tuple. The default
        ``(0, (0, 0))`` is returned when either argument is missing, when the
        lengths do not match, or when the inputs are empty.
    """
    default = (0, (0, 0))

    # Both coordinate sequences are required.
    if latitudes is None or longitudes is None:
        return default

    # Convert once so that plain lists work as well as arrays/Series
    # (lists have no .max()/.min()).
    lats = np.atleast_1d(np.asarray(latitudes, dtype=float))
    lons = np.atleast_1d(np.asarray(longitudes, dtype=float))

    # Mismatched lengths or empty input cannot describe a bounding box.
    if len(lats) != len(lons) or lats.size == 0 or lons.size == 0:
        return default

    # Bounding box of all points; the center is the mean of the coordinates.
    height = lats.max() - lats.min()
    width = lons.max() - lons.min()
    center = (np.mean(lons), np.mean(lats))

    # The bounding-box area drives the zoom level.
    area = height * width

    # 1D linear interpolation with numpy:
    # - The area is passed as a scalar so that a scalar is returned.
    # - The x-points "xp" should be of comparable order of magnitude to the area.
    # - Zoom and area are inversely related: area 0 maps to the highest zoom
    #   (20) and the zoom decreases as the area grows.
    # NOTE(review): the last two breakpoints (1 ** -10 and 1 ** -5) both
    # evaluate to 1.0, so the final segment is degenerate and np.interp
    # expects increasing xp -- confirm the intended breakpoints.
    zoom = np.interp(x=area,
                     xp=[0, 5 ** -10, 4 ** -10, 3 ** -10, 2 ** -10, 1 ** -10, 1 ** -5],
                     fp=[20, 15, 14, 13, 12, 7, 5])

    return zoom, center
74 | + | |
75 | + | |
def plotly_fig2array(fig, scale):
    """Render a Plotly figure to PNG and return its pixels as a NumPy array.

    The figure is exported at 2000x800 (before ``scale`` is applied) via
    ``fig.to_image`` and decoded from an in-memory buffer with PIL.
    """
    png_bytes = fig.to_image(format="png", width=2000, height=800, scale=scale)
    return np.asarray(Image.open(io.BytesIO(png_bytes)))
82 | + | |
83 | + | |
def beautify_text_placement(shp):
    """Find a label anchor and a size hint for every polygon in ``shp``.

    For each geometry a Voronoi diagram is built from its exterior-ring
    vertices. Among the Voronoi vertices that fall inside the polygon, the
    one whose nearest exterior vertex is furthest away is chosen (an
    approximation of the center of the largest empty circle).

    Arguments:
    shp -- shape object exposing a ``geometry`` column of polygons

    Returns:
    lecc -- list with one ``(x, y)`` tuple per geometry (the chosen anchor)
    lecc_dist -- list with the distance from each anchor to the nearest
                 exterior-ring vertex of its polygon
    """
    lecc = []
    lecc_dist = []

    # Iterate the geometries positionally so the result order always matches
    # the row order of ``shp``, regardless of how its index is labelled.
    for geom in shp.geometry:
        boundary = np.array([list(pt) for pt in coord_lister(geom)])
        vertices = Voronoi(boundary, furthest_site=False).vertices

        # Keep only the Voronoi vertices that lie inside the polygon.
        candidates = points_from_xy(vertices[:, 0], vertices[:, 1])
        inside = np.asarray(geom.contains(candidates), dtype=bool)
        interior = vertices[inside]

        if len(interior) == 0:
            # No Voronoi vertex is inside the polygon (cdist would fail on an
            # empty candidate set): fall back to a point that is guaranteed
            # to be within the geometry.
            fallback = geom.representative_point()
            anchor = (fallback.x, fallback.y)
            clearance = cdist(boundary, np.array([anchor])).min()
            lecc.append(anchor)
            lecc_dist.append(clearance)
            continue

        # dist[r, c] = distance from boundary vertex r to candidate c; the
        # column-wise minimum is each candidate's distance to the nearest
        # boundary vertex.
        nearest = cdist(boundary, interior).min(axis=0)
        best = int(np.argmax(nearest))
        lecc.append(tuple(interior[best].tolist()))
        lecc_dist.append(nearest[best])

    return lecc, lecc_dist
126 | + | |
127 | + | |
def choropleth_chart(shp, df, title, save_dir, colorscheme="BuGn", show_legend=True, unit='',
                     adaptive_legend_font_size=False, geo_annot_scale = 1500,
                     font="Open Sans", scale=4, save=True):
    """Draw a choropleth of ``shp`` coloured by ``df`` and save or return it.

    shp : geopandas data read from a .shp file (.shp, .shx and .dbf must exist
          in the same folder); must contain an "EMD_KOR_NM" column.
    df : single column of a pandas DataFrame or any other iterable; its
         length must equal the number of objects in shp.
    title : title of the chart.
    save_dir : output name; the image is written to 'figure/<save_dir>.png'.
    colorscheme : either the name of a predefined plotly colour scale or a
                  custom colour scale (an iterable of colours).
    show_legend : Boolean; when True the region names are drawn on the map.
    unit : text appended to the colour-bar title ("단위 : <unit>").
    adaptive_legend_font_size : when True the region names are placed and
                  sized with the Voronoi-based beautify_text_placement();
                  when False they are drawn at the region centroids with a
                  fixed size of 40. The adaptive mode takes some time and
                  could be improved by simplifying the polygons, which is not
                  implemented.
    geo_annot_scale : multiplier converting the distance returned by
                  beautify_text_placement() into a font size.
    font : font family name. Defaults to the default plotly font (Open Sans)
           if None. Execute font_names.py to see the font names that the
           python environment recognizes.
    scale : export scale factor passed to the image renderer; larger values
            give a higher-resolution image.
    save : Boolean, default is True. When True the cropped image is written
           to disk and None is returned; when False the figure is returned.
    """
    # TODO add more settings

    num_area = len(shp)
    cent_pnt = [[cent.x, cent.y] for cent in shp.centroid]

    cent_map_x = sum(extract(cent_pnt, 0)) / num_area
    cent_map_y = sum(extract(cent_pnt, 1)) / num_area

    # Honour the documented contract: None means the default plotly font.
    font_family = "Open Sans" if font is None else f"{font}"

    # draw the choropleth
    fig = px.choropleth(
        # draw shp map
        shp.set_index("EMD_KOR_NM"),
        locations=shp.index,  # names
        geojson=shp.geometry,  # geojson shape
        color=df,  # a row vector of data
        color_continuous_scale=colorscheme,
        center=dict(lat=cent_map_y, lon=cent_map_x),
        range_color=[0, max(df)],
    )

    fig.update_coloraxes(
        colorbar=dict(
            title=dict(
                text="",
                font=dict(
                    size=30
                ),
            ),
            xanchor="left",
            x=0.235,
            tickfont=dict(
                size=20
            ),
            # tickformat=".0%"
            # put tickformat for later uses?
        ),
    )

    if show_legend:
        if adaptive_legend_font_size:
            # The Voronoi placement is expensive, so it only runs when the
            # labels are actually drawn.
            lecc, lecc_dist = beautify_text_placement(shp)
            label_lon, label_lat = extract(lecc, 0), extract(lecc, 1)
            annotation_text_size = np.multiply(lecc_dist, geo_annot_scale)
        else:
            label_lon, label_lat = extract(cent_pnt, 0), extract(cent_pnt, 1)
            annotation_text_size = [40] * num_area

        fig.add_trace(
            go.Scattergeo(
                # draw region names based on lat and lon
                lat=label_lat, lon=label_lon,
                marker={
                    "size": [0] * num_area,
                },
                mode="text",
                name="",
                text=shp["EMD_KOR_NM"].values,
                textposition=["middle center"] * num_area,
                textfont={
                    "family": [font_family] * num_area,
                    "size": annotation_text_size,
                }
            )
        )

    fig.update_traces(marker_line_width=3,
                      marker_line_color='white')
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    fig.update_annotations(showarrow=False, visible=True)
    fig.update_geos(fitbounds="locations", visible=False)

    fig.update_layout(
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01
        )
    )

    fig.update_layout(
        title_text=f"{title}",
    )

    fig.update_layout(
        font=dict({'family': font_family})
    )

    fig.update_layout(
        coloraxis_colorbar=dict(
            title=f"단위 : {unit}",
        )
    )

    # fig.show()  # debug
    if save:
        # slicing static image because there is no way to adjust geojson object there.
        img = plotly_fig2array(fig, scale)
        img = img[:, int(460 * scale):int(1500 * scale)]
        p = f"figure{sep}{save_dir}.png"

        # Make sure the target folder exists before writing.
        os.makedirs(os.path.dirname(p), exist_ok=True)

        # Encode in memory and write with Python's own file API so that
        # non-ASCII file names work on every platform, and so that a failed
        # encode is reported instead of being silently ignored.
        ok, encoded = cv2.imencode(".png", cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
        if not ok:
            raise OSError(f"could not encode image for : {p}")
        with open(p, "wb") as out_file:
            out_file.write(encoded.tobytes())

        # NOTE(review): this styling is applied after the image was rendered
        # and the figure is not returned on this path, so it does not affect
        # the saved file -- confirm whether it should run before the export.
        fig.update_layout(
            title=dict(
                yanchor="top",
                y=0.98,
                xanchor="left",
                x=0.32,
                font_size=40,
                font_color="Black"
            ),
        )
        print(f"saved at : {p}")
    else:
        return fig
+++ font_names.py
... | ... | @@ -0,0 +1,11 @@ |
1 | +import matplotlib.font_manager | |
2 | +from PIL import ImageFont | |
3 | + | |
# Walk through every font file matplotlib can locate and report PIL's view of it
for font_path in matplotlib.font_manager.findSystemFonts():
    # Skip these two trouble makers - it is not known why they are problematic
    if "Emoji" in font_path or "18030" in font_path:
        continue
    # Ask PIL for the family and style names stored in the font file
    family, style = ImageFont.FreeTypeFont(font_path).getname()
    print(f'File: {font_path}, fontname: {family}, weight: {style}')
+++ map/영천시 행정동.dbf
Binary file is not shown |
+++ map/영천시 행정동.shp
Binary file is not shown |
+++ map/영천시 행정동.shx
Binary file is not shown |
+++ test.py
... | ... | @@ -0,0 +1,96 @@ |
1 | +import geopandas as gpd | |
2 | +import pandas as pd | |
3 | +import glob | |
4 | +import numpy as np | |
5 | +from choropleth import choropleth_chart | |
6 | + | |
# Provide the data in alphabetical (ga-na-da) order of the administrative districts.
shp = gpd.read_file('map/영천시 행정동.shp', encoding='utf-8')
shp = shp.sort_values('EMD_KOR_NM').reset_index()
df = pd.read_csv("data/stat/동별종합.csv")

# One workbook per district; sorted so the order is deterministic.
datas = sorted(glob.glob('data/stat/*현황.xlsx'))

# Read every workbook once and reuse the frames for all aggregates, sized by
# the number of files actually found rather than a hard-coded count.
frames = [pd.read_excel(path) for path in datas]

# Column sums per district: '계' (total), '남' (male), '여' (female).
total = [frame.loc[:, '계'].sum() for frame in frames]
print(total)

# Share of each district in the overall total (named so that the built-in
# sum() is not shadowed).
share = np.asarray(total) / np.sum(total)
print(share)

man = [frame.loc[:, '남'].sum() for frame in frames]
woman = [frame.loc[:, '여'].sum() for frame in frames]
41 | + | |
42 | + | |
43 | +# total = pd.DataFrame(total,index=shp['EMD_KOR_NM'].values) | |
44 | +# choropleth_chart(shp,total,' ','읍면동별 1인가구수', show_legend=False, unit="가구" ,colorscheme="OrRd", adaptive_annotation=True) | |
45 | +# choropleth_chart(shp,sum,' ','읍면동별 1인가구수백분율', show_legend=False, unit="%", colorscheme="OrRd", adaptive_annotation=True) | |
46 | +# choropleth_chart(shp,man,' ','읍면동별 1인가구수남자', show_legend=False, unit="가구", colorscheme="OrRd", adaptive_annotation=True) | |
47 | +# choropleth_chart(shp,woman,' ','읍면동별 1인가구수여자', show_legend=False, unit="가구", colorscheme="OrRd", adaptive_annotation=True) | |
48 | +# colorscale = [\ | |
49 | +# [0,'#E07C0E'], | |
50 | +# [0.33, '#F0E32E'], | |
51 | +# [0.57, '#6FEB75'], | |
52 | +# [1, '#5FB1F5'] | |
53 | +# ] | |
54 | + | |
# Colour stops are written as 1 - position and reversed at the call site so
# that they end up in ascending order.
colorscale = [
    [1 - 0, '#E02C4E'],
    [1 - 0.4, '#F0E32E'],
    [1 - 0.7, '#6FEB75'],
    [1 - 1, '#5FB1F5'],
]

# Add the two columns into a new Series instead of updating a column taken
# from df in place, so df itself is left untouched.
cdf = df.iloc[:, 1] + df.iloc[:, 3]
choropleth_chart(shp, cdf, '2022 영천시 1인가구중 중년이상비율', "1인가구 중년이상",
                 colorscheme=colorscale[::-1], adaptive_legend_font_size=True)
66 | + | |
67 | +# df = pd.read_csv("data/stat/경상북도 영천시_경로당_20221108.csv") | |
68 | +# | |
69 | +# cdf = df['행정동명'].value_counts() | |
70 | +# # | |
71 | +# colorscale = [\ | |
72 | +# [0,'#E02C4E'], | |
73 | +# [0.33, '#F0E32E'], | |
74 | +# [0.57, '#6FEB75'], | |
75 | +# [1, '#5FB199'] | |
76 | +# ] | |
77 | +# # | |
78 | +# choropleth_chart(shp, cdf, '', f"figure2022노인정test", | |
79 | +# colorscheme=colorscale, adaptive_annotation=True) | |
80 | + | |
81 | +# df = pd.read_excel("data/영천시 2021년 bc카드 행정구역 데이터.xlsx") | |
82 | +# df = df.sort_values('가맹점_읍면동명') | |
83 | +# df = df.reset_index() | |
84 | +# col_index = df.columns | |
85 | +# print(col_index) | |
86 | +# for col in col_index: | |
87 | +# cdf = df[col] | |
88 | +# choropleth_chart(shp, cdf, f"2021년 bc카드 {col}", f"figure2021{col}", adaptive_annotation=True) | |
89 | +# | |
90 | +# df = pd.read_excel("data/2022년 bc카드 행정구역 별 데이터.xlsx") | |
91 | +# col_index = df.columns | |
92 | +# print(col_index) | |
93 | +# # cdf = df['행정동명'].value_counts() | |
94 | +# for col in col_index: | |
95 | +# cdf = df[col] | |
96 | +# choropleth_chart(shp, cdf, f"2022년 bc카드 {col}", f"figure2022{col}", adaptive_annotation=True)(No newline at end of file) |
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?