juni 2023-06-13
missing files
@a566adae933eb7d1530197df8ed1578d5102ed64
 
choropleth.py (added)
+++ choropleth.py
@@ -0,0 +1,311 @@
+import os
+import io
+from PIL import Image
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+import cv2
+from itertools import compress
+from geopandas import points_from_xy
+from scipy.spatial import Voronoi, voronoi_plot_2d
+from scipy.spatial.distance import cdist
+# import matplotlib.pyplot as plt  # debugging
+
+sep = os.path.sep
+
+
+def extract(lst, i):
+    return [item[i] for item in lst]
+
+
+def extract_coord(lst):
+    return [item.coords[:] for item in lst]
+
+
+def coord_lister(geom):
+    coords = list(geom.exterior.coords)
+    return (coords)
+
+
+# def vor_ver(furthest_pnt, i):
+#     return furthest_pnt[i].vertices
+
+def get_plotting_zoom_level_and_center_coordinates_from_lonlat_tuples(longitudes=None, latitudes=None):
+    """Function documentation:\n
+    Basic framework adopted from Krichardson under the following thread:
+    https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
+
+    # NOTE:
+    # THIS IS A TEMPORARY SOLUTION UNTIL THE DASH TEAM IMPLEMENTS DYNAMIC ZOOM
+    # in their plotly-functions associated with mapbox, such as go.Densitymapbox() etc.
+
+    Returns the appropriate zoom-level for these plotly-mapbox-graphics along with
+    the center coordinate tuple of all provided coordinate tuples.
+    """
+
+    # Check whether both latitudes and longitudes have been passed,
+    # or if the list lenghts don't match
+    if ((latitudes is None or longitudes is None)
+            or (len(latitudes) != len(longitudes))):
+        # Otherwise, return the default values of 0 zoom and the coordinate origin as center point
+        return 0, (0, 0)
+
+    # Get the boundary-box
+    b_box = {}
+    b_box['height'] = latitudes.max() - latitudes.min()
+    b_box['width'] = longitudes.max() - longitudes.min()
+    b_box['center'] = (np.mean(longitudes), np.mean(latitudes))
+
+    # get the area of the bounding box in order to calculate a zoom-level
+    area = b_box['height'] * b_box['width']
+
+    # * 1D-linear interpolation with numpy:
+    # - Pass the area as the only x-value and not as a list, in order to return a scalar as well
+    # - The x-points "xp" should be in parts in comparable order of magnitude of the given area
+    # - The zoom-levels are adapted to the areas, i.e. start with the smallest area possible of 0
+    # which leads to the highest possible zoom value 20, and so forth decreasing with increasing areas
+    # as these variables are antiproportional
+    zoom = np.interp(x=area,
+                     xp=[0, 5 ** -10, 4 ** -10, 3 ** -10, 2 ** -10, 1 ** -10, 1 ** -5],
+                     fp=[20, 15, 14, 13, 12, 7, 5])
+
+    # Finally, return the zoom level and the associated boundary-box center coordinates
+    return zoom, b_box['center']
+
+
+def plotly_fig2array(fig, scale):
+    # convert Plotly fig to  an array
+    fig_bytes = fig.to_image(format="png", width=2000, height=800, scale=scale)
+    buf = io.BytesIO(fig_bytes)
+    img = Image.open(buf)
+    return np.asarray(img)
+
+
+def beautify_text_placement(shp):
+    """
+    Function to optimize text placement and size using Voronoi diagram.
+
+    Arguments:
+    shp -- shape object
+    """
+    num_area = len(shp)
+    polygons_coord = shp.geometry.apply(coord_lister)
+    furthest_pnt = [None] * num_area
+    voronoi_ori = [None] * num_area
+    for i, points in enumerate(polygons_coord):
+        voronoi_ori[i] = Voronoi(points, furthest_site=False)
+        furthest_pnt[i] = voronoi_ori[i].vertices
+
+    voronoi_nodes = [None] * num_area
+    leccc = [None] * num_area
+    # get voronoi nodes that is inside the polygon.
+    for i in range(num_area):
+        voronoi_nodes[i] = points_from_xy(extract(furthest_pnt[i], 0), extract(furthest_pnt[i], 1))
+        voronoi_nodes_if_inside = shp.geometry[i].contains(voronoi_nodes[i])
+        voronoi_nodes[i] = list(compress(voronoi_nodes[i], voronoi_nodes_if_inside))
+        leccc[i] = [item for sublist in extract_coord(voronoi_nodes[i]) for item in sublist]
+
+    dist = [None] * num_area
+    for i, points in enumerate(polygons_coord):
+        # explaining this mess : I need to get the LEC from candidate I got before, but the thing is,
+        # cdist from scipy only accepts exact dimension with array like object,
+        # which means I need to explicitly set the shape of ndarray, convert list of points into ndarray
+        dist[i] = cdist(np.array([list(item) for item in points]), np.array([list(item) for item in leccc[i]]))
+
+    lecc = [None] * num_area
+    lecc_dist = [None] * num_area
+    for i in range(num_area):
+        voronoi_nodes_dist = [None] * (dist[i].shape[1])
+        for j in range(dist[i].shape[1]):
+            voronoi_nodes_dist[j] = min(dist[i][:, j])
+        lecc_ind = np.argmax(voronoi_nodes_dist)
+        lecc[i] = leccc[i][lecc_ind]
+        lecc_dist[i] = np.max(voronoi_nodes_dist)
+
+    return lecc, lecc_dist
+
+
+def choropleth_chart(shp, df, title, save_dir, colorscheme="BuGn", show_legend=True, unit='',
+                     adaptive_legend_font_size=False, geo_annot_scale = 1500,
+                     font="Open Sans", scale=4, save=True):
+    f"""
+    shp : geopandas datastream from .shp, .shx and .dbf must exist in the same folder
+    df : single column of pandas DataFrame or any other iterable, height must be the number of object of shp
+    title : title of the chart
+    save_dir : save directory from 'figure/save_dir.png' 
+    colorscheme : either string of predefined plotly colorscheme or your colorscheme that is iterate of colors  
+    show_legend : Boolean, self explainatory 
+    adaptive_legend_font_size : legend of regions become adaptive using voronoi algorithm,
+                          asserting the biggest annotation adaptive to the size of the region
+                          If false, there will be no annotation over region
+                          This, takes some time and could be improved if you could simplify polygons,
+                          which is not implemented.
+    font : Defaults to the default plotly font(Open Sans) if None. 
+           Type font name string if you need. 
+           Execute font_names.py if you want to see the names of the font that python env recognizes.
+    scale : the larger the better
+    save : Boolean, default is True. To save or not to save. 
+    """
+    # TODO add more settings
+
+    num_area = len(shp)
+    cent_pnt = [None] * num_area
+    for i, cent in enumerate(shp.centroid):
+        cent_pnt[i] = [cent.x, cent.y]
+
+    cent_map_x = sum(extract(cent_pnt, 0)) / num_area
+    cent_map_y = sum(extract(cent_pnt, 1)) / num_area
+
+    lecc = None
+    lecc_dist = None
+    if adaptive_legend_font_size:
+        lecc, lecc_dist = beautify_text_placement(shp)
+
+    # draw the choropleth
+    fig = px.choropleth(
+        # draw shp map
+        shp.set_index("EMD_KOR_NM"),
+        locations=shp.index,  # names
+        geojson=shp.geometry,  # geojson shape
+        color=df,  # a row vector of data
+        color_continuous_scale=colorscheme,
+        center=dict(lat=cent_map_y, lon=cent_map_x),
+        range_color=[0, max(df)],
+    )
+
+    fig.update_coloraxes(
+        colorbar=dict(
+            title=dict(
+                text="",
+                font=dict(
+                    size=30
+                ),
+            ),
+            xanchor="left",
+            x=0.235,
+            tickfont=dict(
+                size=20
+            ),
+            # tickformat=".0%"
+            # put tickformat for later uses?
+        ),
+
+    )
+
+    if show_legend:
+        if adaptive_legend_font_size:
+            print("hello")
+            annotation_text_size = np.multiply(lecc_dist, geo_annot_scale)
+            fig.add_trace(
+                go.Scattergeo(
+                    # draw region names based on lat and lon
+                    lat=extract(lecc, 1), lon=extract(lecc, 0),
+                    # lat=cent_pnt_y, lon=cent_pnt_x,
+                    marker={
+                        "size": [0] * num_area,
+                    },
+                    mode="text",
+                    name="",
+                    text=shp["EMD_KOR_NM"].values,
+                    textposition=["middle center"] * num_area,
+                    textfont={
+                        # "color": ["Black"] * num_area,
+                        "family": [f"{font}"] * num_area,
+                        "size": annotation_text_size,
+                    }
+                )
+            )
+        else:
+            annotation_text_size = [40] * num_area
+            fig.add_trace(
+                go.Scattergeo(
+                    # draw region names based on lat and lon
+                    lat=extract(cent_pnt, 1), lon=extract(cent_pnt, 0),
+                    # lat=cent_pnt_y, lon=cent_pnt_x,
+                    marker={
+                        "size": [0] * num_area,
+                    },
+                    mode="text",
+                    name="",
+                    text=shp["EMD_KOR_NM"].values,
+                    textposition=["middle center"] * num_area,
+                    textfont={
+                        # "color": ["Black"] * num_area,
+                        "family": [f"{font}"] * num_area,
+                        "size": annotation_text_size,
+                    }
+                )
+            )
+
+
+    # testing with more annotation, such as putting numbers.
+    # fig.add_trace(go.Scattergeo(
+    #     # draw region names based on lat and lon
+    #     lat=extract(lecc, 1), lon=extract(lecc, 0),
+    #     # lat=cent_pnt_y, lon=cent_pnt_x,
+    #     marker={
+    #         "size": [0] * num_area,
+    #     },
+    #     mode="text",
+    #     name="",
+    #     text=df,
+    #     textposition=["bottom center"] * num_area,
+    #     textfont={
+    #         "color": ["Black"] * num_area,
+    #         "family": ["Open Sans"] * num_area,
+    #         "size": annotation_text_size*0.6,
+    #     }
+    # ))
+
+    fig.update_traces(marker_line_width=3,
+                      marker_line_color='white')
+    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
+    fig.update_annotations(showarrow=False, visible=True)
+    fig.update_geos(fitbounds="locations", visible=False)
+
+    fig.update_layout(
+        legend=dict(
+            yanchor="top",
+            y=0.99,
+            xanchor="left",
+            x=0.01
+        )
+    )
+
+    fig.update_layout(
+        title_text=f"{title}",
+    )
+
+
+
+    fig.update_layout(
+        font=dict({'family': f'{font}'})
+    )
+
+
+    fig.update_layout(
+        coloraxis_colorbar=dict(
+            title=f"단위 : {unit}",
+        )
+    )
+
+    # fig.show()  # debug
+    if save:
+        # slicing static image because there is no way to adjust geojson object there.
+        img = plotly_fig2array(fig, scale)
+        img = img[:, 460*scale : 1500*scale]
+        p = f"figure{sep}{save_dir}.png"
+        cv2.imwrite(p, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
+        fig.update_layout(
+            title=dict(
+                yanchor="top",
+                y=0.98,
+                xanchor="left",
+                x=0.32,
+                font_size=40,
+                font_color="Black"
+            ),
+        )
+        print(f"saved at : {p}")
+    else:
+        return fig
 
font_names.py (added)
+++ font_names.py
@@ -0,0 +1,11 @@
+import matplotlib.font_manager
+from PIL import ImageFont
+
+# Iterate over all font files known to matplotlib
+for filename in matplotlib.font_manager.findSystemFonts():
+    # Avoid these two trouble makers - don't know why they are problematic
+    if "Emoji" not in filename and "18030" not in filename:
+        # Look up what PIL knows about the font
+        font = ImageFont.FreeTypeFont(filename)
+        name, weight = font.getname()
+        print(f'File: {filename}, fontname: {name}, weight: {weight}')(No newline at end of file)
 
map/영천시 행정동.dbf (Binary) (added)
+++ map/영천시 행정동.dbf
Binary file is not shown
 
map/영천시 행정동.shp (Binary) (added)
+++ map/영천시 행정동.shp
Binary file is not shown
 
map/영천시 행정동.shx (Binary) (added)
+++ map/영천시 행정동.shx
Binary file is not shown
 
test.py (added)
+++ test.py
@@ -0,0 +1,96 @@
+import geopandas as gpd
+import pandas as pd
+import glob
+import numpy as np
+from choropleth import choropleth_chart
+
+# 데이터는 행정구역 가나다순으로 넣으세요
+shp = gpd.read_file('map/영천시 행정동.shp', encoding='utf-8')
+shp = shp.sort_values('EMD_KOR_NM')
+shp = shp.reset_index()
+df = pd.read_csv("data/stat/동별종합.csv")
+
+
+colorscale = [\
+    [0,'#E02C4E'],
+    [0.33, '#F0E32E'],
+    [0.57, '#6FEB75'],
+    [1, '#5FB199']
+    ]
+
+datas = glob.glob(f'data/stat/*현황.xlsx')
+datas = sorted(datas)
+total = [None] * 16
+for i, data in enumerate(datas):
+    data = pd.read_excel(data)
+    total[i] = data.loc[:, '계'].sum()
+print(total)
+sum = np.sum(total)
+sum = total/sum
+print(sum)
+
+man = [None] * 16
+for i, data in enumerate(datas):
+    data = pd.read_excel(data)
+    man[i] = data.loc[:,'남'].sum()
+
+woman = [None] * 16
+for i, data in enumerate(datas):
+    data = pd.read_excel(data)
+    woman[i] = data.loc[:,'여'].sum()
+
+
+# total = pd.DataFrame(total,index=shp['EMD_KOR_NM'].values)
+# choropleth_chart(shp,total,' ','읍면동별 1인가구수', show_legend=False, unit="가구" ,colorscheme="OrRd", adaptive_annotation=True)
+# choropleth_chart(shp,sum,' ','읍면동별 1인가구수백분율', show_legend=False, unit="%", colorscheme="OrRd", adaptive_annotation=True)
+# choropleth_chart(shp,man,' ','읍면동별 1인가구수남자', show_legend=False, unit="가구", colorscheme="OrRd", adaptive_annotation=True)
+# choropleth_chart(shp,woman,' ','읍면동별 1인가구수여자', show_legend=False, unit="가구", colorscheme="OrRd", adaptive_annotation=True)
+# colorscale = [\
+#     [0,'#E07C0E'],
+#     [0.33, '#F0E32E'],
+#     [0.57, '#6FEB75'],
+#     [1, '#5FB1F5']
+# ]
+
+colorscale = [\
+    [1-0,'#E02C4E'],
+    [1-0.4, '#F0E32E'],
+    [1-0.7, '#6FEB75'],
+    [1-1, '#5FB1F5']
+]
+
+cdf = df.iloc[:,1]
+cdf += df.iloc[:,3]
+choropleth_chart(shp, cdf, '2022 영천시 1인가구중 중년이상비율', f"1인가구 중년이상",
+                 colorscheme=colorscale[::-1], adaptive_legend_font_size=True)
+
+# df = pd.read_csv("data/stat/경상북도 영천시_경로당_20221108.csv")
+#
+# cdf = df['행정동명'].value_counts()
+# #
+# colorscale = [\
+#     [0,'#E02C4E'],
+#     [0.33, '#F0E32E'],
+#     [0.57, '#6FEB75'],
+#     [1, '#5FB199']
+# ]
+# #
+# choropleth_chart(shp, cdf, '', f"figure2022노인정test",
+#                  colorscheme=colorscale, adaptive_annotation=True)
+
+# df = pd.read_excel("data/영천시 2021년 bc카드 행정구역 데이터.xlsx")
+# df = df.sort_values('가맹점_읍면동명')
+# df = df.reset_index()
+# col_index = df.columns
+# print(col_index)
+# for col in col_index:
+#     cdf = df[col]
+#     choropleth_chart(shp, cdf, f"2021년 bc카드 {col}", f"figure2021{col}", adaptive_annotation=True)
+#
+# df = pd.read_excel("data/2022년 bc카드 행정구역 별 데이터.xlsx")
+# col_index = df.columns
+# print(col_index)
+# # cdf = df['행정동명'].value_counts()
+# for col in col_index:
+#     cdf = df[col]
+#     choropleth_chart(shp, cdf, f"2022년 bc카드 {col}", f"figure2022{col}", adaptive_annotation=True)(No newline at end of file)
Add a comment
List