import geopandas as gpd
import pandas as pd
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
# def prepare_tasks(under, over, task_que):
#     task_list = []
#     # We want to ensure minimal data is being moved around, so we prepare data slices here
#     # that will be passed directly to each worker process
#     for under_index, over_indices in task_que.items():
#         # Extract only the necessary rows from 'under' and 'over' and create lightweight copies
#         under_slice = under.iloc[[under_index]].copy()  # Copy to ensure continuity outside the loop
#         over_slice = over.iloc[over_indices].copy()  # Copy to ensure continuity outside the loop
#         task_list.append({
#             'under_data': under_slice,
#             'over_data': over_slice
#         })
#     return task_list
#
#
# def process_intersection(task, how="intersection"):
#     # Use the data slices already prepared by prepare_tasks
#     ref_poly = task['under_data']
#     comp_poly = task['over_data']
#     # Perform the overlay operation
#     intersection = ref_poly.overlay(comp_poly, how=how, keep_geom_type=False)
#     return intersection
#
#
# def geom_overlay(under, over, how="intersection"):
#     if under.crs != over.crs:
#         print(f"Error: CRS mismatch \nUnder CRS: {under.crs}, \nOver CRS: {over.crs}")
#         return None
#     print(f"under : {len(under)}, over : {len(over)}")
#     if len(under) > len(over):
#         under, over = over, under
#
#     print("creating spatial index queries...")
#     sindex_query_result = under.sindex.query(over["geometry"], predicate="intersects")
#     task_que = {}
#     for key, value in zip(sindex_query_result[1], sindex_query_result[0]):
#         if key in task_que:
#             task_que[key].append(value)
#         else:
#             task_que[key] = [value]
#
#     print("preparing the tasks...")
#     print(len(task_que))
#     task_list = prepare_tasks(under, over, task_que)
#
#     print("executing...")
#     results = []
#     with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
#         future_to_task = {executor.submit(process_intersection, task, how): task for task in task_list}
#         total_tasks = len(future_to_task)
#         completed_tasks = 0
#
#         for future in as_completed(future_to_task):
#             completed_tasks += 1
#             print(f"Completed {completed_tasks} of {total_tasks} tasks")
#             results.append(future.result())  # Collect the result from the future
#
#     resulting_intersection_geom = pd.concat(results)
#     return resulting_intersection_geom
def geom_overlay(under, over, how="intersection"):
under_crs = over.crs
over_crs = under.crs
# switch values for optimization
if len(under) > len (over):
under, over = over, under
if over_crs != under_crs:
print(f"error : crs_mismatch \n under : {under_crs},\n over: {over_crs}")
exit()
print("creating_spatial index...")
sindex_query_result = under.sindex.query(over["geometry"], predicate="intersects")
# key is the reference(under) polygon index, and value is comparative(over) polygon index
# I recommend to use reference layer with fewer polygons then comparative one. (saves search time)
task_que = {}
for key, value in zip(sindex_query_result[1], sindex_query_result[0]):
if key in task_que:
task_que[key].append(value)
else:
task_que[key] = [value]
intersections = []
print("executing...")
for i, key in enumerate(task_que):
# double brackets for... not passing row as a column.
# how intuitive (clap. clap. clap.)
ref_poly = under.iloc[[key]]
ref_poly = gpd.GeoDataFrame(ref_poly, crs=over_crs)
comp_poly = over.iloc[task_que[key]]
intersection = ref_poly.overlay(comp_poly, how=how, keep_geom_type=False)
intersections.append(intersection)
print(f"{i}/{len(task_que)}")
if len(intersections) > 0:
resulting_intersection_geom = pd.concat(intersections)
else :
resulting_intersection_geom = []
return resulting_intersection_geom
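

# Minimal usage sketch (not part of the original workflow): the file paths below are
# hypothetical placeholders; both layers are assumed to be polygon layers sharing a CRS.
if __name__ == "__main__":
    under_layer = gpd.read_file("under_polygons.gpkg")  # hypothetical input path
    over_layer = gpd.read_file("over_polygons.gpkg")    # hypothetical input path
    result = geom_overlay(under_layer, over_layer, how="intersection")
    if result is not None and len(result) > 0:
        result.to_file("overlay_result.gpkg")            # hypothetical output path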