This notebook contains the complete process for applying weights, extracted from LBSM data (geolocated social media posts), to meinGrün target geometries (Zielgeometrien).
Intersecting points with polygons is not complex in itself. However, there are several potential pitfalls in the process, which are pointed out where they arise.
Let's first start with importing the necessary dependencies.
First, make sure you have these packages installed. Dependency management in Python can be tricky. The following instructions are based on the conda package manager (e.g. miniconda). By pinning one package (holoviews), we can make sure that 1) compatible package versions are installed for all other packages and 2) the code written herein is compatible.
conda create -n jupyter_env_meingruen python=3.7 -c pyviz -c conda-forge
conda activate jupyter_env_meingruen
conda config --env --set channel_priority strict
conda config --show channel_priority # verify
conda install -c pyviz -c conda-forge geoviews "bokeh<2.0.0" seaborn shapely "holoviews=1.12.7" geopandas matplotlib jupyterlab descartes mapclassify
Optional:
conda install -c pyviz -c conda-forge jupyter_contrib_nbextensions tagmaps
Alternatively, to create an env with the latest versions of bokeh, holoviews, geoviews, use the following approach:
conda create -n jupyter_env_meingruen python=3.7 -c "pyviz/label/dev" -c conda-forge
conda activate jupyter_env_meingruen
conda config --env --set channel_priority strict
conda install -c "pyviz/label/dev" -c conda-forge geoviews bokeh seaborn shapely holoviews geopandas matplotlib jupyterlab descartes mapclassify tagmaps jupyter_contrib_nbextensions
import math
import csv
import fiona
import geoviews as gv
import geoviews.feature as gf
import mapclassify as mc
import seaborn as sns
import datetime as dt
import geopandas as gp
import pandas as pd
import holoviews as hv
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from rtree import index
from pathlib import Path
from shapely.geometry import shape
from pyproj import Transformer, CRS, Proj
from shapely.ops import transform
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.geometry.collection import GeometryCollection
from holoviews import dim
from holoviews.operation.datashader import datashade, shade, dynspread, rasterize
from geopandas.tools import sjoin
from matplotlib.patches import Patch
# enable shapely.speedups, which makes some of the spatial queries run faster
import shapely.speedups
shapely.speedups.enable()
from cartopy import crs as ccrs
from collections import defaultdict
from collections import namedtuple
from tagmaps.classes.utils import Utils
from IPython.display import clear_output, Markdown, display
hv.notebook_extension('bokeh')
Display the versions of the packages used:
import pkg_resources
root_packages = [
'geoviews', 'geopandas', 'pandas', 'numpy', 'descartes',
'matplotlib', 'shapely', 'cartopy', 'holoviews',
'mapclassify', 'fiona', 'bokeh', 'pyproj', 'ipython',
'jupyterlab', 'tagmaps']
root_packages.sort(reverse=True)
root_packages_list = []
for m in pkg_resources.working_set:
    if m.project_name.lower() in root_packages:
        root_packages_list.append([m.project_name, m.version])
display(pd.DataFrame(
root_packages_list,
columns=["package", "version"]
).set_index("package").transpose())
display(Markdown('---'))
Because we're going to display fairly large maps, graphics, and interactive visualizations in this notebook, the following line (disabling Jedi tab completion) may improve Jupyter's responsiveness.
%config Completer.use_jedi = False
This section contains the only manual code edits that are needed to run this notebook.
The files should be placed in a subfolder called "./01_Input/" (relative to this notebook).
# TARGETSHAPE_VERSION = 'v4.1'
# TARGETSHAPE_VERSION = 'v170919'
TARGETSHAPE_VERSION = 'v5'
INTERSECT_VERSION = 'v1.2'
## Heidelberg official
# LEVEL = "L0"
# TARGET_SHAPE_ID_COL = "id"
# GREEN_TARGET_COL = None
# ZIELGEOMETRIEN_DATA = "GeoJSON_Heidelberg_v05/greentargets_hd.json"
## Level 0:
LEVEL = "L0"
TARGET_SHAPE_ID_COL = "TARGET_ID"
GREEN_TARGET_COL = "TARGET_green_target"
ZIELGEOMETRIEN_DATA = "GeoJSON_Dresden_v05/targets.json"
# ZIELGEOMETRIEN_DATA = "GeoJSON_Heidelberg_v05/targets.json"
## Level 1 (Wege):
# LEVEL = "L1"
# TARGET_SHAPE_ID_COL = "WAY_ID"
# GREEN_TARGET_COL = "PARENT_TARGET_green_target"
# ZIELGEOMETRIEN_DATA = "GeoJSON_Dresden_4.1/targetParts_Wege.json"
# ZIELGEOMETRIEN_DATA = "GeoJSON_Heidelberg/targets_Wege.json"
## Level 2 (Wege LC):
# LEVEL = "L2"
# TARGET_SHAPE_ID_COL = "LC_ID"
# GREEN_TARGET_COL = "PARENT_TARGET_green_target"
# ZIELGEOMETRIEN_DATA = "GeoJSON_Dresden_4.1/targetParts_Wege_LC.json"
# ZIELGEOMETRIEN_DATA = "GeoJSON_Heidelberg/targets_Wege_LC.json"
## Level 3 (Wege LULC):
# LEVEL = "L3"
# TARGET_SHAPE_ID_COL = "LULC_ID"
# GREEN_TARGET_COL = "PARENT_TARGET_green_target"
# ZIELGEOMETRIEN_DATA = "GeoJSON_Dresden_4.1/targetParts_Wege_LULC.json"
# ZIELGEOMETRIEN_DATA = "GeoJSON_Heidelberg/targets_Wege_LULC.json"
# LBSM_INPUT_DATA = "Output_cleaned_Dresden_FlickrOnly.csv"
# LBSM_INPUT_DATA = "Output_cleaned_Heidelberg_FlickrOnly.csv"
# LBSM_INPUT_DATA = "2019-09-03_meinGruen_Heidelberg.csv"
LBSM_INPUT_DATA = "2019-09-02_meinGruen_Dresden.csv"
def get_str_formatted_today():
    """Returns date as string (YYYY-mm-dd)"""
    today = dt.date.today()
    today_str = today.strftime("%Y-%m-%d")
    return today_str
TODAY = get_str_formatted_today()
display(Markdown(f'**Last execution date: {TODAY}**'))
Define manual boundaries for map zooms
Define the initial extent the map should be zoomed to. If empty, the map will show the full boundary of the provided target geometries (which may result in slow performance of the final map). To select boundary lat/lng coordinates, use (online tool link).
# Dresden Altstadt
LIM_LNG_MAX_1 = 13.750784
LIM_LNG_MIN_1 = 13.730367
LIM_LAT_MAX_1 = 51.055349
LIM_LAT_MIN_1 = 51.048746
# Dresden Center
LIM_LNG_MAX_2 = 13.788357
LIM_LNG_MIN_2 = 13.706689
LIM_LAT_MAX_2 = 51.060386
LIM_LAT_MIN_2 = 51.033973
# Dresden Neustadt:
LIM_LNG_MAX_3 = 13.782692
LIM_LNG_MIN_3 = 13.701024
LIM_LAT_MAX_3 = 51.087890
LIM_LAT_MIN_3 = 51.061492
# Heidelberg Center
LIM_LNG_MAX_4 = 8.73
LIM_LNG_MIN_4 = 8.68
LIM_LAT_MAX_4 = 49.420
LIM_LAT_MIN_4 = 49.404
# Total Bounds Dresden
LIM_LNG_MAX_5 = 14.0876945
LIM_LNG_MIN_5 = 13.46475261
LIM_LAT_MAX_5 = 51.24305035
LIM_LAT_MIN_5 = 50.90449772
Define the maximum search radius (in meters) to be used when intersecting polygons with points, and provide a city name (used when writing output data):
SEARCH_RADIUS_MAX = 50
CITY_NAME = "Dresden"
# CITY_NAME = "Heidelberg"
Load GeoJSON
geom_input_filepath = Path.cwd() / "01_Input" / ZIELGEOMETRIEN_DATA
%%time
meingruen_targetgeom_shapes = gp.read_file(geom_input_filepath)
total_bounds = meingruen_targetgeom_shapes.to_crs("epsg:4326").total_bounds
print(f'Total bounds: {total_bounds}')
print(f'Number of shapes: {len(meingruen_targetgeom_shapes)}')
print(f'{meingruen_targetgeom_shapes.crs}')
Assign bounds to global variables so we can use them later.
LIM_LNG_MIN, LIM_LAT_MIN, LIM_LNG_MAX, LIM_LAT_MAX = total_bounds
# gdf_targetgeom_shapes = meingruen_targetgeom_shapes.to_crs({'init': 'epsg:3395'})
gdf_targetgeom_shapes = meingruen_targetgeom_shapes.to_crs('epsg:3395')
Preview Shape Table
gdf_targetgeom_shapes.head()
Remove unnecessary columns
There are many columns in this dataset that are of no interest to the computation in this notebook. Have a look at what's there:
list(gdf_targetgeom_shapes.columns.values)
Drop columns that are not needed:
def try_drop_cols_df(gdf, drop_cols):
    """Drop columns from pandas dataframe with try..except"""
    for drop_col in drop_cols:
        # apply drop
        try:
            gdf.drop(
                columns=[drop_col], inplace=True)
        except KeyError:
            pass
drop_list = ['class0_ratio', 'class1_ratio', 'class2_ratio', 'class3_ratio',
             'class4_ratio', 'classNoData_ratio', 'class0_area', 'class1_area',
             'class2_area', 'class3_area', 'class4_area', 'classNoData_area',
             'SHAPE_Length', 'SHAPE_Area', 'WAY_AREA', 'WAY_LENGTH',
             'PARENT_TARGET_ID', 'PARENT_TARGET_gemeinde_DD', 'PARENT_WAY_ID',
             'LULC_origin', 'LULC_access', 'LULC_name', 'LULC_type',
             'LULC_type_ger', 'LULC_rank', 'LULC_AREA', 'LULC_LENGTH',
             'LC_LENGTH', 'LC_AREA', 'class_name', 'class_code',
             'TARGET_access', 'TARGET_name', 'TARGET_names', 'TARGET_type',
             'TARGET_types', 'TARGET_AREA', 'TARGET_LENGTH',
             'text_1', 'text_3', 'text_2']
# apply drop
try_drop_cols_df(gdf_targetgeom_shapes, drop_list)
Check the new list of remaining columns
list(gdf_targetgeom_shapes.columns.values)
gdf_targetgeom_shapes.geometry.plot(figsize=(20,10))
Print bounds of target shapes (World Mercator projection)
gdf_targetgeom_shapes.geometry.total_bounds
There are several projections used in this notebook: the input data comes in WGS1984 (epsg:4326), the shape preview uses World Mercator (epsg:3395), distance calculations are done in an automatically detected UTM projection, and the interactive Bokeh maps use Web Mercator (epsg:3857).
For accurate calculations in meters distance, we need to project the data to UTM. The tagmaps package has a method that returns the best UTM projection for a given lat-lng coordinate:
def get_poly_from_bounds(bounds):
    '''Construct a bounding box polygon from (xmin, ymin, xmax, ymax) bounds'''
    xmin, ymin, xmax, ymax = bounds
    bbox_polygon = Polygon([[xmin, ymax], [xmax, ymax], [xmax, ymin], [xmin, ymin]])
    return bbox_polygon
bound_poly = get_poly_from_bounds(total_bounds)
print(f'input_lon_center: {bound_poly.centroid.x}')
print(f'input_lat_center: {bound_poly.centroid.y}')
# Auto detection epsg_code:
epsg_code = Utils._convert_wgs_to_utm(
bound_poly.centroid.x, bound_poly.centroid.y)
print(epsg_code)
Or, specify manually:
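For example (illustrative values, not part of the auto-detection above): for standard UTM zones, the northern-hemisphere EPSG code can be derived from the longitude, and a fixed code could be assigned instead of the auto-detected one.
# optional manual override (uncomment and adjust):
# UTM zone = int((lon + 180) // 6) + 1; EPSG (northern hemisphere) = 32600 + zone
# epsg_code = 32633  # WGS 84 / UTM zone 33N (covers Dresden)
# epsg_code = 32632  # WGS 84 / UTM zone 32N (covers Heidelberg)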
crs_proj = f"epsg:{epsg_code}"
crs_wgs = "epsg:4326"
# define Transformer ahead of time
# with xy-order of coordinates
proj_transformer = Transformer.from_crs(
crs_wgs, crs_proj, always_xy=True)
def project_geometry(geom):
    """Project a geometry using shapely.ops.transform and pyproj.Transformer"""
    geom_proj = transform(proj_transformer.transform, geom)
    return geom_proj
# report
print(f'Target EPSG Code: {epsg_code}')
print(f'Target projection: {crs_proj}')
# project the target shapes to the UTM projection (crs_proj)
gdf_targetgeom_shapes.to_crs(crs_proj, inplace=True)
# convert to Geoviews Shapes
gv_shapes = gv.Polygons(
[gv.Shape(feat) for feat in gdf_targetgeom_shapes.geometry if isinstance(feat, (Polygon, MultiPolygon))],
crs=ccrs.epsg(epsg_code))
Plot static rasterized image with datashade
datashade(gv_shapes).opts(width=800, height=480)
Plot interactive rasterized image
%%opts Image [colorbar=True clipping_colors={'NaN': (0, 0, 0, 0)}]
original = rasterize(gv_shapes, precompute=True)
clipped = dynspread(datashade(gv_shapes))
# plot
gv.tile_sources.EsriImagery * \
clipped.opts(
width=800,
height=480,
alpha=0.8)
For appending points to polygons, there are several options available, e.g. geopandas' spatial join (sjoin) or an R-tree based intersection combined with shapely distance calculations. Since it is the fastest and most versatile, we'll explore the R-tree based approach.
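For reference, a minimal (commented) sketch of the simpler geopandas option; it assigns each point the polygon it falls into, but does not support a per-origin search radius, which is why it is not used below:
# sketch of the geopandas spatial-join alternative (not used in this notebook);
# both GeoDataFrames must share the same CRS
# points_in_polys = sjoin(
#     lbsm_data_gdf, gdf_targetgeom_shapes, how='inner', op='within')
# counts_per_shape = points_in_polys.groupby('index_right').size()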
1. Load LBSM Data
Load the CSV of LBSM points into a pandas dataframe; two formats are available.
First, detect the format.
file_input_path = Path.cwd() / "01_Input" / LBSM_INPUT_DATA
with open(file_input_path, 'r') as infile:
    reader = csv.DictReader(infile)
    fieldnames = reader.fieldnames
# there are two different formats of input data:
# cleaned posts (UPL) and raw data (UP);
# cleaned posts have a slightly different format,
# which is detected here based on the CSV's header
if 'latitude' in fieldnames:
    raw_format = True
    usecols = ['origin_id', 'latitude', 'longitude', 'user_guid', 'post_create_date', 'post_publish_date']
    dtypes = {'origin_id': str, 'latitude': float, 'longitude': float, 'user_guid': str, 'post_create_date': str, 'post_publish_date': str}
elif 'lat' in fieldnames:
    raw_format = False
    usecols = ['origin_id', 'lat', 'lng', 'user_guid', 'post_create_date', 'post_publish_date']
    dtypes = {'origin_id': str, 'lat': float, 'lng': float, 'user_guid': str, 'post_create_date': str, 'post_publish_date': str}
else:
    print("Unknown Format")
Load data in dataframe df
%%time
df = pd.read_csv(file_input_path, usecols=usecols, dtype=dtypes, encoding='utf-8')
print(f'Loaded {len(df)} posts.')
For Flickr, post_create_date and post_publish_date are available; for Instagram and Twitter, only post_publish_date is available.
try:
    df['post_create_date'].fillna(df['post_publish_date'], inplace=True)
    df.drop(columns=['post_publish_date'], inplace=True)
    df.rename(columns={'post_create_date': 'post_date'}, inplace=True)
except KeyError:
    pass
df.head()
Optional: Get some statistics for origin
plt.rcParams.update({'xtick.labelsize': 'small'})
plt.rcParams.update({'ytick.labelsize': 'small'})
plt.rc('legend', fontsize=10)
sns.countplot(x='origin_id', hue='origin_id', data=df)
size_count = df.groupby('origin_id').size()
total_count = len(df)
for key, value in size_count.items():
    print(f'Origin ID {key} with {value} posts, total percentage is: {value/(total_count/100):.2f}%')
2. Convert pandas dataframe to geopandas geodataframe
def make_point(row):
    if raw_format:
        return Point(row.longitude, row.latitude)
    return Point(row.lng, row.lat)
# Go through every row, and make a point out of its lat and lon
points = df.apply(make_point, axis=1)
# Make a new GeoDataFrame
# using the data from our old df
# but also adding in the geometry we just made
lbsm_data_gdf = gp.GeoDataFrame(df, geometry=points, crs=crs_wgs)
# project to UTM
lbsm_data_gdf = lbsm_data_gdf.set_geometry(lbsm_data_gdf.geometry.apply(project_geometry))
# alternative for newer geopandas versions:
# lbsm_data_gdf.to_crs(crs_proj, inplace=True)
lbsm_data_gdf.head()
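As a side note, newer geopandas versions provide a vectorized helper that avoids the row-wise apply; a minimal (commented) sketch, assuming the raw format with latitude/longitude columns:
# vectorized alternative (newer geopandas), equivalent to make_point + apply:
# lbsm_data_gdf = gp.GeoDataFrame(
#     df, geometry=gp.points_from_xy(df.longitude, df.latitude), crs=crs_wgs)
# lbsm_data_gdf = lbsm_data_gdf.to_crs(crs_proj)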
Let's test the intersection for a small sample
a) Get one polygon from meinGrün Target Geometries
# Get row for specific ID
id = 9
# id = 20
# id = 26
# id = 29
# id = 42
gdf_targetgeom_shapes.iloc[id].geometry
It is possible to get the bounds of the selected geometry with:
gdf_targetgeom_shapes.iloc[id].geometry.bounds
b) Prepare poly geoviews layer
gv_sample_poly = gv.Shape(
gdf_targetgeom_shapes.iloc[id].geometry,
label=f'meinGrün Target Shape', crs=ccrs.epsg(epsg_code))
gv_sample_poly_buf_50m = gv.Shape(
gdf_targetgeom_shapes.iloc[id].geometry.buffer(50, resolution=3, cap_style=1, join_style=1, mitre_limit=5.0),
label=f'Search Radius 50m', crs=ccrs.epsg(epsg_code))
gv_sample_poly_buf_25m = gv.Shape(
gdf_targetgeom_shapes.iloc[id].geometry.buffer(25, resolution=3, cap_style=1, join_style=1, mitre_limit=5.0),
label=f'Search Radius 25m', crs=ccrs.epsg(epsg_code))
c) Optional: Test Prepare point geoviews layer (slow)
%%time
def add_buffer(geom, buffer_num=50, return_poly=False):
    '''Add small buffer around poly bounds
    Buffer defaults to: 50m
    '''
    xmin, ymin, xmax, ymax = geom.bounds
    xmin = xmin - buffer_num
    ymin = ymin - buffer_num
    xmax = xmax + buffer_num
    ymax = ymax + buffer_num
    if return_poly:
        bbox_polygon = Polygon(
            [[xmin, ymax],
             [xmax, ymax],
             [xmax, ymin],
             [xmin, ymin]])
        return bbox_polygon
    return xmin, ymin, xmax, ymax
# adjust number for sample polygon
sample_polygon = gdf_targetgeom_shapes.iloc[id]
xmin, ymin, xmax, ymax = add_buffer(
sample_polygon.geometry)
# intersect/clip points based on bbox input
# this is very slow! we'll use an improved version for mass intersection
sample_point_gdf = lbsm_data_gdf.cx[xmin:xmax, ymin:ymax]
print(f'Intersections found for id {id}: {len(sample_point_gdf)} posts.')
# for automatic selection of polygon with many intersections
# use code block below
# max_intersect = 0
# max_id_ref = None
# for id in range(1,30):
# # adjust number for sample polygon
# sample_polygon = gdf_targetgeom_shapes.iloc[id]
# xmin, ymin, xmax, ymax = add_buffer(
# sample_polygon.geometry)
#
# # intersect/clip points based on bbox input
# sample_point_temp = lbsm_data_gdf.cx[xmin:xmax, ymin:ymax]
# number_of_intersections = len(sample_point_temp)
# if number_of_intersections > max_intersect:
# max_id_ref = id
# max_intersect = number_of_intersections
# sample_point_gdf = sample_point_temp
#
# # report
# print(f'Max Intersections found for id {max_id_ref}: {len(sample_point_gdf)} posts.')
d) Prepare Geoviews Layer for selected Sample Points
sample_point_selection = gv.Points(sample_point_gdf, crs=ccrs.epsg(epsg_code))
# sample_point_selection_25m = gv.Points(sample_point_gdf.loc[sample_point_gdf['origin_id'] == "2"], crs=ccrs.epsg(epsg_code))
# sample_point_selection_50m = gv.Points(sample_point_gdf.loc[sample_point_gdf['origin_id'].isin(["1", "3"])], crs=ccrs.epsg(epsg_code))
e) Output sample selection map
def set_active_tool(plot, element):
    # enable wheel_zoom by default
    plot.state.toolbar.active_scroll = plot.state.tools[0]
# the Overlay would automatically extend to the data dimensions,
# which cover the whole city area;
# since we want the initial zoom to be on the selected sample geometry,
# we need to supply xlim and ylim.
# The Bokeh renderer only supports Web Mercator projection,
# we therefore need to project the sample bounds first
# from WGS1984 to Web Mercator
sample_bottomleft = proj_transformer.transform(
xmin, ymin)
sample_topright = proj_transformer.transform(
xmax, ymax)
# to show 200m radius
# centre_line = pyproj.transform(
# pyproj.Proj(init=f'EPSG:{epsg_code}'),
# pyproj.Proj(init='EPSG:3857'),
# gdf_targetgeom_shapes.iloc[max_id_ref].geometry.centroid.x,
# gdf_targetgeom_shapes.iloc[max_id_ref].geometry.centroid.y)
hv.Overlay(
gv.tile_sources.EsriImagery * \
gv_sample_poly.opts(line_width=2, fill_alpha=0.5, line_color='red', show_legend=True) * \
gv_sample_poly_buf_25m.opts(line_width=2, fill_alpha=0, line_color='red', line_dash='dotted', show_legend=True) * \
gv_sample_poly_buf_50m.opts(line_width=2, fill_alpha=0, line_color='orange', line_dash='dotted', show_legend=True) * \
# sample_point_selection_50m.opts(line_color='orange', line_width=3, fill_alpha=0.5, tools=['hover'], show_legend=True) * \
# sample_point_selection_25m.opts(color='red', line_width=3, fill_alpha=0.5, tools=['hover'], show_legend=True) * \
# hv.Ellipse(centre_line[0], centre_line[1], 200, label='200m Buffer').opts(line_color='red', line_width=3, show_legend=True) * \
sample_point_selection.opts(tools=['hover'], size=5, color='yellow') # * \
# gv_testpoint
).opts(
width=800,
height=480,
finalize_hooks=[set_active_tool],
xlim=(sample_bottomleft[0], sample_topright[0]),
ylim=(sample_bottomleft[1], sample_topright[1])
)
We'll create an R-tree with boxes for each target geometry. This is the most expensive operation; afterwards, the expensive shapely distance calculation only needs to run against the small number of candidate polygons returned by the index. First, construct the R-tree from the target geometries' bounds. We'll extend each geometry's bounds by a small buffer so that we also catch points slightly outside a polygon. Adjust buffer_num to your needs; at minimum, buffer_num should equal the maximum SEARCH_RADIUS being used.
%%time
# Populate R-tree index with bounds of polygons
idx = index.Index()
for pos, poly in enumerate(gdf_targetgeom_shapes.geometry):
    idx.insert(pos, add_buffer(poly, buffer_num=50))
Intersect our testpoint with the R-tree
Next, we intersect our testpoint with the R-tree and select those target polygons that are near the testpoint, within a definable search radius. This is very fast, because the heavy lifting is done by the spatial R-tree intersection (idx.intersection((testpoint.coords[0]))), while the expensive distance calculation from shapely (gdf_targetgeom_shapes.geometry[i].distance(testpoint)) is only done for the few polygons that are preselected based on their buffered bounds.
Since Instagram and Twitter have lower geo-accuracy on average, we use a larger search radius of 50 m for them, whereas for Flickr we'll use 25 m.
SEARCH_RADIUS_ORIGIN = {
    "1": 50,  # Instagram
    "2": 25,  # Flickr
    "3": 50}  # Twitter
lbsm_data_gdf.head()
%%time
# Query testpoint to see which polygon it is in
# using first Rtree index, then Shapely geometry's within
# SEARCH_RADIUS = 25
# to look at other points here, override id:
id = 101
lbsm_post = lbsm_data_gdf.iloc[id]
SEARCH_RADIUS = SEARCH_RADIUS_ORIGIN.get(lbsm_post.origin_id)
testpoint = lbsm_post.geometry
gv_testpoint = gv.Points(
[testpoint], crs=ccrs.epsg(epsg_code), label='Testpoint'
).opts(tools=['hover'], size=5, color='yellow')
# calculate boundary to show
xmin, ymin, xmax, ymax = add_buffer(lbsm_post.geometry)
sample_bottomleft = proj_transformer.transform(
xmin, ymin)
sample_topright = proj_transformer.transform(
xmax, ymax)
# with rtree only
poly_idx_rtree = [i for i in idx.intersection((testpoint.coords[0]))]
# with search radius
poly_idx = [i for i in idx.intersection((testpoint.coords[0]))
if gdf_targetgeom_shapes.geometry[i].distance(testpoint) < SEARCH_RADIUS]
#if gdf.geometry[i].interpolate(gdf.geometry[i].project(testpoint)).distance(testpoint) > search_radius]
# without search radius, exact intersection
poly_idx_exact = [i for i in idx.intersection((testpoint.coords[0]))
if gdf_targetgeom_shapes.geometry[i].intersects(testpoint)]
# report
print(f'{len(poly_idx_rtree)} preselected polygons through rtree')
print(f'{len(poly_idx)} potentially relevant polygons found (within search radius {SEARCH_RADIUS}).')
print(f'{len(poly_idx_exact)} exact intersection polygons found.')
print(poly_idx)
gdf_targetgeom_shapes.iloc[poly_idx].head()
testpoly_sel = gv.Polygons(
[gv.Shape(feat) for feat in gdf_targetgeom_shapes.iloc[poly_idx].geometry],
crs=ccrs.epsg(epsg_code),
label=f'Found {len(poly_idx)} Target Polygons (search radius {SEARCH_RADIUS}m)')
testpoly_sel_exact = gv.Polygons(
[gv.Shape(feat) for feat in gdf_targetgeom_shapes.iloc[poly_idx_exact].geometry],
crs=ccrs.epsg(epsg_code),
label=f'Found {len(poly_idx_exact)} Target Polygons (exact intersection)')
hv.Overlay(
gv.tile_sources.EsriImagery * \
# gv_shapes.opts(line_color='white', line_width=1) * \
testpoly_sel.opts(line_color='yellow', line_width=3, fill_alpha=0, show_legend=True) * \
testpoly_sel_exact.opts(line_color='red', line_width=3, fill_alpha=0, show_legend=True, line_dash='dotted') * \
gv_testpoint.opts(tools=['hover'], size=10, color='red', show_legend=True)
#hv.Ellipse(gv_testpoint.x, gv_testpoint.y, 35, label='200m Buffer').opts(line_color='yellow', line_width=1, show_legend=True)
).opts(
width=800,
height=480,
xlim=(sample_bottomleft[0], sample_topright[0]),
ylim=(sample_bottomleft[1], sample_topright[1]),
title_format=f"Selected Target Geometries for Testpoint and {SEARCH_RADIUS}m Search Radius",
)
We execute this intersection for each point in our LBSM dataset and update all counts in nearby polygons of the meinGrün target geometries.
1. Add statistics fields to input geometries
gdf_targetgeom_shapes.head()
Initialize count structures
There are several things we want to measure per meinGrün target shape: the total number of user posts, the set of user days, the set of user post locations, and per-origin post counts for Instagram, Flickr, and Twitter:
# init count structures
gdf_targetgeom_shapes['User_Posts'] = 0
gdf_targetgeom_shapes['User_Days'] = [set() for _ in range(len(gdf_targetgeom_shapes))]
gdf_targetgeom_shapes['User_Post_Locations'] = [set() for _ in range(len(gdf_targetgeom_shapes))]
gdf_targetgeom_shapes['InstagramCount'] = 0
gdf_targetgeom_shapes['FlickrCount'] = 0
gdf_targetgeom_shapes['TwitterCount'] = 0
gdf = gdf_targetgeom_shapes
gdf_targetgeom_shapes.head()
%%time
def get_userday(post):
    """Remove time from date-time string, e.g. 2019-01-01"""
    if isinstance(post.post_date, str) and len(post.post_date) >= 10:
        return f'{post.user_guid}:{post.post_date[:10]}'
    return f'{post.user_guid}:1900-01-01'

def get_location(post_geom):
    """Concat lat:lng to string"""
    return f'{post_geom.y}:{post_geom.x}'

def concat_userlocation(user_guid, location_str):
    """Concat user:lat:lng to string"""
    return f'{user_guid}:{location_str}'
x = 0
x_i = 0
# get ids from column names
column_id_up = gdf.columns.get_loc("User_Posts")
column_id_ud = gdf.columns.get_loc("User_Days")
column_id_upl = gdf.columns.get_loc("User_Post_Locations")
col_id_instagram = gdf.columns.get_loc('InstagramCount')
col_id_flickr = gdf.columns.get_loc('FlickrCount')
col_id_twitter = gdf.columns.get_loc('TwitterCount')
origin_col_dict = {
"1":col_id_instagram, "2":col_id_flickr, "3":col_id_twitter}
# initialize dict lookup for location-poly-idx
latlng_dict_polyidx = dict()
total_records = len(lbsm_data_gdf)
for lbsm_record in lbsm_data_gdf.itertuples():
    # print(lbsm_record.geometry)
    # break
    # update status
    x += 1
    msg_text = (
        f'Processed records: {x} ({x/(total_records/100):.2f}%). '
        f'Intersection found: {x_i}')
    if x % 100 == 0:
        clear_output(wait=True)
        print(msg_text)
    # get search radius based on the current post's origin
    SEARCH_RADIUS = SEARCH_RADIUS_ORIGIN.get(
        lbsm_record.origin_id)
    # we save some processing time by remembering
    # repeated locations, sacrificing some RAM:
    # a spatial intersection is more expensive than a dict lookup
    location_str = get_location(lbsm_record.geometry)
    if location_str in latlng_dict_polyidx:
        poly_idx = latlng_dict_polyidx[location_str]
    else:
        poly_idx = [i for i in idx.intersection((lbsm_record.geometry.coords[0]))
                    if gdf.geometry[i].distance(lbsm_record.geometry) < SEARCH_RADIUS]
        # add to dict to speed up repeated locations
        latlng_dict_polyidx[location_str] = poly_idx
    # count intersection
    if len(poly_idx) > 0:
        x_i += 1
        userday = get_userday(lbsm_record)
        userlocation = concat_userlocation(lbsm_record.user_guid, location_str)
        # update poly count columns
        # the iloc indexer for Pandas Dataframe is used for integer-
        # location based indexing / selection by position.
        # this may update/increment multiple rows/records
        origin_col = origin_col_dict.get(lbsm_record.origin_id)
        gdf.iloc[poly_idx, origin_col] += 1
        gdf.iloc[poly_idx, column_id_up] += 1
        # the following 2 operations are quite expensive on memory/cpu
        # disable if not needed
        # gdf.iloc[poly_idx, column_id_ud].apply(lambda x: x.add(userday))
        # gdf.iloc[poly_idx, column_id_upl].apply(lambda x: x.add(userlocation))
# final status
clear_output(wait=True)
print(msg_text)
Optional: store or load intermediate aggregated dataframe results
Store:
gdf.to_pickle(f"gdf_{CITY_NAME}_{LEVEL}_{TARGETSHAPE_VERSION}_{INTERSECT_VERSION}.pickle")
Load:
gdf = pd.read_pickle(f"gdf_{CITY_NAME}_{LEVEL}_{TARGETSHAPE_VERSION}_{INTERSECT_VERSION}.pickle")
gdf.head()
Preview results table
gdf[[TARGET_SHAPE_ID_COL, 'User_Posts', 'User_Days', 'User_Post_Locations', 'InstagramCount', 'FlickrCount', 'TwitterCount']].head()
Convert Sets to unique counts
gdf['UD'] = gdf.apply(lambda row: len(row.User_Days), axis=1)
gdf['UPL'] = gdf.apply(lambda row: len(row.User_Post_Locations), axis=1)
gdf[[TARGET_SHAPE_ID_COL, 'User_Posts', 'User_Days', 'User_Post_Locations', 'UD', 'UPL', 'InstagramCount', 'FlickrCount', 'TwitterCount']].head()
Remove the set columns, since they're not needed anymore:
gdf.drop(columns=['User_Post_Locations', 'User_Days'], inplace=True)
gdf.rename(columns={'User_Posts': 'UP'}, inplace=True)
gdf.rename(columns={'InstagramCount': 'UP_Instagram'}, inplace=True)
gdf.rename(columns={'FlickrCount': 'UP_Flickr'}, inplace=True)
gdf.rename(columns={'TwitterCount': 'UP_Twitter'}, inplace=True)
Our final GeoDataFrame with aggregated measurements per green target shape:
gdf[[TARGET_SHAPE_ID_COL, 'UP', 'UD', 'UPL', 'UP_Instagram', 'UP_Flickr', 'UP_Twitter']].head()
gdf[gdf.UP > 0].head()
Get Histogram
The histogram shows the typical long tail pattern: most shapes have few posts, but a few shapes have a lot of posts.
np.sqrt(np.sqrt(gdf['UP'])).hist(bins=20, alpha=0.5)
Absolute counts per shape are not very meaningful, since large shapes have a higher likelihood of intersecting with many posts.
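To illustrate with purely hypothetical numbers why the counts are normalized by area below: a 0.01 km² pocket park with 10 posts is far more densely "posted" than a 1 km² park with 50 posts, even though the latter has five times the absolute count.
# hypothetical illustration of why counts are normalized by area
small_shape_density = 10 / 0.01  # 10 posts on 0.01 km² -> 1000 posts per km²
large_shape_density = 50 / 1.0   # 50 posts on 1 km²    ->   50 posts per km²
print(small_shape_density, large_shape_density)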
Calculate buffered Area for search distances
# Get row for specific ID
id = 0
gdf.iloc[id].geometry
gdf.iloc[id].geometry.buffer(50, resolution=3, cap_style=1, join_style=1, mitre_limit=5.0)
print(f'{gdf.iloc[id].geometry.area:.2f} m²')
print(f'50m Search radius: {gdf.iloc[id].geometry.buffer(50, resolution=16, cap_style=1, join_style=1, mitre_limit=5.0).area:.2f} m²')
print(f'25m Search radius: {gdf.iloc[id].geometry.buffer(25, resolution=16, cap_style=1, join_style=1, mitre_limit=5.0).area:.2f} m²')
def m2_to_km2(m_num):
    """Convert square meters to square kilometres"""
    return m_num/1000000
print(f'{m2_to_km2(gdf.iloc[id].geometry.area):.4f} km²')
print(f'50m Search radius: {m2_to_km2(gdf.iloc[id].geometry.buffer(50, resolution=16, cap_style=1, join_style=1, mitre_limit=5.0).area):.4f} km²')
print(f'25m Search radius: {m2_to_km2(gdf.iloc[id].geometry.buffer(25, resolution=16, cap_style=1, join_style=1, mitre_limit=5.0).area):.4f} km²')
Calculate km²
def shape_buf(geom, buf_dist_m):
    """Buffer geometry based on m distance"""
    buf_geom = geom.buffer(buf_dist_m, resolution=16, cap_style=1, join_style=1, mitre_limit=5.0)
    return buf_geom
gdf['area_skm'] = gdf['geometry'].apply(lambda x: m2_to_km2(x.area))
# same result:
# gdf['poly_area'] = gdf.geometry.area / 10**6
gdf['area_skm_SR25'] = gdf['geometry'].apply(lambda x: m2_to_km2(shape_buf(x, 25).area))
gdf['area_skm_SR50'] = gdf['geometry'].apply(lambda x: m2_to_km2(shape_buf(x, 50).area))
# gdf[[TARGET_SHAPE_ID_COL, 'UP', 'UD', 'UPL', 'UP_Instagram', 'UP_Flickr', 'UP_Twitter', 'area_skm', 'area_skm_SR25', 'area_skm_SR50']].head()
gdf[[TARGET_SHAPE_ID_COL, 'UP', 'UP_Instagram', 'UP_Flickr', 'UP_Twitter', 'area_skm', 'area_skm_SR25', 'area_skm_SR50']].head()
Normalize
def stretch_norm_dfcolumn(df, column, min_stretch, max_stretch):
    """Interpolates a Geodataframe column's values to a new range"""
    min_val = df[column].min()
    max_val = df[column].max()
    # replace values in-place
    # the lambda function is applied to all values from _column_
    # df.loc[:, column] means: replace values of _column_ in-place (modifying the original dataframe)
    # .loc[row_indexer, col_indexer]
    df.loc[:, column] = df[column].apply(
        lambda x: np.interp(x, (min_val, max_val), (min_stretch, max_stretch)))
    # similar but not recommended:
    # df[column] = np.interp(df[column], (min_val, max_val), (min_stretch, max_stretch))
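As a quick sanity check of the interpolation, a single value can be stretched manually (hypothetical numbers):
# np.interp maps a value from the input range (0, 10) to the output range (1, 1000)
np.interp(5, (0, 10), (1, 1000))  # -> 500.5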
# it is possible to 'flatten' the unequal distribution of values:
# apply sqrt once for a bit of flattening, twice for stronger flattening
gdf['UP_Norm'] = np.sqrt(gdf['UP']/gdf['area_skm'])
# gdf['UP_Norm'] = np.sqrt(np.sqrt(gdf['UP']/gdf['poly_area']))
# interpolate to 1 to 1000 range
stretch_norm_dfcolumn(gdf, 'UP_Norm', 1, 1000)
# print
gdf[[TARGET_SHAPE_ID_COL, 'UP', 'UP_Norm', 'UP_Instagram', 'UP_Flickr', 'UP_Twitter']].sort_values('UP_Norm', ascending=False).head()
# gdf[[TARGET_SHAPE_ID_COL, 'UP', 'UP_Norm', 'UD', 'UPL', 'UP_Instagram', 'UP_Flickr', 'UP_Twitter']].sort_values('UP_Norm', ascending=False).head()
Normalize Origin Influence
It becomes obvious that Instagram dominates the results. We want to generate a more even result across the three data sources.
Our next goal is therefore to limit the influence each origin has on the final weight.
We can do this by using the information from the UP_Instagram, UP_Flickr, and UP_Twitter columns.
Since we've used a search radius of 50 m for Instagram and Twitter and a search radius of 25 m for Flickr, we need to normalize the count values separately, based on origin and the respective buffered area.
Remember the original percentages of the three data sources:
origin_perc_total = dict()
for key, value in size_count.items():
    total_perc = value/(total_count/100)
    print(f'Origin ID {key} with {value} posts, total percentage is: {total_perc:.2f}%')
    origin_perc_total[key] = total_perc
Thus, we need to perform a two-step normalization: first, normalize the per-origin counts by the respective buffered area (flattened with a square root); second, divide by each origin's overall share of the dataset and apply manual weights.
def norm_ip_origin(row):
    total_up = row['UP']
    # skip shapes without any posts
    if total_up == 0:
        return 0
    # get counts per origin,
    # normalize by the buffered area of the respective search radius,
    # and flatten the curve using a square root
    instagram_up = np.sqrt(row['UP_Instagram']/row['area_skm_SR50'])
    flickr_up = np.sqrt(row['UP_Flickr']/row['area_skm_SR25'])
    twitter_up = np.sqrt(row['UP_Twitter']/row['area_skm_SR50'])
    total_up = instagram_up + flickr_up + twitter_up
    # scale by each origin's share of the total dataset
    # and apply manual weights
    instagram_up_norm = instagram_up / (origin_perc_total.get("1") / 100) * 0.4
    flickr_up_norm = flickr_up / (origin_perc_total.get("2") / 100) * 0.5
    twitter_up_norm = twitter_up / (origin_perc_total.get("3") / 100) * 0.1
    sum_norm = instagram_up_norm + flickr_up_norm + twitter_up_norm
    return sum_norm
gdf['UP_Norm_Origin'] = gdf.apply(lambda row: norm_ip_origin(row), axis=1)
# gdf['UP_Norm_Origin'] = np.sqrt(gdf['UP_Norm_Origin']/gdf['poly_area'])
stretch_norm_dfcolumn(gdf, 'UP_Norm_Origin', 1, 1000)
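To see how this weighting behaves, here is a small worked example with purely hypothetical numbers (assuming Instagram makes up 60% of all posts, Flickr 30%, and Twitter 10%, and that the area-normalized values of one shape are 10, 5, and 2, respectively):
# hypothetical example of the origin weighting (not part of the actual computation)
instagram_up, flickr_up, twitter_up = 10, 5, 2  # sqrt(count / buffered area), per origin
origin_share = {"1": 60, "2": 30, "3": 10}      # hypothetical dataset shares in percent
instagram_norm = instagram_up / (origin_share["1"] / 100) * 0.4  # ~6.7
flickr_norm = flickr_up / (origin_share["2"] / 100) * 0.5        # ~8.3
twitter_norm = twitter_up / (origin_share["3"] / 100) * 0.1      # 2.0
print(instagram_norm + flickr_norm + twitter_norm)               # ~17.0
Although Instagram dominates the raw counts, Flickr contributes most to the weighted sum here, which is the intended effect.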
Compare difference of UP_Norm and UP_Norm_Origin
# gdf[[TARGET_SHAPE_ID_COL, 'UP', 'UP_Norm', 'UP_Norm_Origin', 'UD', 'UPL', 'UP_Instagram', 'UP_Flickr', 'UP_Twitter']].sort_values('UP', ascending=False).head(20)
gdf[[TARGET_SHAPE_ID_COL, 'UP', 'UP_Norm', 'UP_Norm_Origin', 'UP_Instagram',
'UP_Flickr', 'UP_Twitter', 'area_skm', 'area_skm_SR25', 'area_skm_SR50']].sort_values('UP', ascending=False).head(20)
gdf['UP_Norm_Origin'].hist(bins=20, alpha=0.5)
# get top poly
top_norm = gdf.sort_values('UP_Norm_Origin', ascending=False).iloc[0]
top_norm_count = gdf.sort_values('UP', ascending=False).iloc[0]
# normalized
top_norm_poly = top_norm.geometry
top_norm_value = top_norm.UP_Norm_Origin
# raw
top_raw_poly_count = top_norm_count.geometry
top_raw_value_count = top_norm_count.UP
# construct geoviews layer
gv_sample_poly_top_norm = gv.Shape(
top_norm_poly,
label=f'meinGrün Top Target Polygon ({top_norm_value} Normalized Post Count based on Area and Service Origin)',
crs=ccrs.epsg(epsg_code))
# construct geoviews layer
gv_sample_poly_top_count = gv.Shape(
top_raw_poly_count,
label=f'meinGrün Top Target Polygon ({top_raw_value_count} Total Post Count)',
crs=ccrs.epsg(epsg_code))
# calculate boundary to show
xmin, ymin, xmax, ymax = add_buffer(top_norm_poly)
sample_bottomleft = proj_transformer.transform(
xmin, ymin)
sample_topright = proj_transformer.transform(
xmax, ymax)
hv.Overlay(
gv.tile_sources.EsriImagery * \
gv_sample_poly_top_count.opts(line_color='yellow', line_width=3, fill_alpha=0.2, show_legend=True) * \
gv_sample_poly_top_norm.opts(line_color='red', line_width=3, fill_alpha=0.2, show_legend=True)
).opts(
width=800,
height=480,
xlim=(sample_bottomleft[0], sample_topright[0]),
ylim=(sample_bottomleft[1], sample_topright[1]),
title_format=f"Selected Target Geometries for Testpoint and {SEARCH_RADIUS}m/50m Search Radius",
)
Now we can simply pass the GeoDataFrame to a gv.Polygons object and declare the measure as the first value dimension, which it will be colored by:
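A minimal (commented) sketch of that call; the full, filtered version used for the final map follows further below:
# gv.Polygons(gdf, vdims=['UP_Norm_Origin'], crs=ccrs.epsg(epsg_code))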
if GREEN_TARGET_COL is not None:
    gdf_meingruen_official_targets = gdf.query(
        f'{GREEN_TARGET_COL} == 1')
# make a legend using a dictionary to assign colors
cmap = matplotlib.cm.get_cmap('Greens')
popularity_legend = {
'very high': cmap(1000/1000),
'high': cmap(450/1000),
'average': cmap(165/1000),
'low': cmap(56/1000),
'very low': cmap(14/1000),
'no data': cmap(1)}
def add_patches(legend):
    ax = legend.axes
    handles, labels = ax.get_legend_handles_labels()
    labels.append("LBSM Popularity")
    handles.append(Patch(facecolor="none", fill=False, edgecolor='none', linewidth=0))
    for key in popularity_legend:
        data_key = Patch(edgecolor='none', facecolor=popularity_legend[key])
        handles.append(data_key)
        labels.append(key)
    labels.append("Official green target")
    handles.append(Patch(facecolor="none", fill=False, edgecolor='red', linewidth=0.7))
    legend._legend_box = None
    legend._init_legend_box(handles, labels)
    legend._set_loc(legend._loc)
    legend.set_title(legend.get_title().get_text())
def get_map(lim_lng_min, lim_lat_min, lim_lng_max, lim_lat_max, title, gdf, column, show_green_targets=None):
    """Plot area in matplotlib (pyplot).
    Note: Natural Breaks algorithm is used here to classify colors, remove to see raw values"""
    bb_bottomleft = proj_transformer.transform(
        lim_lng_min, lim_lat_min)
    bb_topright = proj_transformer.transform(
        lim_lng_max, lim_lat_max)
    # bb_bottomleft = pyproj.transform(pyproj.Proj(init='EPSG:4326'), pyproj.Proj(init=f'EPSG:{epsg_code}'), lim_lng_min, lim_lat_min)
    # bb_topright = pyproj.transform(pyproj.Proj(init='EPSG:4326'), pyproj.Proj(init=f'EPSG:{epsg_code}'), lim_lng_max, lim_lat_max)
    fig, ax = plt.subplots()
    fig.set_size_inches(18.5, 10.5, forward=True)
    ax.set_title(f'LBSM Target Geometry Weights for {title}', fontsize=20)
    ax.set_xlim([bb_bottomleft[0], bb_topright[0]])
    ax.set_ylim([bb_bottomleft[1], bb_topright[1]])
    # plt.legend(handles=patch_list)
    plt.savefig('legend.png', bbox_inches='tight')
    base = gdf.plot(column=column, figsize=(20,10), cmap='Greens', ax=ax, scheme="natural_breaks", label='Normalized User Post Locations (UPL) based on Area', legend=True)
    if show_green_targets:
        lgd = ax.legend(loc='upper right')
        add_patches(lgd)
        gdf_meingruen_official_targets.plot(ax=base, facecolor="none", edgecolor='red', lw=0.7, label='Official green target', legend=True)
# plot the data
def get_map_b(lim_lng_min, lim_lat_min, lim_lng_max, lim_lat_max, title, gdf, column, show_green_targets=None):
    """Plot area in matplotlib (pyplot).
    Note: Natural Breaks algorithm is used here to classify colors, remove to see raw values"""
    bb_bottomleft = proj_transformer.transform(
        lim_lng_min, lim_lat_min)
    bb_topright = proj_transformer.transform(
        lim_lng_max, lim_lat_max)
    # bb_bottomleft = pyproj.transform(pyproj.Proj(init='EPSG:4326'), pyproj.Proj(init=f'EPSG:{epsg_code}'), lim_lng_min, lim_lat_min)
    # bb_topright = pyproj.transform(pyproj.Proj(init='EPSG:4326'), pyproj.Proj(init=f'EPSG:{epsg_code}'), lim_lng_max, lim_lat_max)
    axes = plt.gca()
    fig = plt.gcf()
    fig.set_size_inches(18.5, 10.5, forward=True)
    # fig.suptitle('LBSM Target Geometry Weights for Dresden Altstadt', fontsize=20)
    axes.set_title(f'LBSM Target Geometry Weights for {title}', fontsize=20)
    axes.set_xlim([bb_bottomleft[0], bb_topright[0]])
    axes.set_ylim([bb_bottomleft[1], bb_topright[1]])
    base = gdf.plot(column=column, figsize=(20,10), cmap='Greens', ax=axes, scheme="natural_breaks", label='Normalized User Post Locations (UPL) based on Area', legend=True)
    if show_green_targets:
        gdf_meingruen_official_targets.plot(ax=base, facecolor="none", edgecolor='red', lw=0.7, label='Official green target', legend=True)
#gdf.to_crs({'init': 'epsg:3395'}).plot(column='UP_Norm', figsize=(20,10), cmap='Reds', ax=axes)
get_map(LIM_LNG_MIN, LIM_LAT_MIN, LIM_LNG_MAX, LIM_LAT_MAX, f'{CITY_NAME} {LEVEL} {TARGETSHAPE_VERSION}', gdf, 'UP_Norm_Origin', show_green_targets=False)
get_map(LIM_LNG_MIN_4, LIM_LAT_MIN_4, LIM_LNG_MAX_4, LIM_LAT_MAX_4, f"Heidelberg Center {LEVEL} {TARGETSHAPE_VERSION}", gdf, 'UP_Norm_Origin', show_green_targets=False)
get_map(LIM_LNG_MIN_1, LIM_LAT_MIN_1, LIM_LNG_MAX_1, LIM_LAT_MAX_1, f"Dresden Altstadt {LEVEL} {TARGETSHAPE_VERSION}", gdf, 'UP_Norm_Origin', show_green_targets=False)
get_map(LIM_LNG_MIN_2, LIM_LAT_MIN_2, LIM_LNG_MAX_2, LIM_LAT_MAX_2, f"Dresden Center {LEVEL} {TARGETSHAPE_VERSION}", gdf, 'UP_Norm_Origin', show_green_targets=False)
get_map(LIM_LNG_MIN_3, LIM_LAT_MIN_3, LIM_LNG_MAX_3, LIM_LAT_MAX_3, f"Dresden Neustadt {LEVEL} {TARGETSHAPE_VERSION}", gdf, 'UP_Norm_Origin', show_green_targets=False)
To save Figures above as SVG, run the following cell after each figure generation
plt.savefig(f"{CITY_NAME.lower()}_{LEVEL}_{TARGETSHAPE_VERSION}_A1.svg", format="svg")
if GREEN_TARGET_COL is not None:
    gdf_filtered = gdf.query(f'{GREEN_TARGET_COL} == 1').copy()
    # stretch_norm_dfcolumn(gdf_filtered, 'UP_Norm', 1, 1000)
    # get_map(LIM_LNG_MIN_5, LIM_LAT_MIN_5, LIM_LNG_MAX_5, LIM_LAT_MAX_5, "Dresden", gdf_filtered, 'UP_Norm')
    get_map_b(LIM_LNG_MIN_2, LIM_LAT_MIN_2, LIM_LNG_MAX_2, LIM_LAT_MAX_2, "Dresden Center", gdf_filtered, 'UP_Norm')
# Define the number of classes
n_classes = 6
# Create a Natural Breaks classifier
classifier = mc.NaturalBreaks.make(k=n_classes)
# classifier = mc.HeadTailBreaks.make()
# Classify the data
classifications = gdf[['UP_Norm']].apply(classifier)
gdf['UP_Norm_nb'] = classifications
classifications = gdf[['UP_Norm_Origin']].apply(classifier)
gdf['UP_Norm_Origin_nb'] = classifications
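To inspect the actual break values (rather than only the class indices), the classifier can also be instantiated directly; a minimal sketch:
# inspect the upper bin edges of the Natural Breaks classification
nb = mc.NaturalBreaks(gdf['UP_Norm_Origin'], k=n_classes)
print(nb.bins)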
We'll also add bin labels for describing classes:
classes_ref_5 = {
5 : 'very high',
4 : 'high',
3 : 'average',
2 : 'low',
1 : 'very low',
0 : 'no data'}
gdf['popularity'] = gdf['UP_Norm_Origin_nb'].map(classes_ref_5)
# make sure that the bin "no data" is only assigned for records without UPL
gdf.loc[(gdf.UP_Norm_Origin > 1) & (gdf.UP_Norm_Origin_nb == 0), 'popularity'] = 'very low'
gdf.loc[(gdf.UP_Norm > 1) & (gdf.UP_Norm_nb == 0), 'UP_Norm_nb'] = 1
gdf.loc[(gdf.UP_Norm_Origin > 1) & (gdf.UP_Norm_Origin_nb == 0), 'UP_Norm_Origin_nb'] = 1
Have a look at the values and where we came from (left to right):
print(gdf[['UP', 'UP_Norm', 'UP_Norm_nb', 'UP_Norm_Origin', 'UP_Norm_Origin_nb', 'popularity']].head(20))
print(gdf[['UP', 'UP_Norm', 'UP_Norm_nb', 'UP_Norm_Origin', 'UP_Norm_Origin_nb', 'popularity']].sort_values('UP_Norm', ascending=False).head(20))
sns.countplot(x='UP_Norm_Origin_nb', hue='UP_Norm_Origin_nb', data=gdf)
Filter relevant target geometries
Create a geoviews layer for the final display. We'll filter the shapes so that only official green targets are shown, based on the green target column (e.g. TARGET_green_target) referenced by GREEN_TARGET_COL.
# on mixed type input (e.g. 'Polygon', 'GeometryCollection')
# apply filter below to avoid Geopandas Exception
# gdf = gdf.loc[gdf['geometry'].apply(lambda x: isinstance(x, (Polygon, MultiPolygon)))]
if GREEN_TARGET_COL is None:
    gdf_filtered = gdf
else:
    gdf_filtered = gdf.query(f'{GREEN_TARGET_COL} == "true"').copy()
print(f'Number of total shapes: {len(gdf)}. Number of filtered shapes: {len(gdf_filtered)}')
gv_shapes_weighted = gv.Polygons(
gdf_filtered, vdims=['UP_Norm_Origin_nb'], crs=ccrs.epsg(epsg_code))
Final Output:
%%output filename="dresden_streets"
andresponsive=True
width=800
height=480
%%output filename=f"{TODAY}_{CITY_NAME}_green_targets_{TARGETSHAPE_VERSION}_{LEVEL}_weighted_{INTERSECT_VERSION}"
def set_active_tool(plot, element):
    # enable wheel_zoom by default
    plot.state.toolbar.active_scroll = plot.state.tools[0]
# the Overlay would automatically extend to the data dimensions;
# since we want the initial zoom to be on the city center,
# we need to supply xlim and ylim.
# The Bokeh renderer only supports Web Mercator projection,
# we therefore need to project the bounds first,
# from WGS1984 decimal degrees to Web Mercator
# define Transformer ahead of time
# with xy-order of coordinates
proj_transformer_mercator = Transformer.from_crs(
"epsg:4326", "epsg:3857", always_xy=True)
bb_bottomleft = proj_transformer_mercator.transform(
LIM_LNG_MIN, LIM_LAT_MIN)
bb_topright = proj_transformer_mercator.transform(
LIM_LNG_MAX, LIM_LAT_MAX)
hv.Overlay(
gv.tile_sources.EsriImagery.opts(alpha=0.65) * \
gv_shapes_weighted.opts(line_width=0.1, cmap='Greens', color_index='UP_Norm_Origin_nb', colorbar=True, fill_alpha=0.9, color_levels=5)
# gv_shapes_weighted.opts(line_width=1+np.sqrt(dim('UP_Sqr')), cmap='Reds', color_index='UP_Norm', colorbar=True)
).opts(
title_format=f"Weighted meinGrün Target Shapes {TARGETSHAPE_VERSION} for {CITY_NAME} and {LEVEL} based on LBSM user-post-location proximity (based on {len(df)} posts from Flickr, Instagram, and Twitter)",
responsive=True,
finalize_hooks=[set_active_tool],
# width=800,
# height=480,
xlim=(bb_bottomleft[0],bb_topright[0]),
ylim=(bb_bottomleft[1],bb_topright[1])
)
Have a look at the header first:
gdf.head()
Select columns to export
sel_cols = [TARGET_SHAPE_ID_COL, 'geometry', 'UP', 'UP_Flickr', 'UP_Twitter', 'UP_Instagram', 'UD', 'UPL', 'UP_Norm', 'UP_Norm_Origin', 'popularity']
Write to Shapefile
# if not calculated, drop:
if 'UP' not in gdf_filtered.index and 'UD' in sel_cols:
    sel_cols.remove('UD')
if 'UP' not in gdf_filtered.index and 'UPL' in sel_cols:
    sel_cols.remove('UPL')
print(sel_cols)
gdf_filtered_columns = gdf_filtered[sel_cols].copy()
# define column types manually
# gdf_filtered_columns = gdf[sel_cols].copy()
# note that for clarity we rename UP to UPL (UserPostLocation) here
# gdf_filtered_columns.rename(columns = {'UP':'UPL'}, inplace = True)
filename = f"{TODAY}_meingruenshapes_{TARGETSHAPE_VERSION}_{CITY_NAME.lower()}_{LEVEL}_weighted_{INTERSECT_VERSION}"
schema = {'geometry': 'Polygon',
'properties': {
TARGET_SHAPE_ID_COL: 'str',
'UP': 'int',
'UP_Flickr': 'int',
'UP_Twitter': 'int',
'UP_Instagram': 'int',
# 'UD': 'int',
# 'UPL': 'int',
'UP_Norm': 'float',
'UP_Norm_Origin': 'float',
'popularity': 'str'}
}
gdf_filtered_columns.to_crs(crs_wgs).to_file(driver = 'ESRI Shapefile', schema=schema, filename= f"{filename}.shp")
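If a Shapefile is not required, the same selection can also be written to other OGR formats; a minimal (commented) sketch for GeoJSON, where column names are not limited to 10 characters:
# optional: write the same columns to GeoJSON instead of a Shapefile
# gdf_filtered_columns.to_crs(crs_wgs).to_file(
#     f"{filename}.geojson", driver='GeoJSON')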
Write to CSV
if 'geometry' in sel_cols:
    sel_cols.remove('geometry')
sel_headers = [TARGET_SHAPE_ID_COL, 'UP', 'UP_Flickr', 'UP_Twitter', 'UP_Instagram', 'UD', 'UPL', 'UP_Norm', 'UP_Norm_Origin', 'popularity']
if 'UP' not in gdf_filtered.index and 'UD' in sel_headers:
    sel_headers.remove('UD')
if 'UP' not in gdf_filtered.index and 'UPL' in sel_headers:
    sel_headers.remove('UPL')
gdf_filtered_columns.to_csv(f'{filename}.csv',
columns=sel_cols, header=sel_headers,
index=False)