The FB Places Graph is open for non-personal data (e.g. place locations, number of checkins, likes, place categories). However, it is limited to radial queries for lat/lng coordinates up to 5000m. If we want to query a bounding box, multiple points need to be queried in a grid and duplicate responses need to be merged. This Notebook will also include preview of data on a Geoviews map and output to HTML/CSV for further analysis.
import geoviews as gv
import geoviews.feature as gf
from pathlib import Path
from shapely.geometry import shape
from shapely.geometry import Point
import geopandas as gp
import pandas as pd
import holoviews as hv
hv.notebook_extension('bokeh')
First, create a grid for sequential loading of FB places per centerpoint:
from fiona.crs import from_epsg
from shapely.geometry import Polygon
import numpy as np
# Load the Kreis (district) shape that defines the boundary of the grid.
kreisgrenze = Path.cwd() / "Shapes" / "westsachsen_wgs1984.shp"
polys = gp.read_file(kreisgrenze)
# Reproject the geometries by replacing the values with projected ones.
# We want distances in meters to define the grid size, so use
# WGS 84 / UTM zone 33N (EPSG code 32633).
polys['geometry'] = polys['geometry'].to_crs(epsg=32633)
xmin, ymin, xmax, ymax = polys.total_bounds
# Grid cell size in meters (typo 'lenght' fixed).
length = 2000
width = 2000
cols = list(range(int(np.floor(xmin)), int(np.ceil(xmax)), width))
rows = list(range(int(np.floor(ymin)), int(np.ceil(ymax)), length))
rows.reverse()
# Build one rectangle per grid cell; (x, y) is the cell's top-left corner
# because rows run top-down after the reverse() above.
polygons = []
for x in cols:
    for y in rows:
        polygons.append(
            Polygon([(x, y), (x + width, y),
                     (x + width, y - length), (x, y - length)]))
# Convert the polygons to a GeoDataFrame in the metric CRS.
grid = gp.GeoDataFrame({'geometry': polygons})
grid.crs = from_epsg(32633)
# Project back to WGS 1984 for display in geoviews and for coordinates in
# decimal degrees (lat, lng).
# BUGFIX: the original used EPSG 4236 (Hu Tzu Shan 1950) — WGS 84 is 4326.
grid['geometry'] = grid['geometry'].to_crs(epsg=4326)
grid.crs = from_epsg(4326)
Optionally, save the grid to a file:
# Sanity check: the grid should now report EPSG 4326 with lat/lng polygons.
print(grid.crs)
print(grid.head())
Create Geoviews layer from Geodataframe
# Preview the query grid on a Wikipedia basemap.
# (Swap in gv.tile_sources.EsriImagery for aerial imagery.)
gridlayer = gv.Polygons(grid)
hv.Overlay(
    gv.tile_sources.Wikipedia
    * gridlayer.opts(alpha=0.5)
).opts(
    width=800,
    height=480
)
Get the centroids of the grid polygons (these become the FB query centers):
# Preview the first few cell centroids (the future query centers).
print(len(grid))
# BUGFIX: Series.iteritems() was removed in pandas 2.0 — items() is the
# long-supported equivalent.
for index, poly in grid['geometry'].items():
    print(type(poly))
    print(poly.centroid)
    if index > 5:
        break
Old approach: use query grid from lat/lng list (from ArcGIS fishnet)
Before querying FB places, we need to authenticate with a valid Facebook login. The Token will be used to sign queries.
Get/Renew Access token: https://developers.facebook.com/tools/explorer/145634995501895/
Select App Token
# SECURITY NOTE(review): a live access token is hardcoded in the notebook.
# Revoke it and load the token from an environment variable or config file
# instead of committing it.
fb_access_token = 'EAACeMtszp0EBABykbhKaZA6QAFdZAMToxWvS5kxfKWE17xqyhOv20dUU0wd0j7he7e8D14j8Jdm6485WYxu8bQI59DILPTnQda9VGfmyTE9NmEHgZAwuyy3ys1X7DpS8iBt0itLtPXXzJ1WjO5BitBrIz5ZAmTGt3nXN2NKy74UgGUPsm0STjAyZCLhCxB1eSLENjr8JwAMK9ZAbEAcfIy'
import math
# Radius of the circle that circumscribes a 2000 m grid cell,
# i.e. half of the cell diagonal (~1414 m).
circle_radius = 2000 * math.sqrt(2) / 2
print(circle_radius)
import requests
import json
# Base request for the FB Graph place search (center is replaced per grid cell).
query_url = 'https://graph.facebook.com/v3.2/search'
params = dict(
    type='place',
    center='0,0',
    distance=1500,
    # BUGFIX: the original was missing the comma between 'is_verified' and
    # 'overall_star_rating'; implicit string concatenation silently merged
    # them into one invalid field name ('is_verifiedoverall_star_rating').
    fields='name,checkins,picture,about,context,link,location,engagement,is_verified,'
           'overall_star_rating,rating_count,hours,description,category_list,single_line_address,cover',
    limit=500,
    access_token=fb_access_token
)
# load for intersection
# Reload the district boundary; used below to skip grid cells and filter
# returned places that fall outside Westsachsen.
kreisgrenze = Path.cwd() / "Shapes" / "westsachsen_wgs1984.shp"
kreis_polys = gp.read_file(kreisgrenze)
import time
# Enter the start and end index here:
# from_index is the first grid index that will be queried,
# to_index is the last grid index that will be queried.
from_index = 1265
to_index = 1600
# BUGFIX: Series.iteritems() was removed in pandas 2.0 — use items().
for index, poly in grid['geometry'].items():
    if index < from_index:
        # skip already retrieved cells
        continue
    if not any(kreis_polys.intersects(poly)):
        # skip grid cells that are not in Westsachsen
        continue
    grid_center = poly.centroid
    # The Graph API expects 'lat,lng' (y before x).
    params['center'] = f'{grid_center.y},{grid_center.x}'
    successful = False
    while not successful:
        # timeout added so a stalled connection cannot hang the loop forever
        response = requests.get(url=query_url, params=params, timeout=60)
        json_data = response.json()
        error = json_data.get("error")
        if error and error.get("code") in (4, 613):
            # rate limit notice — back off and retry
            print("Waiting 2 Minutes for Rate Limit")
            time.sleep(120)
        else:
            successful = True
    # Persist the raw response for later merging/deduplication.
    with open(Path.cwd() / 'JSON_Data' / f'{index:03d}_json.json', 'w') as outfile:
        json.dump(json_data, outfile)
    # Output reporting (reuse the parsed json instead of re-parsing
    # response.text a second time, as the original did).
    places_json = json_data["data"]
    print(f'Retrieved {len(places_json)} places for grid {index:03d} ({grid_center.y}, {grid_center.x})')
    time.sleep(25)
    if index > to_index:
        break
This query will list all FB page categories and respective hierarchy.
# Query the FB page category tree (root categories, each with a nested
# 'fb_page_categories' list).
query_url = 'https://graph.facebook.com/v3.2/fb_page_categories'
params = dict(
    access_token=fb_access_token
)
# timeout added so a stalled connection cannot hang the cell
response = requests.get(url=query_url, params=params, timeout=60)
json_data = response.json()
# (needless f-prefix removed: the filename contains no placeholders)
with open(Path.cwd() / 'FB_categories_root_json.json', 'w') as outfile:
    json.dump(json_data, outfile)
List all categories
# Print every root category followed by its (optional) sub-categories.
for main_cat in json_data["data"]:
    print(f'ID {main_cat.get("id")} - {main_cat.get("name")}')
    print(' Subcats:')
    # 'or []' makes a missing/None sub-category list simply print nothing
    for sub_cat in main_cat.get('fb_page_categories') or []:
        print(f' ID {sub_cat.get("id")} - {sub_cat.get("name")}')
These categories appear to be of some interest to planning:
ID 964585346994407 - Sports
ID 226326230802065 - Charity Organization
ID 186004504854452 - Country Club / Clubhouse
ID 170968163319233 - Community Service
ID 191523214199822 - Environmental Conservation Organization
ID 189018581118681 - Sports Club
ID 181053558607965 - Youth Organization
ID 2235 - Non-Governmental Organization (NGO)
ID 2603 - Nonprofit Organization
ID 147714868971098 - Public & Government Service
ID 133436743388217 - Arts & Entertainment
ID 186982054657561 - Sports & Recreation
Main: ID 683513901834713 - Non-Business Places
ID 1713595685546855 - City Infrastructure
ID 1874409019452971 - Locality
ID 635235176664335 - Outdoor Recreation
ID 2607 - Religious Place of Worship
ID 209889829023118 - Landmark & Historical Place
# Facebook category ids considered relevant to planning (see listing above).
planning_cats = [
    '964585346994407',   # Sports
    '226326230802065',   # Charity Organization
    '186004504854452',   # Country Club / Clubhouse
    '170968163319233',   # Community Service
    '191523214199822',   # Environmental Conservation Organization
    '189018581118681',   # Sports Club
    '181053558607965',   # Youth Organization
    '2235',              # Non-Governmental Organization (NGO)
    '2603',              # Nonprofit Organization
    '147714868971098',   # Public & Government Service
    '133436743388217',   # Arts & Entertainment
    '186982054657561',   # Sports & Recreation
    '683513901834713',   # Non-Business Places
    '1713595685546855',  # City Infrastructure
    '1874409019452971',  # Locality
    '635235176664335',   # Outdoor Recreation
    '2607',              # Religious Place of Worship
    '209889829023118',   # Landmark & Historical Place
]
import os
import json
# Merge every per-grid JSON response into one flat place list.
# sorted() makes the concatenation order deterministic — os.listdir returns
# entries in arbitrary order — and the directory path is built consistently
# with the rest of the notebook.
placelist = []
json_dir = Path.cwd() / 'JSON_Data'
for file in sorted(os.listdir(json_dir)):
    if file.endswith(".json"):
        with open(json_dir / file, 'r') as infile:
            json_data = json.load(infile)
        placelist += json_data["data"]
len(placelist)
from collections import namedtuple
# Flat record describing one Facebook place (one row of the final dataframe).
PlaceTuple = namedtuple(
    'PlaceTuple',
    ['Latitude', 'Longitude', 'PlaceGuid', 'PlaceName', 'Checkins', 'Likes',
     'URL', 'PlaceCats', 'Caption', 'City', 'Country', 'Street', 'Zip'])
place_tuples = []
skippedCount = 0
# BUGFIX: both names below were referenced in the no-location branch but
# never defined, so any place without 'location' raised a NameError.
count_non_geotagged = 0
# Non-geotagged places cannot be mapped; skip them (matches the original
# '# skip non geotagged' intent).
excludeWhereGeoInfo_isMissing = True
place_already_inserted = set()
for place in placelist:
    # name and id are mandatory; skip problematic entries
    if place.get('name'):
        iLocName = place["name"]
    else:
        skippedCount += 1
        continue
    if place.get('checkins'):
        iLocCheckins = place["checkins"] if isinstance(place["checkins"], int) else None
    else:
        iLocCheckins = None
    if place.get('engagement'):
        # BUGFIX: a non-int engagement count previously became None and then
        # crashed the int() call in the append below; default to 0 instead.
        iLocLikes = place["engagement"]["count"] if isinstance(place["engagement"]["count"], int) else 0
    else:
        iLocLikes = 0
    # Prefer the long description, fall back to the short 'about' text.
    if place.get('description'):
        iLocDescription = place["description"]
    elif place.get('about'):
        iLocDescription = place["about"]
    else:
        iLocDescription = None
    if place.get('id'):
        place_guid = place["id"]
        if place_guid in place_already_inserted:
            # skip duplicates (adjacent query circles overlap, so the same
            # place is returned multiple times)
            skippedCount += 1
            continue
        place_already_inserted.add(place_guid)
    else:
        skippedCount += 1
        continue
    if place.get('category_list'):
        place_types = [(cat.get('id'), cat.get('name')) for cat in place["category_list"]]
    else:
        place_types = None
    if place.get('location'):
        location = place['location']
        iLocCityName = location.get('city') or None
        iLocCountryName = location.get('country') or None
        iLocLat = float(location["latitude"])
        iLocLng = float(location["longitude"])
        if not any(kreis_polys.intersects(Point(iLocLng, iLocLat))):
            # outside the Westsachsen study area
            continue
        iLocStreet = location.get('street') or None
        iLocZip = location.get('zip') or None
    else:
        count_non_geotagged += 1
        if excludeWhereGeoInfo_isMissing:
            continue
        # BUGFIX: when non-geotagged places are kept, every location field
        # must still be defined (originally they were stale or unbound).
        iLocLat = iLocLng = None
        iLocCityName = iLocCountryName = iLocStreet = iLocZip = None
    place_url = place["link"] if place.get('link') else None
    # BUGFIX: the original genexp iterated place_types even when it was None,
    # raising TypeError for places without categories; emit '' instead.
    # Format: ';id:name;id2:name2;' (the ';;' collapse mirrors the original).
    if place_types:
        place_cats = "".join(
            ";" + str(cat_id) + ":" + str(cat_name) + ";"
            for cat_id, cat_name in place_types).replace(";;", ";")
    else:
        place_cats = ''
    place_tuples.append(
        PlaceTuple(iLocLat,          # Latitude
                   iLocLng,          # Longitude
                   place_guid,       # PlaceGuid
                   iLocName,         # PlaceName
                   iLocCheckins,     # Checkins
                   int(iLocLikes),   # Likes
                   place_url,        # URL
                   place_cats,       # PlaceCats
                   iLocDescription,  # Caption
                   iLocCityName,     # City
                   iLocCountryName,  # Country
                   iLocStreet,       # Street
                   iLocZip,          # Zip
                   ))
print(f'{len(place_tuples)} places extracted. Skipped {skippedCount} (e.g. duplicate entries)')
pd_place_tuples = pd.DataFrame(place_tuples)
Preview Density
# Build point geometries from the lat/lng columns and wrap the places in a
# GeoDataFrame in WGS 84.
geometry_point = [Point(xy) for xy in zip(pd_place_tuples.Longitude, pd_place_tuples.Latitude)]
pd_place_tuples = pd_place_tuples.drop(['Longitude', 'Latitude'], axis=1)
# BUGFIX: the {'init': 'epsg:4326'} dict form is deprecated in pyproj 2+/
# geopandas; the authority string is the supported spelling.
crs_1984 = 'EPSG:4326'
gdf = gp.GeoDataFrame(pd_place_tuples, crs=crs_1984, geometry=geometry_point)
# Spatial join: attach district attributes to each contained place.
dfsjoin = gp.sjoin(kreis_polys, gdf)
dfsjoin.head()
def bin_the_midpoints(bins, midpoints):
    """Spatially join midpoints into bin polygons and aggregate per bin.

    bins      -- GeoDataFrame of polygons.
    midpoints -- GeoDataFrame of points carrying a 'PlaceGuid' column.
    Returns a copy of `bins` with 'fold' (points per bin) and the minimum
    'PlaceGuid' appended; bins without points get NaN.
    """
    b = bins.copy()
    m = midpoints.copy()
    reindexed = b.reset_index().rename(columns={'index': 'bins_index'})
    joined = gp.tools.sjoin(reindexed, m)
    # BUGFIX: dict-renaming agg on a SeriesGroupBy ({'fold': len, ...}) was
    # removed in pandas 1.0 — use named aggregation ('size' mirrors len).
    bin_stats = joined.groupby('bins_index')['PlaceGuid'].agg(
        fold='size', PlaceGuid='min')
    return gp.GeoDataFrame(b.join(bin_stats))
# Preview the per-district point counts.
bin_stats = bin_the_midpoints(kreis_polys, gdf)
bin_stats.head()
from holoviews import dim, opts
def set_active_tool(plot, element):
    # enable wheel_zoom by default
    plot.state.toolbar.active_scroll = plot.state.tools[0]
# NOTE(review): 'SelCat' is only added to the data in a later cell
# (update_selcat_col), so coloring by it here likely fails — confirm
# execution order before relying on this cell.
# NOTE(review): finalize_hooks was renamed to hooks in holoviews >= 1.11.
hv.Overlay(
    gv.tile_sources.EsriImagery.opts(alpha=0.5) * \
    gv.Polygons(dfsjoin).opts(fill_alpha=0, color='SelCat')
).opts(
    finalize_hooks=[set_active_tool],
    width=1000,
    height=480,
)
Order Place Categories:
# First category per place: the packed string looks like ';id:name;…', so
# take the second ';'-token, then the part after its ':'.
first_entry = pd_place_tuples["PlaceCats"].str.split(';', n=2, expand=True)[1]
place_cats = first_entry.str.split(':', n=2, expand=True)[1]
cat_count = place_cats.value_counts()
#cat_count.where(cat_count>3)
cat_count[:20]
# Work on a copy so the raw dataframe stays untouched for later cells.
pd_place_tuples_display = pd_place_tuples.copy()
# First category name (same split as in the cat_count cell above).
first_entry = pd_place_tuples["PlaceCats"].str.split(';', n=2, expand=True)[1]
pd_place_tuples_display['first_cat'] = first_entry.str.split(':', n=2, expand=True)[1]
# Limit the caption to 50 characters for compact hover tooltips.
pd_place_tuples_display['Caption'] = pd_place_tuples_display['Caption'].str[:50]
# Scale symbols by sqrt of the checkins (np.sqrt stays vectorized,
# unlike math.sqrt).
pd_place_tuples_display['pt_size'] = 4 + np.sqrt(pd_place_tuples_display['Checkins']) / 5
Append the per-category occurrence count to the dataframe (used to decide which main categories get their own color):
# Attach, per row, the global count of its first category; categories that
# occur 87 times or fewer become NaN via the where() filter.
pd_place_tuples_display['cat_count'] = cat_count.where(cat_count>87).get(pd_place_tuples_display['first_cat']).to_list()
pd_place_tuples_display['main_cats'] = pd_place_tuples_display['first_cat']
pd_place_tuples_display.tail()
# replace all cats that appear 87 times or fewer (cat_count is NaN) by 'Other'
pd_place_tuples_display.loc[pd_place_tuples_display['cat_count'].isnull(), 'main_cats'] = 'Other'
pd_place_tuples_display.tail()
len(pd_place_tuples_display.main_cats.unique())
# Wrap the display dataframe as a Geoviews Dataset; every column is declared
# as a key dimension so it can be re-mapped onto Points below.
fb_place_data_gv = gv.Dataset(
    pd_place_tuples_display,
    kdims=['Longitude', 'Latitude', 'PlaceGuid',
           'PlaceName', 'Checkins', 'Likes',
           'URL', 'first_cat', 'cat_count', 'main_cats', 'Caption', 'City',
           'Country', 'Street', 'Zip', 'PlaceCats', 'pt_size'])
fb_place_data_gv
%%opts Overlay [legend_position='left']
from holoviews import dim, opts
from math import sqrt
from cartopy import crs as ccrs
#from bokeh.models import Legend
def set_active_tool(plot, element):
    # enable wheel_zoom by default
    plot.state.toolbar.active_scroll = plot.state.tools[0]
#from bokeh.models import HoverTool
#hover = HoverTool(tooltips=[("index", "$index")])
#use tools=[hover]]
# Interactive map of all places: colored by main category, sized by the
# precomputed pt_size (sqrt of checkins).
hv.Overlay(
    gv.tile_sources.EsriImagery.opts(alpha=0.5) * \
    fb_place_data_gv.to(
        gv.Points,
        kdims=['Longitude', 'Latitude'],
        vdims=['PlaceName', 'PlaceGuid', 'PlaceCats', 'Checkins', 'Likes',
               'URL', 'first_cat', 'cat_count', 'main_cats', 'Caption', 'City', 'Country',
               'Street', 'Zip', 'pt_size'],
        crs=ccrs.PlateCarree()).opts(tools=['hover'], size='pt_size', color='main_cats', cmap='tab20c')
).opts(
    width=1000,
    height=480,
    # NOTE(review): finalize_hooks was renamed to hooks in holoviews >= 1.11
    finalize_hooks=[set_active_tool]
)
Select all cities:
# Select city places; ids 2404/2401 presumably denote city-level categories —
# confirm against the category listing above.
# '== True' turns NaN matches into False so the mask stays boolean.
city_mask = pd_place_tuples.PlaceCats.str.contains("2404|2401") == True
pd_place_city = pd_place_tuples[city_mask]
fb_city_data_gv = gv.Dataset(
    pd_place_city,
    kdims=['Longitude', 'Latitude', 'PlaceGuid',
           'PlaceName', 'Checkins', 'Likes',
           'URL', 'Caption', 'City',
           'Country', 'Street', 'Zip', 'PlaceCats'])
print(len(pd_place_city))
pd_place_city.head()
Select Outdoor Recreation:
def make_gv_layer(pd_dataframe):
    '''Create Geoviews layer from Pandas dataframe'''
    layer_kdims = ['Longitude', 'Latitude', 'PlaceGuid',
                   'PlaceName', 'Checkins', 'Likes',
                   'URL', 'Caption', 'City',
                   'Country', 'Street', 'Zip', 'PlaceCats']
    return gv.Dataset(pd_dataframe, kdims=layer_kdims)
def select_create_gv_layer(id_string_select):
    '''Select places matching a category-id pattern and wrap them in a layer.

    id_string_select -- regex alternation of FB category ids, e.g. "123|456".
    Returns a Geoviews Dataset; also prints the number of selected places.
    '''
    # na=False keeps the boolean mask valid even if PlaceCats held NaN,
    # consistent with the '== True' guard used for the city selection above.
    mask = pd_place_tuples.PlaceCats.str.contains(id_string_select, na=False)
    pd_place_select = pd_place_tuples[mask]
    fb_select_data_gv = make_gv_layer(pd_place_select)
    print(len(fb_select_data_gv))
    return fb_select_data_gv
# 635235176664335 = Outdoor Recreation (see category listing above)
fb_recreation_data_gv = select_create_gv_layer("635235176664335")
Select sports category:
# Sports & Recreation, Sports Club, Sports (ids from the listing above)
fb_sport_data_gv = select_create_gv_layer("186982054657561|189018581118681|964585346994407")
Select Landmarks:
# 209889829023118 = Landmark & Historical Place
fb_landmark_data_gv = select_create_gv_layer("209889829023118")
Select Community Service:
# Community Service, Environmental Conservation Org, Youth Org, NGO, Nonprofit
fb_community_data_gv = select_create_gv_layer("170968163319233|191523214199822|181053558607965|2235|2603")
Select Arts & Entertainment:
# 133436743388217 = Arts & Entertainment
fb_arts_data_gv = select_create_gv_layer("133436743388217")
Select City Infrastructure:
# City Infrastructure, Locality, Public & Government Service, NGO, Nonprofit,
# Charity Organization, Country Club / Clubhouse
fb_infrastructure_data_gv = select_create_gv_layer("1713595685546855|1874409019452971|147714868971098|2235|2603|226326230802065|186004504854452")
%%opts Points [tools=['hover']]
# Setup for the two-layer selection map below.
from cartopy import crs as ccrs
from holoviews import dim, opts
def set_active_tool(plot, element):
    # enable wheel_zoom by default
    plot.state.toolbar.active_scroll = plot.state.tools[0]
def make_layer(input_gv_layer, layer_name):
    '''Format Holoviews/Geoviews Layer with vdims and kdims'''
    layer_vdims = ['PlaceName', 'PlaceGuid', 'PlaceCats', 'Checkins', 'Likes',
                   'URL', 'Caption', 'City', 'Country',
                   'Street', 'Zip']
    return input_gv_layer.to(
        gv.Points,
        kdims=['Longitude', 'Latitude'],
        vdims=layer_vdims,
        crs=ccrs.PlateCarree(),
        label=layer_name)
# see https://matplotlib.org/api/markers_api.html for markers
# Map the infrastructure and city selections; symbol size scales with likes.
hv.Overlay(
    gv.tile_sources.EsriImagery.opts(alpha=0.5) * \
    make_layer(fb_infrastructure_data_gv, 'Infrastructure & Service').opts(size=4+dim('Likes')/1500, color='blue', marker='o') * \
    make_layer(fb_city_data_gv, 'city').opts(size=2+dim('Likes')/5000, color='darkred', marker='o')
).opts(
    width=1000,
    height=480,
    # NOTE(review): finalize_hooks was renamed to hooks in holoviews >= 1.11
    finalize_hooks=[set_active_tool],
    title='Selected Facebook Places categories in Westsachsen'
)
# Keep the two formatted point layers around for later reuse.
points1 = make_layer(fb_infrastructure_data_gv, 'Infrastructure & Service')
points2 = make_layer(fb_city_data_gv, 'city')
def update_selcat_col(df, id_string_select, place_cat_name):
    '''Label matching rows in-place: set SelCat to place_cat_name wherever
    PlaceCats matches the id pattern (regex alternation of category ids).'''
    matches = df.PlaceCats.str.contains(id_string_select)
    df.loc[matches, 'SelCat'] = place_cat_name
# Assemble the display dataframe: copy, truncate captions, then label the
# selected category groups. Later labels overwrite earlier ones where
# category id patterns overlap (e.g. 2235/2603 appear in two groups).
pd_places_display_cat = pd_place_tuples.copy()
pd_places_display_cat['Caption'] = pd_place_tuples['Caption'].str[:256]
pd_places_display_cat["SelCat"] = None
update_selcat_col(pd_places_display_cat, "186982054657561|189018581118681|964585346994407", "Sport")
update_selcat_col(pd_places_display_cat, "1713595685546855|1874409019452971|147714868971098|2235|2603|226326230802065|186004504854452", "Infrastructure & Service")
update_selcat_col(pd_places_display_cat, "133436743388217", "Arts & Entertainment")
update_selcat_col(pd_places_display_cat, "170968163319233|191523214199822|181053558607965|2235|2603", "Community Service")
update_selcat_col(pd_places_display_cat, "635235176664335", "Recreation")
update_selcat_col(pd_places_display_cat, "209889829023118", "Landmark")
update_selcat_col(pd_places_display_cat, "2404|2401", "City")
# Replace all remaining unlabeled rows.
# BUGFIX: attribute-chained `.SelCat.fillna(..., inplace=True)` is a
# chained-assignment pattern that stops updating the frame under pandas
# copy-on-write; assign the column explicitly instead.
pd_places_display_cat['SelCat'] = pd_places_display_cat['SelCat'].fillna("Other")
pd_places_display_cat.head()
%%opts Points [tools=['hover']] Overlay [title_format="All Facebook Places and selected categories in Westsachsen (Public Facebook Place Graph API)"]
%%output filename="westsachsen_fb_places"
from holoviews.operation.stats import bivariate_kde
from cartopy import crs as ccrs
from holoviews import dim, opts
hv.notebook_extension('bokeh')
def set_active_tool(plot, element):
    # enable wheel_zoom by default
    plot.state.toolbar.active_scroll = plot.state.tools[0]
# Final overview map (also exported to HTML via %%output): district outlines
# plus all places, colored by the SelCat labels assigned above and sized by
# likes.
gv_layer = gv.Dataset(
    pd_places_display_cat,
    kdims=['Longitude', 'Latitude', 'PlaceGuid',
           'PlaceName', 'Checkins', 'Likes',
           'URL','Caption', 'City',
           'Country', 'Street', 'Zip', 'PlaceCats', 'SelCat'])
# colormaps: http://build.holoviews.org/user_guide/Colormaps.html
# NOTE(review): the commented-out bivariate_kde line sits between backslash
# continuations — fragile; remove the backslashes (the parentheses already
# continue the expression) if this line is ever re-enabled.
hv.Overlay(
    gv.tile_sources.EsriImagery.opts(alpha=0.5) * \
    gv.Polygons(kreis_polys).opts(fill_alpha=0) * \
    #bivariate_kde(gv_layer, bandwidth=0.1).opts(show_legend=False, cmap='Blues', alpha=0.7) * \
    gv_layer.to(
        gv.Points,
        kdims=['Longitude', 'Latitude'],
        vdims=['PlaceName', 'PlaceGuid', 'PlaceCats', 'Checkins', 'Likes',
               'URL', 'Caption', 'City', 'Country',
               'Street', 'Zip', 'SelCat'],
        crs=ccrs.PlateCarree()).opts(size=2.5+dim('Likes')/7000,
                                     color='SelCat', cmap='Category20'),
).opts(
    #responsive=True,
    # NOTE(review): finalize_hooks was renamed to hooks in holoviews >= 1.11
    finalize_hooks=[set_active_tool],
    width=1000,
    height=480,
)