Basic & Econometrics - Spatial data - visualization
def point_inside_polygon(lat,lng,poly):
p1 = Point(lng,lat)
if p1.within(poly):
return True
else:
return False
def get_neighbourhood(row,gdf):
for ix, area in gdf.iterrows():
is_in_area=False
if row['lat'] and row['lon'] and row['lat']!=0 and row['lon']!=0:
is_in_area = point_inside_polygon(row['lat'], row['lon'], area['geometry'])
if is_in_area:
#found area, exit
return area['name']
return None
import pandas as pd
import numpy as np
import geopandas as gpd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm import tqdm
import matplotlib.pyplot as plt
tqdm.pandas()
from shapely.geometry import Point, Polygon
data_path = '../data/'
/Users/ida/opt/anaconda3/lib/python3.8/site-packages/tqdm/std.py:697: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version
from pandas import Panel
def convert_columns_to_lowercase(df):
df = df.rename(columns = {k:k.lower() for k in df.columns})
return df
df_covid = pd.read_csv(data_path + 'COVID19_by_Neighborhood.csv')
df_covid = convert_columns_to_lowercase(df_covid)
renameDict = {'CERT':'cert1'}
df_covid = df_covid.rename(columns = renameDict)
{k:k.lower() for k in df_covid.columns}
{'FID': 'fid',
'COMTY_NAME': 'comty_name',
'cert1': 'cert1',
'Shape_Leng': 'shape_leng',
'Shape_Area': 'shape_area',
'location': 'location',
'cases': 'cases',
'case_rate': 'case_rate',
'deaths': 'deaths',
'death_rate': 'death_rate',
'Date': 'date',
'Shape__Area': 'shape__area',
'Shape__Length': 'shape__length'}
df_covid.head()
FID | COMTY_NAME | cert1 | Shape_Leng | Shape_Area | location | cases | case_rate | deaths | death_rate | Date | Shape__Area | Shape__Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | SYLMAR | yes | 111633.753407 | 3.404014e+08 | SYLMAR | 14535 | 17640 | 184 | 223 | 2021/01/30 | 4.647324e+07 | 41236.137343 |
1 | 2 | GRANADA HILLS | yes | 89564.685903 | 4.264128e+08 | GRANADA HILLS | 5745 | 9873 | 137 | 235 | 2021/01/30 | 5.818514e+07 | 33094.502125 |
2 | 3 | PORTER RANCH | yes | 73616.951350 | 2.267371e+08 | PORTER RANCH | 2140 | 6013 | 23 | 65 | 2021/01/30 | 3.093173e+07 | 27183.552463 |
3 | 4 | LAKEVIEW TERRACE | yes | 75477.956189 | 1.448471e+08 | LAKEVIEW TERRACE | 2116 | 16112 | 30 | 228 | 2021/01/30 | 1.975813e+07 | 27863.458449 |
4 | 5 | MISSION HILLS | yes | 50728.509850 | 1.061586e+08 | MISSION HILLS | 3848 | 15951 | 96 | 398 | 2021/01/30 | 1.447757e+07 | 18746.395093 |
%%bash
cd ../data/LA_Times_Neighborhood_Boundaries-shp
ls
cfbcc20d-2c5d-4c30-9dfa-627d46ec1a742020328-1-9ulknm.pzqsm.cpg
cfbcc20d-2c5d-4c30-9dfa-627d46ec1a742020328-1-9ulknm.pzqsm.dbf
cfbcc20d-2c5d-4c30-9dfa-627d46ec1a742020328-1-9ulknm.pzqsm.prj
cfbcc20d-2c5d-4c30-9dfa-627d46ec1a742020328-1-9ulknm.pzqsm.shp
cfbcc20d-2c5d-4c30-9dfa-627d46ec1a742020328-1-9ulknm.pzqsm.shx
gdf = gpd.read_file(data_path + 'LA_Times_Neighborhood_Boundaries-shp/cfbcc20d-2c5d-4c30-9dfa-627d46ec1a742020328-1-9ulknm.pzqsm.shp')
gdf = convert_columns_to_lowercase(gdf)
gdf.head()
objectid | name | geometry | |
---|---|---|---|
0 | 1 | Adams-Normandie | POLYGON ((-118.30069 34.03731, -118.30388 34.0... |
1 | 2 | Arleta | POLYGON ((-118.44255 34.26412, -118.44240 34.2... |
2 | 3 | Arlington Heights | POLYGON ((-118.31862 34.05306, -118.31673 34.0... |
3 | 4 | Atwater Village | MULTIPOLYGON (((-118.27886 34.15321, -118.2788... |
4 | 5 | Baldwin Hills/Crenshaw | POLYGON ((-118.36102 34.02532, -118.36026 34.0... |
gdf.plot(figsize = (10,10))
<AxesSubplot:>
df_crime = pd.read_csv(data_path + 'Crime_Data_from_2020_to_Present.csv')
df_crime = convert_columns_to_lowercase(df_crime)
df_crime.head()
dr_no | date rptd | date occ | time occ | area | area name | rpt dist no | part 1-2 | crm cd | crm cd desc | mocodes | vict age | vict sex | vict descent | premis cd | premis desc | weapon used cd | weapon desc | status | status desc | crm cd 1 | crm cd 2 | crm cd 3 | crm cd 4 | location | cross street | lat | lon | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10304468 | 01/08/2020 12:00:00 AM | 01/08/2020 12:00:00 AM | 2230 | 3 | Southwest | 377 | 2 | 624 | BATTERY - SIMPLE ASSAULT | 0444 0913 | 36 | F | B | 501.0 | SINGLE FAMILY DWELLING | 400.0 | STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE) | AO | Adult Other | 624.0 | NaN | NaN | NaN | 1100 W 39TH PL | NaN | 34.0141 | -118.2978 |
1 | 190101086 | 01/02/2020 12:00:00 AM | 01/01/2020 12:00:00 AM | 330 | 1 | Central | 163 | 2 | 624 | BATTERY - SIMPLE ASSAULT | 0416 1822 1414 | 25 | M | H | 102.0 | SIDEWALK | 500.0 | UNKNOWN WEAPON/OTHER WEAPON | IC | Invest Cont | 624.0 | NaN | NaN | NaN | 700 S HILL ST | NaN | 34.0459 | -118.2545 |
2 | 201220752 | 09/16/2020 12:00:00 AM | 09/16/2020 12:00:00 AM | 1230 | 12 | 77th Street | 1259 | 2 | 745 | VANDALISM - MISDEAMEANOR ($399 OR UNDER) | 2004 1820 0913 0329 1202 | 62 | M | B | 502.0 | MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC) | NaN | NaN | IC | Invest Cont | 745.0 | NaN | NaN | NaN | 700 E 73RD ST | NaN | 33.9739 | -118.2630 |
3 | 191501505 | 01/01/2020 12:00:00 AM | 01/01/2020 12:00:00 AM | 1730 | 15 | N Hollywood | 1543 | 2 | 745 | VANDALISM - MISDEAMEANOR ($399 OR UNDER) | 0329 1402 | 76 | F | W | 502.0 | MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC) | NaN | NaN | IC | Invest Cont | 745.0 | 998.0 | NaN | NaN | 5400 CORTEEN PL | NaN | 34.1685 | -118.4019 |
4 | 191921269 | 01/01/2020 12:00:00 AM | 01/01/2020 12:00:00 AM | 415 | 19 | Mission | 1998 | 2 | 740 | VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA... | 0329 | 31 | X | X | 409.0 | BEAUTY SUPPLY STORE | NaN | NaN | IC | Invest Cont | 740.0 | NaN | NaN | NaN | 14400 TITUS ST | NaN | 34.2198 | -118.4468 |
def sum_coordinates(row):
return row['lat']+ row['lon']
df_crime['sumcoord'] = df_crime.apply(lambda row: sum_coordinates(row),axis = 1)
df_crime.head()
dr_no | date rptd | date occ | time occ | area | area name | rpt dist no | part 1-2 | crm cd | crm cd desc | mocodes | vict age | vict sex | vict descent | premis cd | premis desc | weapon used cd | weapon desc | status | status desc | crm cd 1 | crm cd 2 | crm cd 3 | crm cd 4 | location | cross street | lat | lon | sumcoord | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10304468 | 01/08/2020 12:00:00 AM | 01/08/2020 12:00:00 AM | 2230 | 3 | Southwest | 377 | 2 | 624 | BATTERY - SIMPLE ASSAULT | 0444 0913 | 36 | F | B | 501.0 | SINGLE FAMILY DWELLING | 400.0 | STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE) | AO | Adult Other | 624.0 | NaN | NaN | NaN | 1100 W 39TH PL | NaN | 34.0141 | -118.2978 | -84.2837 |
1 | 190101086 | 01/02/2020 12:00:00 AM | 01/01/2020 12:00:00 AM | 330 | 1 | Central | 163 | 2 | 624 | BATTERY - SIMPLE ASSAULT | 0416 1822 1414 | 25 | M | H | 102.0 | SIDEWALK | 500.0 | UNKNOWN WEAPON/OTHER WEAPON | IC | Invest Cont | 624.0 | NaN | NaN | NaN | 700 S HILL ST | NaN | 34.0459 | -118.2545 | -84.2086 |
2 | 201220752 | 09/16/2020 12:00:00 AM | 09/16/2020 12:00:00 AM | 1230 | 12 | 77th Street | 1259 | 2 | 745 | VANDALISM - MISDEAMEANOR ($399 OR UNDER) | 2004 1820 0913 0329 1202 | 62 | M | B | 502.0 | MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC) | NaN | NaN | IC | Invest Cont | 745.0 | NaN | NaN | NaN | 700 E 73RD ST | NaN | 33.9739 | -118.2630 | -84.2891 |
3 | 191501505 | 01/01/2020 12:00:00 AM | 01/01/2020 12:00:00 AM | 1730 | 15 | N Hollywood | 1543 | 2 | 745 | VANDALISM - MISDEAMEANOR ($399 OR UNDER) | 0329 1402 | 76 | F | W | 502.0 | MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC) | NaN | NaN | IC | Invest Cont | 745.0 | 998.0 | NaN | NaN | 5400 CORTEEN PL | NaN | 34.1685 | -118.4019 | -84.2334 |
4 | 191921269 | 01/01/2020 12:00:00 AM | 01/01/2020 12:00:00 AM | 415 | 19 | Mission | 1998 | 2 | 740 | VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA... | 0329 | 31 | X | X | 409.0 | BEAUTY SUPPLY STORE | NaN | NaN | IC | Invest Cont | 740.0 | NaN | NaN | NaN | 14400 TITUS ST | NaN | 34.2198 | -118.4468 | -84.2270 |
dfc = df_crime.head(10000) # sample 10,0000
def point_inside_polygon(lat,lng,poly):
p1 = Point(lng,lat)
if p1.within(poly):
return True
else:
return False
def get_neighbourhood(row,gdf):
for ix, area in gdf.iterrows(): # geometry df
is_in_area=False
if row['lat'] and row['lon'] and row['lat']!=0 and row['lon']!=0:
is_in_area = point_inside_polygon(row['lat'], row['lon'], area['geometry'])
if is_in_area:
#found area, exit
return area['name']
return None
dfc['name'] = dfc.progress_apply(lambda row: get_neighbourhood(row,gdf), axis = 1)
100%|██████████| 10000/10000 [01:30<00:00, 110.39it/s]
<ipython-input-33-5acc60d73ec9>:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
dfc['name'] = dfc.progress_apply(lambda row: get_neighbourhood(row,gdf), axis = 1)
dfc.head()
dr_no | date rptd | date occ | time occ | area | area name | rpt dist no | part 1-2 | crm cd | crm cd desc | mocodes | vict age | vict sex | vict descent | premis cd | premis desc | weapon used cd | weapon desc | status | status desc | crm cd 1 | crm cd 2 | crm cd 3 | crm cd 4 | location | cross street | lat | lon | name | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10304468 | 01/08/2020 12:00:00 AM | 01/08/2020 12:00:00 AM | 2230 | 3 | Southwest | 377 | 2 | 624 | BATTERY - SIMPLE ASSAULT | 0444 0913 | 36 | F | B | 501.0 | SINGLE FAMILY DWELLING | 400.0 | STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE) | AO | Adult Other | 624.0 | NaN | NaN | NaN | 1100 W 39TH PL | NaN | 34.0141 | -118.2978 | Exposition Park |
1 | 190101086 | 01/02/2020 12:00:00 AM | 01/01/2020 12:00:00 AM | 330 | 1 | Central | 163 | 2 | 624 | BATTERY - SIMPLE ASSAULT | 0416 1822 1414 | 25 | M | H | 102.0 | SIDEWALK | 500.0 | UNKNOWN WEAPON/OTHER WEAPON | IC | Invest Cont | 624.0 | NaN | NaN | NaN | 700 S HILL ST | NaN | 34.0459 | -118.2545 | Downtown |
2 | 201220752 | 09/16/2020 12:00:00 AM | 09/16/2020 12:00:00 AM | 1230 | 12 | 77th Street | 1259 | 2 | 745 | VANDALISM - MISDEAMEANOR ($399 OR UNDER) | 2004 1820 0913 0329 1202 | 62 | M | B | 502.0 | MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC) | NaN | NaN | IC | Invest Cont | 745.0 | NaN | NaN | NaN | 700 E 73RD ST | NaN | 33.9739 | -118.2630 | Florence |
3 | 191501505 | 01/01/2020 12:00:00 AM | 01/01/2020 12:00:00 AM | 1730 | 15 | N Hollywood | 1543 | 2 | 745 | VANDALISM - MISDEAMEANOR ($399 OR UNDER) | 0329 1402 | 76 | F | W | 502.0 | MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC) | NaN | NaN | IC | Invest Cont | 745.0 | 998.0 | NaN | NaN | 5400 CORTEEN PL | NaN | 34.1685 | -118.4019 | Valley Village |
4 | 191921269 | 01/01/2020 12:00:00 AM | 01/01/2020 12:00:00 AM | 415 | 19 | Mission | 1998 | 2 | 740 | VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA... | 0329 | 31 | X | X | 409.0 | BEAUTY SUPPLY STORE | NaN | NaN | IC | Invest Cont | 740.0 | NaN | NaN | NaN | 14400 TITUS ST | NaN | 34.2198 | -118.4468 | Panorama City |
crime_counts = dfc[['name','dr_no']].groupby('name').count().reset_index().rename(columns = {'dr_no':'n'})
crime_counts
name | n | |
---|---|---|
0 | Adams-Normandie | 69 |
1 | Arleta | 4 |
2 | Arlington Heights | 57 |
3 | Atwater Village | 23 |
4 | Baldwin Hills/Crenshaw | 146 |
5 | Bel-Air | 14 |
6 | Beverly Crest | 23 |
7 | Beverly Grove | 198 |
8 | Beverlywood | 13 |
9 | Boyle Heights | 423 |
10 | Brentwood | 95 |
11 | Broadway-Manchester | 31 |
12 | Canoga Park | 27 |
13 | Carthay | 36 |
14 | Central-Alameda | 16 |
15 | Century City | 37 |
16 | Chatsworth | 17 |
17 | Chesterfield Square | 5 |
18 | Cheviot Hills | 21 |
19 | Chinatown | 93 |
20 | Cypress Park | 23 |
21 | Del Rey | 12 |
22 | Downtown | 1121 |
23 | Eagle Rock | 33 |
24 | East Hollywood | 216 |
25 | Echo Park | 140 |
26 | El Sereno | 152 |
27 | Elysian Park | 27 |
28 | Elysian Valley | 11 |
29 | Encino | 135 |
30 | Exposition Park | 194 |
31 | Fairfax | 146 |
32 | Florence | 31 |
33 | Glassell Park | 44 |
34 | Gramercy Park | 5 |
35 | Granada Hills | 12 |
36 | Green Meadows | 24 |
37 | Griffith Park | 15 |
38 | Hancock Park | 49 |
39 | Hansen Dam | 2 |
40 | Harbor City | 82 |
41 | Harbor Gateway | 94 |
42 | Harvard Heights | 11 |
43 | Harvard Park | 11 |
44 | Highland Park | 79 |
45 | Historic South-Central | 39 |
46 | Hollywood | 673 |
47 | Hollywood Hills | 81 |
48 | Hollywood Hills West | 56 |
49 | Hyde Park | 40 |
50 | Jefferson Park | 121 |
51 | Koreatown | 56 |
52 | Lake Balboa | 74 |
53 | Lake View Terrace | 5 |
54 | Larchmont | 6 |
55 | Leimert Park | 107 |
56 | Lincoln Heights | 109 |
57 | Los Feliz | 80 |
58 | Manchester Square | 6 |
59 | Mar Vista | 4 |
60 | Mid-City | 219 |
61 | Mid-Wilshire | 195 |
62 | Mission Hills | 8 |
63 | Montecito Heights | 40 |
64 | Mount Washington | 21 |
65 | North Hills | 26 |
66 | North Hollywood | 37 |
67 | Northridge | 27 |
68 | Pacific Palisades | 55 |
69 | Pacoima | 35 |
70 | Palms | 28 |
71 | Panorama City | 26 |
72 | Pico-Robertson | 65 |
73 | Pico-Union | 83 |
74 | Playa Vista | 3 |
75 | Playa del Rey | 2 |
76 | Porter Ranch | 2 |
77 | Rancho Park | 14 |
78 | Reseda | 268 |
79 | San Pedro | 288 |
80 | Sawtelle | 153 |
81 | Sepulveda Basin | 20 |
82 | Shadow Hills | 3 |
83 | Sherman Oaks | 258 |
84 | Silver Lake | 90 |
85 | South Park | 22 |
86 | Studio City | 24 |
87 | Sun Valley | 32 |
88 | Sunland | 6 |
89 | Sylmar | 28 |
90 | Tarzana | 164 |
91 | Toluca Lake | 6 |
92 | Tujunga | 9 |
93 | University Park | 157 |
94 | Valley Glen | 114 |
95 | Valley Village | 12 |
96 | Van Nuys | 492 |
97 | Venice | 29 |
98 | Vermont Knolls | 15 |
99 | Vermont Square | 135 |
100 | Vermont Vista | 17 |
101 | Vermont-Slauson | 24 |
102 | Watts | 17 |
103 | West Adams | 96 |
104 | West Hills | 11 |
105 | West Los Angeles | 69 |
106 | Westchester | 40 |
107 | Westlake | 544 |
108 | Westwood | 105 |
109 | Wilmington | 232 |
110 | Windsor Square | 8 |
111 | Winnetka | 15 |
112 | Woodland Hills | 27 |
# fig, ax = plt.subplots(figsize = (30,30))
# gdf[gdf.location!='CENTRAL'].plot(ax = ax, color = 'blue')
# gdf[gdf.location=='CENTRAL'].plot(ax = ax, color = 'red')
gdf[gdf.fid==75]
gdf = gdf.merge(crime_counts,on = 'name',how = 'left')
gdf.head()
objectid | name | geometry | n | |
---|---|---|---|---|
0 | 1 | Adams-Normandie | POLYGON ((-118.30069 34.03731, -118.30388 34.0... | 69.0 |
1 | 2 | Arleta | POLYGON ((-118.44255 34.26412, -118.44240 34.2... | 4.0 |
2 | 3 | Arlington Heights | POLYGON ((-118.31862 34.05306, -118.31673 34.0... | 57.0 |
3 | 4 | Atwater Village | MULTIPOLYGON (((-118.27886 34.15321, -118.2788... | 23.0 |
4 | 5 | Baldwin Hills/Crenshaw | POLYGON ((-118.36102 34.02532, -118.36026 34.0... | 146.0 |
fig, ax = plt.subplots(1,figsize = (15,15)) # create figure and axes for Matplotlib
ax.set_title('Crime counts by neighborhood',
fontdict={'fontsize': '25', 'fontweight' : '3'})
vmin = gdf.n.min()
vmax = gdf.n.max()
gdf.plot(column = 'n',cmap = 'Greens',linewidth = 1, ax = ax, edgecolor = '.5')
# remove the axis
ax.axis('off')
# Create colorbar legend
sm = plt.cm.ScalarMappable(cmap='Greens', norm=plt.Normalize(vmin=vmin, vmax=vmax))
# empty array for the data range
sm.set_array([])
#add colorbar
cbar = fig.colorbar(sm)
# Add location for the labels
gdf['coords'] = gdf['geometry'].apply(lambda x: x.representative_point().coords[:])
gdf['coords'] = [coords[0] for coords in gdf['coords']]
# Display names
for idx, row in gdf.iterrows():
plt.annotate(s=row['name'], xy=row['coords'],horizontalalignment='center',fontsize=9,color = 'white')
<ipython-input-44-d2ef1ee917c5>:27: MatplotlibDeprecationWarning: The 's' parameter of annotate() has been renamed 'text' since Matplotlib 3.3; support for the old name will be dropped two minor releases later.
plt.annotate(s=row['name'], xy=row['coords'],horizontalalignment='center',fontsize=9,color = 'white')
Matplotlib colormaps: https://matplotlib.org/stable/tutorials/colors/colormaps.html
gdf2 = gpd.read_file(data_path + 'COVID19_by_Neighborhood-shp/COVID19_by_Neighborhood.shp')
gdf2 = convert_columns_to_lowercase(gdf2)
gdf2.head()
fid | comty_name | cert | shape_leng | shape_area | location | cases | case_rate | deaths | death_rate | date | shape__are | shape__len | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | GRANADA HILLS | yes | 89564.685903 | 4.264128e+08 | GRANADA HILLS | 457 | 785 | 48 | 82 | 2020-06-18 | 5.818514e+07 | 33094.502125 | POLYGON ((-118.54219 34.29860, -118.54084 34.2... |
1 | 2 | PORTER RANCH | yes | 73616.951350 | 2.267371e+08 | PORTER RANCH | 111 | 312 | 7 | 20 | 2020-06-18 | 3.093173e+07 | 27183.552463 | POLYGON ((-118.59618 34.27462, -118.59377 34.2... |
2 | 3 | LAKEVIEW TERRACE | yes | 75477.956189 | 1.448471e+08 | LAKEVIEW TERRACE | 158 | 1203 | 4 | 30 | 2020-06-18 | 1.975813e+07 | 27863.458449 | POLYGON ((-118.40834 34.28671, -118.40755 34.2... |
3 | 4 | MISSION HILLS | yes | 50728.509850 | 1.061586e+08 | MISSION HILLS | 214 | 887 | 10 | 41 | 2020-06-18 | 1.447757e+07 | 18746.395093 | POLYGON ((-118.47067 34.29432, -118.46554 34.2... |
4 | 5 | ANGELES NATIONAL FOREST | yes | 92240.837542 | 1.458565e+08 | ANGELES NATIONAL FOREST | 1 | 2500 | 0 | 0 | 2020-06-18 | 1.989453e+07 | 34046.544055 | POLYGON ((-118.33605 34.28250, -118.33494 34.2... |
fig, ax = plt.subplots(1,figsize = (10,10)) # create figure and axes for Matplotlib
ax.set_title('Covid deaths by neighborhood',
fontdict={'fontsize': '25', 'fontweight' : '3'})
vmin = gdf2.deaths.min()
vmax = gdf2.deaths.max()
gdf2.plot(column = 'deaths',cmap = 'viridis',linewidth = 1, ax = ax, edgecolor = '.5')
# remove the axis
ax.axis('off')
# Create colorbar legend
sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=vmin, vmax=vmax))
# empty array for the data range
sm.set_array([])
#add colorbar
cbar = fig.colorbar(sm)
# Add location for the labels
gdf['coords'] = gdf['geometry'].apply(lambda x: x.representative_point().coords[:])
gdf['coords'] = [coords[0] for coords in gdf['coords']]
# # Display names
# for idx, row in gdf.iterrows():
# plt.annotate(s=row['n'], xy=row['coords'],horizontalalignment='center',fontsize=9)
gdf2 = gdf2.merge(df_covid[['fid','cases','case_rate','deaths','death_rate']], on = 'fid')
gdf2.head()
Spatial joins using sjoin
1 to many mappings
d = {'col1': [1, 2], 'col2': [3, 4]}
df = pd.DataFrame(data=d)