-
Notifications
You must be signed in to change notification settings - Fork 0
/
helpers.py
184 lines (143 loc) · 7.61 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import geopandas as gpd
from h3 import hex_range_distances
import pandas as pd
import numpy as np
from functools import reduce
from shapely.ops import unary_union
from urbanpy.utils import geo_boundary_to_polygon
from math import ceil
# Lookup table read by calc_range_distances: key = range distance,
# value = the figure that is divided by a container's area.
# NOTE(review): each value is roughly (1 + 3k(k+1)) times the first one, i.e.
# the number of cells in an H3 k-ring, so these look like k-ring areas
# (presumably in square metres) -- confirm against how they were generated.
r_dists = dict(enumerate((
    123499.97082906487,
    864499.7933786848,
    2346499.4263399825,
    4569498.845439567,
    7533498.014266526,
    11238496.884271327,
    15684495.394773053,
    20871493.472944934,
)))
# Display label (Spanish, shown to the user) -> name of the indicator column.
# Every label must point at its own column: the mapping is inverted to build
# plotly_chart_labels, and duplicated column names would make that inversion
# lossy (the last label silently wins).
ind_labels = {
    'Densidad Poblacional': 'population_2020',
    'Densidad Poblacional (<5 años)': 'population_children',
    'Densidad Poblacional (15-24 años)': 'population_youth',
    'Densidad Poblacional (>60 años)': 'population_elderly',
    'Acceso a Comercios aledaños': 'Ai',
    'Disponibilidad de Comercios aledaños': 'ds',
    'Lugares de Venta Minorista': 'Retail',
    'Lugares de Servicios': 'Services',
    'Lugares de Bebidas, Comidas y Alojamiento': 'BFA',
    'Lugares de Manufactura': 'Manufacturing',
    'Lugares de Otros': 'Other',
    'Ingreso Promedio por Hogar': 'INGR_HOG_PROM',
    'Ingreso Per Cápita en General': 'INGR_PER',
    'Ingreso Per Cápita Alto (>1700)': 'Alto',
    'Ingreso Per Cápita Medio Alto (900-1700)': 'Medio Alto',
    'Ingreso Per Cápita Medio (550-900)': 'Medio',
    'Ingreso Per Cápita Medio Bajo (380-550)': 'Medio Bajo',
    # Previously mapped to 'Medio Bajo' (copy-paste of the entry above).
    # NOTE(review): the label text is kept unchanged for callers, but its
    # range "(<550)" overlaps the previous bracket and is probably meant to
    # be "(<380)"; also confirm the dataset exposes a 'Bajo' column.
    'Ingreso Per Cápita Bajo (<550)': 'Bajo',
    'Vulnerabilidad Monetaria': 'vulnerabilidad_monetaria',
    'Vulnerabilidad Laboral': 'vulnerabilidad_laboral',
    'Vulnerabilidad Hídrica': 'vulnerabilidad_hidrica',
    'Distancia al Lugar de Venta de Alimento más cercano': 'distance_to_food_facility',
    # Previously mapped to the *distance* column; now mirrors the health
    # facility pair below (distance_* / duration_*).
    # NOTE(review): confirm the dataset exposes 'duration_to_food_facility'.
    'Duración del viaje al Lugar de Venta de Alimento más cercano': 'duration_to_food_facility',
    'Distancia al Centro de Salud más cercano': 'distance_to_health_facility',
    'Duración del viaje al Centro de Salud más cercano': 'duration_to_health_facility',
}
# Reverse lookup of ind_labels (column name -> display label); get_indicators
# uses it to turn column names back into labels. If several labels share one
# column name, the label listed last in ind_labels is the one kept.
plotly_chart_labels = dict(zip(ind_labels.values(), ind_labels.keys()))
# Spanish sort-order label -> boolean that create_ind_param stores under 'ascending'.
sort_ascending = dict(Ascendentemente=True, Descendentemente=False)
# Spanish display names keyed by 'group', 'indicator_labels' and 'value'.
# NOTE(review): presumably handed to plotly as the labels of the radar chart -- confirm.
plotly_radar_labels = dict(group='Grupo', indicator_labels='Indicador', value='Valor')
def create_ind_param(label, sort_asc):
    '''
    Translate a display label and a sort-order label into a sort specification.

    Parameters
    ----------
    label: key of ind_labels (display label of the indicator).
    sort_asc: key of sort_ascending ('Ascendentemente' or 'Descendentemente').

    Returns
    -------
    dict with 'col_name' (indicator column) and 'ascending' (bool).

    Raises
    ------
    KeyError: if either label is not a known key.
    '''
    column = ind_labels[label]
    ascending = sort_ascending[sort_asc]
    return dict(col_name=column, ascending=ascending)
def get_hex_neighbours(hex_id, range_distance=2):
    '''
    Collect the H3 cells around `hex_id` and the outline they cover together.

    Parameters
    ----------
    hex_id: H3 cell id passed to hex_range_distances.
    range_distance: range passed to hex_range_distances.

    Returns
    -------
    (ids, geometry): list of the distinct cell ids returned for every distance
    (order is whatever the merged set yields) and the union of their polygons.
    '''
    # hex_range_distances yields one set of ids per distance; merge them all.
    rings = hex_range_distances(hex_id, range_distance)
    cell_ids = list(set.union(*rings))
    cell_polygons = list(map(geo_boundary_to_polygon, cell_ids))
    merged_outline = unary_union(cell_polygons)
    return cell_ids, merged_outline
def get_zones(hexs, hex_col='hex', range_distance=2):
    '''
    Build one zone per row of `hexs` out of the H3 neighbourhood of its cell.

    Parameters
    ----------
    hexs: table with one H3 cell id per row, stored in column `hex_col`.
    hex_col: name of the column holding the H3 cell id.
    range_distance: forwarded to get_hex_neighbours.

    Returns
    -------
    zones: regions represented by hexagon sets from the H3 geospatial indexing system,
    as a GeoDataFrame (CRS epsg:4326) with columns 'zone_id' (the former index
    of `hexs`), 'h3cluster' (cell ids of the zone) and 'geometry'.
    '''
    def neighbourhood(row):
        # -> (list of cell ids, merged geometry), expanded into two columns
        return get_hex_neighbours(row[hex_col], range_distance)

    clusters = hexs.apply(neighbourhood, axis=1, result_type='expand').reset_index()
    print('HEXclusters shape:', clusters.shape)
    clusters.columns = ['zone_id', 'h3cluster', 'geometry']
    zones = gpd.GeoDataFrame(clusters)
    return zones.set_crs('epsg:4326')
def filter_green_areas(green_areas, zones, hexs):
    '''
    Keep the green areas that fall in a zone and attach the hexagon indicators.

    Parameters
    ----------
    green_areas: GeoDataFrame of candidate green areas. Its index is moved to
        a column named 'index' (so the index is expected to be unnamed).
    zones: GeoDataFrame with an 'h3cluster' column (iterable of H3 ids per
        zone), e.g. the output of get_zones.
    hexs: GeoDataFrame with a 'hex' column and the indicator columns; it is
        reprojected to EPSG:4326 before joining.

    Returns
    -------
    GeoDataFrame with one row per (green area, intersecting zone hexagon) pair.
    '''
    # reset_index / to_crs already return new objects, so the inputs are never
    # mutated and no explicit copy is needed.
    indexed_areas = green_areas.reset_index()
    hexs_wgs84 = hexs.to_crs(epsg=4326)

    in_zone = (gpd.sjoin(indexed_areas, zones)  # Mask green areas with zones
               .drop_duplicates('index')  # An area intersecting several zones is kept once
               .drop(['index_right', 'h3cluster'], axis=1))  # Remove unused cols

    # Select the hexagons belonging to any zone with isin instead of pasting
    # the id list into a query string (same rows, no expression parsing).
    zone_cells = {h3id for cluster in zones['h3cluster'] for h3id in cluster}
    nbs_hexs = hexs_wgs84[hexs_wgs84['hex'].isin(zone_cells)]

    return gpd.sjoin(in_zone, nbs_hexs)  # Add indicators to candidates
def sample_n_per_group(group, sample_sizes):
    '''
    Randomly sample one group, using the size registered for it.

    Parameters
    ----------
    group: DataFrame carrying a `name` attribute (as set by groupby.apply).
    sample_sizes: mapping from group name to the number of rows to draw.

    Returns
    -------
    DataFrame with `sample_sizes[group.name]` randomly chosen rows of `group`.
    '''
    size = sample_sizes[group.name]
    return pd.DataFrame.sample(group, n=size)
def sample_random_candidates(candidates, group_col, n):
    '''
    Random sample of at most N rows for each group (zone) of `candidates`.

    Parameters
    ----------
    candidates: table of candidate rows.
    group_col: name of the column identifying the group of each row.
    n: Size of sample for each group; groups with fewer than `n` rows
        contribute all of their rows.

    Returns
    -------
    The sampled rows of all groups in one table (group_keys=False, so no
    group level is added to the index).
    '''
    # Cap the sample size at the group size: sampling without replacement
    # cannot draw more rows than the group holds.
    sizes = candidates[group_col].value_counts().clip(upper=n).to_dict()
    # Group on the same column the sizes were counted on. The grouping used to
    # be hard-coded to 'zone_id', which broke any other `group_col`.
    grouped = candidates.groupby(group_col, group_keys=False)
    return grouped.apply(sample_n_per_group, sample_sizes=sizes)
def calc_range_distances(container):
    '''
    Pick the largest range distance whose r_dists entry stays small
    relative to `container`.

    Parameters
    ----------
    container: object exposing an `area` attribute.

    Returns
    -------
    The first r in 3, 2, 1, 0 with r_dists[r] / container.area < 0.2,
    or None when no r satisfies the condition.
    '''
    fitting = (r for r in np.arange(3, -1, -1)
               if (r_dists[r] / container.area) < 0.2)
    return next(fitting, None)
def calc_candidates(hexs_orig, n_candidates, radius, threshold, primary_ind, secondary_ind, green_areas, range_dist):
    '''
    Greedily pick the best-ranked hexagons and return their zones and green areas.

    Hexagons are ranked by the primary indicator, then the secondary one. The
    best remaining hexagon is taken repeatedly; after each pick, every
    hexagon intersecting the pick's buffer is discarded so that picks are
    spread out. Selection stops when `n_candidates` were picked, when the best
    remaining hexagon no longer meets `threshold`, or when no hexagon is left.

    Parameters
    ----------
    hexs_orig: GeoDataFrame with a 'hex' column and the indicator columns; it
        is not modified.
    n_candidates: maximum number of hexagons to pick.
    radius: buffer distance around each hexagon, in units of EPSG:32718 (meters).
    threshold: limit on the primary indicator (<= when sorting ascending,
        >= when sorting descending).
    primary_ind, secondary_ind: dicts with 'col_name' and 'ascending', as
        built by create_ind_param.
    green_areas: GeoDataFrame forwarded to filter_green_areas.
    range_dist: range distance forwarded to get_zones.

    Returns
    -------
    (zones_df, filtered_green_areas)
    '''
    sort_cols = [primary_ind['col_name'], secondary_ind['col_name']]
    sort_order = [primary_ind['ascending'], secondary_ind['ascending']]

    hexs = hexs_orig.copy()  # Freely mutate cached object
    hexs = hexs.to_crs(epsg=32718)
    hexs['buffer'] = hexs.geometry.buffer(radius)  # meters

    # Sorted once: the boolean filters below keep row order, so the frame
    # stays ranked and row 0 is always the best remaining hexagon.
    target_hex = hexs.sort_values(sort_cols, ascending=sort_order)
    print('target_hexs shape:', target_hex.shape)

    def threshold_met():
        # Running out of hexagons ends the selection instead of raising
        # IndexError on iloc[0].
        if target_hex.empty:
            return False
        best_value = target_hex.iloc[0][primary_ind['col_name']]
        if primary_ind['ascending']:
            return best_value <= threshold
        return best_value >= threshold

    candidates = []
    while (len(candidates) < n_candidates) and threshold_met():
        best = target_hex.iloc[0]
        # 1. Add hex id to candidates
        candidate_idx = best['hex']
        candidates.append(candidate_idx)
        # 2. Get the neighborhood buffer used to discard nearby hexagons
        filter_buffer = best['buffer']
        # 3. Remove candidate from target set
        target_hex = target_hex[target_hex['hex'] != candidate_idx]
        # 4. Filter the candidate set (exclude hex closest neighbours)
        target_hex = target_hex[~target_hex.geometry.intersects(filter_buffer)]

    print('Candidates length:', len(candidates))
    # NOTE(review): when no hexagon meets the threshold, `candidates` is empty
    # and get_zones receives an empty frame -- confirm it copes with that.
    candidates_df = hexs[hexs['hex'].isin(candidates)]
    print('candidates_df shape:', candidates_df.shape)
    zones_df = get_zones(candidates_df, range_distance=range_dist)
    print('zones shape:', zones_df.shape)
    filtered_green_areas = filter_green_areas(green_areas, zones_df, hexs)
    return zones_df, filtered_green_areas
def get_indicators(indicators, groups, total_candidates):
    '''
    Get groups average indicators in long format.

    Parameters
    ----------
    indicators: list of indicator column names.
    groups: dict mapping a group name to a table holding those columns.
    total_candidates: table holding the same columns; its mean and standard
        deviation are the reference for standardizing the group averages.

    Returns
    -------
    DataFrame with one row per (group, indicator) and columns 'group',
    'indicator', 'value' (group mean), 'value_norm' (standardized mean) and
    'indicator_labels' (display label from plotly_chart_labels, or the column
    name when it has no label). Empty, with the same columns, when `groups`
    is empty.
    '''
    if not groups:
        return pd.DataFrame(columns=['group', 'indicator', 'value',
                                     'value_norm', 'indicator_labels'])

    # One column per group, then transpose to one row per group. Unlike a
    # pairwise merge this also works when there is a single group.
    means = pd.concat({group_name: group_df[indicators].mean()
                       for group_name, group_df in groups.items()}, axis=1).T

    # Standardize variables with respect to the total candidates to compare in radar plot
    reference = total_candidates[indicators]
    means_norm = (means - reference.mean()) / reference.std()

    long_means = means.stack()
    long_norm = means_norm.stack()

    inds = long_means.reset_index()
    inds.columns = ['group', 'indicator', 'value']
    # Align on (group, indicator) rather than by position: stack may drop NaN
    # cells, and the two tables do not necessarily have NaN in the same cells.
    inds['value_norm'] = long_norm.reindex(long_means.index).values
    inds['indicator_labels'] = inds['indicator'].replace(plotly_chart_labels)
    return inds