Comparing raster data for regions of interest¶
In this notebook we compare our 30m resolution rasters of risk with those from the Wildfire Risk to Communities (WRC) project. We visualize regions where the two datasets strongly differ, as well as regions of historical significance (e.g. previous urban conflagrations). These comparisons showcase examples of why the datasets differ and explain patterns of how those differences propagate into CONUS-wide variations.
import warnings
import geopandas as gpd
import icechunk
import matplotlib.pyplot as plt
import xarray as xr
from shapely import Point
from ocr import catalog
warnings.filterwarnings('ignore')
Load in the raster level data.
version = '0.13.1'
storage = icechunk.s3_storage(
bucket='carbonplan-ocr',
prefix=f'output/fire-risk/tensor/production/v{version}/ocr.icechunk',
from_env=True,
)
repo = icechunk.Repository.open(storage)
session = repo.readonly_session('main')
ds = xr.open_dataset(session.store, engine='zarr', chunks={})
Load wind direction distribution for this region
wind_dist = (
catalog.get_dataset('conus404-ffwi-p99-wind-direction-distribution-30m-4326')
.to_xarray()
.wind_direction_distribution
)
Load in shapefiles created by the compare-risk-buildings.ipynb notebook that identify tracts to focus on.
tracts_dict = {}
for statistic in ['corr_low', 'low_bias', 'high_bias']:
tracts_dict[statistic] = gpd.read_file(f'{statistic}_tracts_{version}.shp', index_col=0)
states = gpd.read_file('s3://carbonplan-risks/shapefiles/cb_2018_us_state_20m.zip')
states = states[~(states['STUSPS'].isin(['AK', 'PR', 'HI']))]
var_name_dict = {'corr_low': 'low correlation', 'high_bias': 'high bias', 'low_bias': 'low bias'}
def plot_regions_two_datasets(
ds,
wind_dist,
bounding_box,
title,
ds1_title='CarbonPlan',
ds2_title='Scott (2024)',
points_to_plot=None,
vlims=None,
):
fig = plt.figure(figsize=(16, 8))
# create a 2x8 grid, so the bottom row can have 8 equal panels
gs = fig.add_gridspec(nrows=2, ncols=8, height_ratios=[1, 1], hspace=0.35, wspace=0.35)
# first row: 3 panels that span the 8 columns
ax_top1 = fig.add_subplot(gs[0, 0:3])
ax_top2 = fig.add_subplot(gs[0, 3:6])
ax_top3 = fig.add_subplot(gs[0, 6:8])
# second row - 8 panels for the wind directions
axes_bottom = [fig.add_subplot(gs[1, i]) for i in range(8)]
if vlims is not None:
cax = ds.wind_risk_2011.plot(ax=ax_top1, vmin=vlims[0], vmax=vlims[1])
else:
cax = ds.wind_risk_2011.plot(ax=ax_top1)
ax_top1.set_title(ds1_title)
ds.USFS_RPS.plot(ax=ax_top2, vmin=cax.get_clim()[0], vmax=cax.get_clim()[1])
ax_top2.set_title(ds2_title)
states.plot(ax=ax_top3, color='white', edgecolor='black')
if points_to_plot is None:
gpd.GeoSeries(Point(bounding_box[1].start, bounding_box[0].start)).set_crs(
'EPSG:4326'
).centroid.plot(ax=ax_top3)
else:
points_to_plot.centroid.plot(ax=ax_top3)
# Row 2: Wind direction distribution for all 8 directions
# wind is indexed w.r.t. the winds from CONUS404 which are indexed as 0 is from the north, 1 is from the northeast
directions = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']
for i, ax in enumerate(axes_bottom):
# only add colorbar to rightmost plot
if i == 7:
add_colorbar = True
else:
add_colorbar = False
wind_dist.sel(
latitude=bounding_box[0],
longitude=bounding_box[1],
wind_direction=i,
).plot(ax=ax, cmap='viridis', vmin=0, vmax=1, add_colorbar=add_colorbar)
ax.set_title(f'Wind from\nDirection: {directions[i]}')
for ax in [ax_top1, ax_top2, ax_top3] + axes_bottom: # axes is an array from plt.subplots(...)
ax.tick_params(
axis='both',
which='both',
bottom=False,
top=False,
left=False,
right=False,
labelbottom=False,
labelleft=False,
)
ax.set_xlabel('')
ax.set_ylabel('')
if title is not None:
plt.suptitle(title, fontsize='xx-large')
plt.tight_layout()
plt.suptitle(title)
plt.tight_layout()
plt.show()
Plotting specific regions where the OCR and WRC data differed substantially¶
We use the results of the hot spot identification in the compare-risk-buildings.ipynb notebook to direct us to places where OCR and WRC differed substantially as measured either by correlation or low/high bias.
Areas where datasets are poorly correlated¶
By plotting the census tracts with the lowest correlations we see the two main causes of low agreement:
- The influence of wind, in which we preferentially spread risk in some places while WRC uniformly spreads it. We'll call this "wind effect". This can either increase or decrease the risk values depending on whether the wind is smearing the burn probability preferentially away from or toward developed areas.
- The WRC project's underlying burn probability maps decrease burn probability in many developed areas. This is visible in maps where Scott has more detailed spatial structure in contrast to our blurrier maps. We'll call this the "development effect". This only contributes to a high bias in our data with respect to Scott (2024).
for statistic in ['corr_low']:
for index in tracts_dict['corr_low'].index:
bounds = tracts_dict['corr_low'].loc[[index]].bounds
bounding_box = [
slice(bounds.loc[index].miny, bounds.loc[index].maxy),
slice(bounds.loc[index].minx, bounds.loc[index].maxx),
]
plot_regions_two_datasets(
ds.sel(latitude=bounding_box[0], longitude=bounding_box[1]),
wind_dist,
bounding_box,
f'Area where CarbonPlan data has {var_name_dict[statistic]} w.r.t. Scott (2024)',
points_to_plot=tracts_dict['corr_low'].loc[[index]].centroid,
ds1_title='OCR',
)
Areas where we are high-biased with respect to Scott et al. (2024)¶
As with the plots of areas of low correlation above, we see strong high biases in places where both the wind effect and the development effect are at play. The sixth row below shows an example of where spatial artifacts from the underlying conditional risk to potential structures (cRPS) are visible in our data. This is because our higher burn probability (elevated due to the wind effect), makes them visible in our data while they are washed out in the Scott et al. (2024) data.
for statistic in ['high_bias']:
for index in tracts_dict[statistic].index:
bounds = tracts_dict[statistic].loc[[index]].bounds
bounding_box = [
slice(bounds.loc[index].miny, bounds.loc[index].maxy),
slice(bounds.loc[index].minx, bounds.loc[index].maxx),
]
plot_regions_two_datasets(
ds.sel(latitude=bounding_box[0], longitude=bounding_box[1]),
wind_dist,
bounding_box,
f'Area where CarbonPlan data has {var_name_dict[statistic]} w.r.t. Scott (2024)',
points_to_plot=tracts_dict[statistic].loc[[index]].centroid,
ds1_title='OCR',
)
Areas where we are low-biased with respect to Scott et al. (2024)¶
The develoment effect does not contribute to places where we are low-biased with respect to the Scott et al. (2024) data. Low-biased areas are places where our risk maps depict lower risk values because we preferentially spread burn probability away from developed areas.
for statistic in ['low_bias']:
for index in tracts_dict[statistic].index:
bounds = tracts_dict[statistic].loc[[index]].bounds
bounding_box = [
slice(bounds.loc[index].miny, bounds.loc[index].maxy),
slice(bounds.loc[index].minx, bounds.loc[index].maxx),
]
plot_regions_two_datasets(
ds.sel(latitude=bounding_box[0], longitude=bounding_box[1]),
wind_dist,
bounding_box,
f'Area where CarbonPlan data has {var_name_dict[statistic]} w.r.t. Scott (2024)',
points_to_plot=tracts_dict[statistic].loc[[index]].centroid,
ds1_title='OCR',
)
Specific areas of interest¶
We look at a handful of regions of interest to understand how our estimates compare with those of Scott, with the additional context of the wind directions used to inform the burn probability spread.
# specify coordinates of locations you want to inspect
locations = {
'Griffith Observatory': [34.15883207704988, -118.3194545446337],
'Altadena, CA (Eaton Fire)': [34.197179348047236, -118.14920706081259],
'Paradise, CA (Camp Fire)': [39.81028, -121.43722],
'Superior, CO (Marshall Fire)': [39.95, -105.15],
'Malibu, CA (Woolsey Fire)': [34.2350, -118.7013],
'Leavenworth, WA': [47.59623, -120.66148],
}
for location, (lat, lon) in locations.items():
bounding_box = [slice(lat - 0.05, lat + 0.05), slice(lon - 0.05, lon + 0.05)]
plot_regions_two_datasets(
ds.sel(latitude=bounding_box[0], longitude=bounding_box[1]),
wind_dist,
bounding_box,
location,
ds1_title='OCR',
# vlims=[0, 2.5],
)
Places with spatial artifacts due to underlying cRPS data¶
Locations around irrigation/farmland in the Midwest and suburban areas with low BP and spotty cRPS contribute spatial artifacts.
lat = 42.14235
lon = -96.71113
bounding_box = [slice(lat - 0.05, lat + 0.05), slice(lon - 0.05, lon + 0.05)]
plot_regions_two_datasets(
ds.sel(latitude=bounding_box[0], longitude=bounding_box[1]),
wind_dist,
bounding_box,
'Near Pender, NE',
ds1_title='OCR',
)
lat = 34.13515
lon = -102.06897
bounding_box = [slice(lat - 0.05, lat + 0.05), slice(lon - 0.05, lon + 0.05)]
plot_regions_two_datasets(
ds.sel(latitude=bounding_box[0], longitude=bounding_box[1]),
wind_dist,
bounding_box,
'Near Plainview, TX',
ds1_title='OCR',
)
lat = 40.29301
lon = -74.63134
bounding_box = [slice(lat - 0.05, lat + 0.05), slice(lon - 0.05, lon + 0.05)]
plot_regions_two_datasets(
ds.sel(latitude=bounding_box[0], longitude=bounding_box[1]),
wind_dist,
bounding_box,
'Near West Windsor, NJ',
ds1_title='OCR',
)