Skip to content

saber.gis

create_maps(assign_df=None, drain_gis=None, prefix='')

Runs all the clip functions which create subsets of the drainage lines GIS dataset based on how they were assigned for bias correction.

Parameters:

Name Type Description Default
assign_df pd.DataFrame

the assignment table dataframe

None
drain_gis gpd.GeoDataFrame

a geodataframe of the drainage lines gis dataset

None
prefix str

a prefix for names of the outputs to distinguish between data generated in separate instances

''

Returns:

Type Description
None

None

Source code in saber/gis.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def create_maps(assign_df: pd.DataFrame = None, drain_gis: gpd.GeoDataFrame = None, prefix: str = '') -> None:
    """
    Runs all the clip functions which create subsets of the drainage lines GIS dataset based on how they were assigned
    for bias correction.

    Args:
        assign_df: the assignment table dataframe. When None, the result of read_table('assign_table') is used
        drain_gis: a geodataframe of the drainage lines gis dataset, or a string path which geopandas can read.
            When None, the result of read_gis('drain_gis') is used
        prefix: a prefix for names of the outputs to distinguish between data generated in separate instances

    Returns:
        None

    Raises:
        TypeError: if drain_gis is neither a string nor a GeoDataFrame
    """
    if assign_df is None:
        assign_df = read_table('assign_table')
    if drain_gis is None:
        drain_gis = read_gis('drain_gis')

    # isinstance, unlike an exact type() comparison, also accepts subclasses of str and GeoDataFrame
    if isinstance(drain_gis, str):
        gdf = gpd.read_file(drain_gis)
    elif isinstance(drain_gis, gpd.GeoDataFrame):
        gdf = drain_gis
    else:
        raise TypeError(f'Invalid type for drain_gis: {type(drain_gis)}')

    # every mapper receives the already loaded GeoDataFrame, so none of them needs to read the file again
    map_by_reason(assign_df, gdf, prefix)
    map_by_cluster(assign_df, gdf, prefix)
    map_unassigned(assign_df, gdf, prefix)
    return

histomaps(gdf, metric, prct)

Creates a histogram of the KGE2012 values for the validation set

Parameters:

Name Type Description Default
gdf gpd.GeoDataFrame

a GeoDataFrame containing validation metrics

required
metric str

name of the metric to plot

required
prct str

Percentile of the validation set used to generate the histogram

required

Returns:

Type Description
None

None

Source code in saber/gis.py
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def histomaps(gdf: gpd.GeoDataFrame, metric: str, prct: str) -> None:
    """
    Creates a two panel figure for a validation metric: a stacked histogram of the metric values on the left and a
    map of the gauges colored by the same metric on the right. The figure is saved as '{metric}_{prct}.png' in the
    directory returned by get_dir('gis').

    Args:
        gdf: a GeoDataFrame containing validation metrics
        metric: name of the metric to plot
        prct: Percentile of the validation set used to generate the histogram

    Returns:
        None
    """
    core_columns = [COL_MID, COL_GID, 'geometry']

    # one color per class; the same classes are used for the histogram groups and for the map
    colors = ['#dc112e', '#d6db12', '#da9707', '#13c208', '#0824c2']
    bins = [-10, 0, 0.25, 0.5, 0.75, 1]
    cmap = mpl.colors.ListedColormap(colors)
    norm = mpl.colors.BoundaryNorm(boundaries=bins, ncolors=len(cmap.colors))
    title = metric.replace('KGE2012', 'Kling Gupta Efficiency 2012 - ') + f' {prct}% Gauges Excluded'

    # split the metric values into one group per class so each group can be drawn in its own color
    hist_groups = []
    hist_colors = []
    categorize_by = [-np.inf, 0, 0.25, 0.5, 0.75, 1]
    values = gdf[metric]
    last_idx = len(categorize_by) - 2
    for idx, (lower, upper) in enumerate(zip(categorize_by[:-1], categorize_by[1:])):
        # the last class is closed on the right so a value exactly equal to the top edge (1) is not dropped
        under_upper = values <= upper if idx == last_idx else values < upper
        group = values[(values >= lower) & under_upper]
        if not group.empty:
            hist_groups.append(group.values)
            hist_colors.append(colors[idx])

    fig, (axh, axm) = plt.subplots(
        1, 2, tight_layout=True, figsize=(9, 5), dpi=400, gridspec_kw={'width_ratios': [1, 1]})
    fig.suptitle(title, fontsize=20)

    median = round(gdf[metric].median(), 2)
    axh.set_title(f'Histogram (Median = {median})')
    axh.set_ylabel('Count')
    axh.set_xlabel('KGE 2012')
    axh.hist(hist_groups, color=hist_colors, bins=25, histtype='barstacked', edgecolor='black')
    axh.axvline(median, color='k', linestyle='dashed', linewidth=3)

    axm.set_title('Gauge Map')
    axm.set_ylabel('Latitude')
    axm.set_xlabel('Longitude')
    axm.set_xticks([])
    axm.set_yticks([])
    # draw on axm explicitly; without ax= the plot goes to a new figure and the saved map panel stays empty.
    # cmap and norm give the map the same class colors as the histogram
    gdf[core_columns + [metric, ]].to_crs(epsg=3857).plot(metric, ax=axm, cmap=cmap, norm=norm)
    cx.add_basemap(ax=axm, zoom=9, source=cx.providers.Esri.WorldTopoMap, attribution='')

    fig.savefig(os.path.join(get_dir('gis'), f'{metric}_{prct}.png'))
    # the figure is not returned, so release it instead of leaving it registered with pyplot
    plt.close(fig)
    return

map_by_cluster(assign_table, drain_gis, prefix='')

Creates Geopackage files in workdir/gis_outputs of the drainage lines based on the fdc cluster they were assigned to

Parameters:

Name Type Description Default
assign_table pd.DataFrame

the assignment table dataframe

required
drain_gis str

path to a drainage line shapefile which can be clipped

required
prefix str

optional, a prefix to prepend to each created file's name

''

Returns:

Type Description
None

None

Source code in saber/gis.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def map_by_cluster(assign_table: pd.DataFrame, drain_gis: str, prefix: str = '') -> None:
    """
    Creates Geopackage files in workdir/gis_outputs of the drainage lines based on the fdc cluster they were assigned to

    One file named '{prefix}_cluster-{number}.gpkg' (no leading '{prefix}_' when prefix is empty) is written for each
    unique cluster value that matches at least one drainage line. Clusters with no matching lines are skipped.

    Args:
        assign_table: the assignment table dataframe
        drain_gis: path to a drainage line shapefile which can be clipped. An already loaded GeoDataFrame is used as is
        prefix: optional, a prefix to prepend to each created file's name

    Returns:
        None
    """
    if isinstance(drain_gis, str):
        drain_gis = gpd.read_file(drain_gis)

    # neither of these depends on the cluster, so compute them once instead of once per cluster
    model_ids = drain_gis[COL_MID].astype(str)
    file_prefix = f'{prefix}_' if prefix else ''

    for num in assign_table[COL_CID].unique():
        logger.info(f'Creating GIS output for cluster: {num}')
        cluster_ids = assign_table[assign_table[COL_CID] == num][COL_MID]
        gdf = drain_gis[model_ids.isin(cluster_ids)]
        if gdf.empty:
            logger.debug(f'Empty filter: No streams are assigned to cluster {num}')
            continue
        gdf.to_file(os.path.join(get_dir('gis'), f'{file_prefix}cluster-{int(num)}.gpkg'))
    return

map_by_reason(assign_df, drain_gis, prefix='')

Creates Geopackage files in workdir/gis_outputs for each unique value in the assignment column

Parameters:

Name Type Description Default
assign_df pd.DataFrame

the assignment table dataframe

required
drain_gis

path to a drainage line shapefile which can be clipped

required
prefix str

a prefix for names of the outputs to distinguish between data generated at separate instances

''

Returns:

Type Description
None

None

Source code in saber/gis.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def map_by_reason(assign_df: pd.DataFrame, drain_gis: str or gpd.GeoDataFrame, prefix: str = '') -> None:
    """
    Creates Geopackage files in workdir/gis_outputs for each unique value in the assignment column

    One file named '{prefix}_assignments_{reason}.gpkg' (no leading '{prefix}_' when prefix is empty) is written for
    each unique reason that matches at least one drainage line. Reasons with no matching lines are skipped.

    Args:
        assign_df: the assignment table dataframe
        drain_gis: path to a drainage line shapefile which can be clipped, or an already loaded GeoDataFrame
        prefix: a prefix for names of the outputs to distinguish between data generated at separate instances

    Returns:
        None
    """
    # read the drainage line shapefile
    if isinstance(drain_gis, str):
        drain_gis = gpd.read_file(drain_gis)

    # neither of these depends on the reason, so compute them once instead of once per reason
    model_ids = drain_gis[COL_MID].astype(str)
    file_prefix = f'{prefix}_' if prefix else ''

    # get the unique list of assignment reasons
    for reason in assign_df[COL_ASN_REASON].unique():
        logger.info(f'Creating GIS output for group: {reason}')
        reason_ids = assign_df[assign_df[COL_ASN_REASON] == reason][COL_MID]
        subset = drain_gis[model_ids.isin(reason_ids)]
        if subset.empty:
            logger.debug(f'Empty filter: No streams are assigned for {reason}')
            continue
        # the file name is only needed once there is something to write
        name = f'{file_prefix}assignments_{reason}.gpkg'
        subset.to_file(os.path.join(get_dir('gis'), name))
    return

map_ids(ids, drain_gis, prefix='', id_column=COL_MID)

Creates Geopackage files in workdir/gis_outputs of the subset of 'drain_gis' with an ID in the specified list

Parameters:

Name Type Description Default
ids list

any iterable containing a series of model_ids

required
drain_gis str

path to the drainage shapefile to be clipped

required
prefix str

optional, a prefix to prepend to each created file's name

''
id_column str

name of the id column in the attributes of the shape table

COL_MID

Returns:

Type Description
None

None

Source code in saber/gis.py
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def map_ids(ids: list, drain_gis: str, prefix: str = '', id_column: str = COL_MID) -> None:
    """
    Writes the rows of the drainage dataset whose id is in the given collection to a Geopackage file named
    '{prefix}_id_subset.gpkg' (no leading '{prefix}_' when prefix is empty) in the directory given by get_dir('gis')

    Args:
        ids: any iterable containing a series of model_ids
        drain_gis: path to the drainage shapefile to be clipped. An already loaded GeoDataFrame is used as is
        prefix: optional, a prefix to prepend to each created file's name
        id_column: name of the id column in the attributes of the shape table

    Returns:
        None
    """
    # only a string needs to be read from disk
    gis_data = gpd.read_file(drain_gis) if isinstance(drain_gis, str) else drain_gis

    separator = '_' if prefix else ''
    file_name = f'{prefix}{separator}id_subset.gpkg'

    has_listed_id = gis_data[id_column].isin(ids)
    selected = gis_data[has_listed_id]
    selected.to_file(os.path.join(get_dir('gis'), file_name))

map_unassigned(assign_table, drain_gis, prefix='')

Creates Geopackage files in workdir/gis_outputs of the drainage lines which haven't been assigned a gauge yet

Parameters:

Name Type Description Default
assign_table pd.DataFrame

the assignment table dataframe

required
drain_gis str

path to a drainage line shapefile which can be clipped

required
prefix str

optional, a prefix to prepend to each created file's name

''

Returns:

Type Description
None

None

Source code in saber/gis.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def map_unassigned(assign_table: pd.DataFrame, drain_gis: str, prefix: str = '') -> None:
    """
    Writes the drainage lines whose assignment reason is 'unassigned' to a Geopackage file named
    '{prefix}_assignments_unassigned.gpkg' (no leading '{prefix}_' when prefix is empty) in the directory given by
    get_dir('gis'). Nothing is written when no drainage line matches.

    Args:
        assign_table: the assignment table dataframe
        drain_gis: path to a drainage line shapefile which can be clipped. An already loaded GeoDataFrame is used as is
        prefix: optional, a prefix to prepend to each created file's name

    Returns:
        None
    """
    logger.info('Creating GIS output for unassigned basins')

    # only a string needs to be read from disk
    gis_data = gpd.read_file(drain_gis) if isinstance(drain_gis, str) else drain_gis

    is_unassigned = assign_table[COL_ASN_REASON] == 'unassigned'
    unassigned_ids = assign_table[is_unassigned][COL_MID].values

    # ids in the gis data are compared in their string form
    gis_ids = gis_data[COL_MID].astype(str)
    unassigned_streams = gis_data[gis_ids.isin(unassigned_ids)]

    if not unassigned_streams.empty:
        separator = '_' if prefix else ''
        file_name = f'{prefix}{separator}assignments_unassigned.gpkg'
        unassigned_streams.to_file(os.path.join(get_dir('gis'), file_name))
    else:
        logger.debug('Empty filter: No streams are unassigned')