saber.prep

calculate_fdc(workdir, df, n_steps=41)

Creates the hindcast_fdc.parquet and hindcast_fdc_transformed.parquet tables in the workdir/tables directory

Parameters:

    workdir (str, required):
        path to the working directory for the project
    df (pd.DataFrame, required):
        the hindcast hydrograph data DataFrame with 1 column per stream, 1 row per timestep, string column names
        containing the stream's ID, and a datetime index, i.e. a shape of (n_timesteps, n_streams). If not provided,
        the function will attempt to load the data from workdir/tables/hindcast_series_table.parquet
    n_steps (int, default 41):
        the number of exceedance probabilities to estimate from 0 to 100%, inclusive. The default of 41 produces
        0, 2.5, 5, ..., 97.5, 100.

Returns:

    None
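
For reference, the default n_steps=41 corresponds to percentiles in 2.5% steps. A minimal sketch mirroring the np.linspace call in the source below:

import numpy as np

# mirrors the first statement of calculate_fdc: 41 probabilities from 100% down to 0%
exceed_prob = np.linspace(100, 0, 41)
print(exceed_prob[:4])   # [100.   97.5  95.   92.5]
print(exceed_prob[-4:])  # [  7.5   5.    2.5   0. ]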

Source code in saber/prep.py
def calculate_fdc(workdir: str, df: pd.DataFrame, n_steps: int = 41) -> None:
    """
    Creates the hindcast_fdc.parquet and hindcast_fdc_transformed.parquet tables in the workdir/tables directory

    Args:
        workdir: path to the working directory for the project
        df: the hindcast hydrograph data DataFrame with 1 column per stream, 1 row per timestep, string column names
            containing the stream's ID, and a datetime index, i.e. a shape of (n_timesteps, n_streams). If not
            provided, the function will attempt to load the data from workdir/tables/hindcast_series_table.parquet
        n_steps: the number of exceedance probabilities to estimate from 0 to 100%, inclusive. Default is 41, which
            produces 0, 2.5, 5, ..., 97.5, 100.

    Returns:
        None
    """
    exceed_prob = np.linspace(100, 0, n_steps)

    # write the ID list to file
    write_table(pd.DataFrame(df.columns, columns=[mid_col, ]), workdir, 'model_ids')

    # calculate the FDC and save to parquet
    fdc_df = df.apply(lambda x: np.transpose(np.nanpercentile(x, exceed_prob)))
    fdc_df.index = exceed_prob
    fdc_df.index.name = 'exceed_prob'
    write_table(fdc_df, workdir, 'hindcast_fdc')

    # transform and prepare for clustering
    fdc_df = pd.DataFrame(np.transpose(Scalar().fit_transform(np.squeeze(fdc_df.values))))
    fdc_df.index = df.columns
    fdc_df.columns = fdc_df.columns.astype(str)
    write_table(fdc_df, workdir, 'hindcast_fdc_trans')
    return
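
A minimal usage sketch, assuming the package is importable as saber and that workdir already contains a tables directory; the path and the synthetic hydrograph below are placeholders:

import numpy as np
import pandas as pd

import saber.prep

# hypothetical hindcast hydrograph: 1 column per stream, a datetime index,
# and string column names holding the stream IDs
dates = pd.date_range('1980-01-01', periods=1_000, freq='D')
flows = np.random.default_rng(0).gamma(2.0, 50.0, size=(1_000, 3))
df = pd.DataFrame(flows, index=dates, columns=['101', '102', '103'])

# writes the FDC tables under workdir/tables
saber.prep.calculate_fdc('/path/to/workdir', df)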

gis_tables(workdir, gauge_gis=None, drain_gis=None)

Generate copies of the gauge and drainage line attribute tables in parquet format using the Saber package vocabulary

Parameters:

    workdir (str, required):
        path to the working directory for the project
    gauge_gis (str, default None):
        path to the GIS dataset (e.g. geopackage) for the gauge locations (points)
    drain_gis (str, default None):
        path to the GIS dataset (e.g. geopackage) for the drainage line locations (polylines)

Returns:

    None

Source code in saber/prep.py
def gis_tables(workdir: str, gauge_gis: str = None, drain_gis: str = None) -> None:
    """
    Generate copies of the gauge and drainage line attribute tables in parquet format using the Saber package vocabulary

    Args:
        workdir: path to the working directory for the project
        gauge_gis: path to the GIS dataset (e.g. geopackage) for the gauge locations (points)
        drain_gis: path to the GIS dataset (e.g. geopackage) for the drainage line locations (polylines)

    Returns:
        None
    """
    if gauge_gis is not None:
        # copy the gauge attribute table to parquet, dropping the point geometries
        if gauge_gis.endswith('.parquet'):
            gdf = gpd.read_parquet(gauge_gis)
        else:
            gdf = gpd.read_file(gauge_gis)
        write_table(pd.DataFrame(gdf.drop('geometry', axis=1)), workdir, 'gauge_table')

    if drain_gis is not None:
        # copy the drainage line attribute table to parquet, replacing the
        # polyline geometries with their centroid coordinates
        if drain_gis.endswith('.parquet'):
            gdf = gpd.read_parquet(drain_gis)
        else:
            gdf = gpd.read_file(drain_gis)
        gdf['centroid_x'] = gdf.geometry.centroid.x
        gdf['centroid_y'] = gdf.geometry.centroid.y
        gdf = gdf.drop('geometry', axis=1)
        write_table(pd.DataFrame(gdf), workdir, 'drain_table')
    return
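
A hedged usage sketch, as with calculate_fdc above; the file paths are placeholders, and any vector format readable by geopandas (or geoparquet) should work:

import saber.prep

# hypothetical paths to the project workdir and GIS datasets
saber.prep.gis_tables(
    '/path/to/workdir',
    gauge_gis='/path/to/gauges.gpkg',       # gauge locations (points)
    drain_gis='/path/to/drain_lines.gpkg',  # drainage lines (polylines)
)
# writes the gauge_table and drain_table parquet files for the project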