Skip to content

saber.fdc

fdc(flows, steps=101, col_name='Q')

Compute flow duration curve (exceedance probabilities) from a list of flows

Parameters:

Name Type Description Default
flows np.array

array of flows

required
steps int

number of steps (exceedance probabilities) to use in the FDC

101
col_name str

name of the column in the returned dataframe

'Q'

Returns:

Type Description
pd.DataFrame

pd.DataFrame with index 'p_exceed' and columns 'Q' (or col_name)

Source code in saber/fdc.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def fdc(flows: np.array, steps: int = 101, col_name: str = 'Q') -> pd.DataFrame:
    """
    Build a flow duration curve table from an array of flows

    Args:
        flows: array of flows; NaN entries are ignored by the percentile calculation
        steps: number of evenly spaced percentages between 100 and 0 (inclusive) to evaluate
        col_name: name given to the single column of flows in the returned dataframe

    Returns:
        pd.DataFrame with index 'p_exceed' (descending from 100 to 0) and one column named
        col_name; each row holds np.nanpercentile(flows, p) for that row's index value p
    """
    # evenly spaced percentages, ordered from 100 down to 0
    percentages = np.linspace(100, 0, steps)
    # flow value at each percentage, skipping NaN observations
    flow_at_percentage = np.nanpercentile(flows, percentages)
    return pd.DataFrame(
        flow_at_percentage,
        index=pd.Index(percentages, name='p_exceed'),
        columns=[col_name],
    )

precalc_sfdcs(assign_row, gauge_data, hindcast_zarr)

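Compute the scalar flow duration curves (simulated FDC flows divided by observed FDC flows) for a single row of the assignment table: one curve for each calendar month and one for the full record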

Parameters:

Name Type Description Default
assign_row pd.DataFrame

a single row from the assignment table

required
gauge_data str

string path to the directory of observed data

required
hindcast_zarr str

string path to the hindcast streamflow dataset

required

Returns:

Type Description
np.ndarray

array of scalars (simulated FDC flows divided by observed FDC flows) with one row per calendar month followed by one row for the full record

Source code in saber/fdc.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def _monthly_and_total_fdcs(flow_df: pd.DataFrame) -> np.ndarray:
    """
    Stack the FDC flows of each calendar month, then of the whole record, as rows of one array

    Args:
        flow_df: dataframe of flows with a datetime index

    Returns:
        np.ndarray with 13 rows (months 1 to 12, then all months combined); each row is the
        flattened flow column returned by fdc()
    """
    curves = [
        fdc(flow_df[flow_df.index.month == month].values.flatten()).values.flatten()
        for month in range(1, 13)
    ]
    # flatten the all-months curve as well so every row has the same 1D shape
    curves.append(fdc(flow_df.values.flatten()).values.flatten())
    return np.array(curves)


def precalc_sfdcs(assign_row: pd.DataFrame, gauge_data: str, hindcast_zarr: str) -> np.ndarray:
    """
    Compute monthly and all-time scalar flow duration curves (simulated / observed) for one
    row of the assignment table

    Args:
        assign_row: a single row from the assignment table; its COL_MID entry selects the simulated
            river and its COL_GID entry names the observed data csv file
        gauge_data: string path to the directory of observed data
        hindcast_zarr: string path to the hindcast streamflow dataset

    Returns:
        np.ndarray of simulated FDC flows divided by observed FDC flows, with one row per calendar
        month (1 to 12) followed by a final row computed from all months together
    """
    # todo
    # read the simulated data; the context manager closes the dataset once the values are loaded
    with xr.open_mfdataset(
            hindcast_zarr, concat_dim='rivid', combine='nested', parallel=True, engine='zarr'
    ) as hz:
        sim_flows = hz['Qout'][:, hz.rivid.values == int(assign_row[COL_MID])].values
        sim_times = pd.to_datetime(hz['time'].values)
    sim_df = pd.DataFrame(sim_flows, index=sim_times, columns=[COL_QSIM])
    # only simulated flows from 1980 onward are used
    sim_df = sim_df[sim_df.index.year >= 1980]

    # read the observed data
    obs_df = pd.read_csv(os.path.join(gauge_data, f'{assign_row[COL_GID]}.csv'), index_col=0)
    obs_df.index = pd.to_datetime(obs_df.index)

    sim_fdcs = _monthly_and_total_fdcs(sim_df)
    obs_fdcs = _monthly_and_total_fdcs(obs_df)

    # NOTE(review): zero observed flows produce inf/NaN ratios here; sfdc() masks such values,
    # confirm whether the same treatment is wanted for the precalculated curves
    sfdcs = np.divide(sim_fdcs, obs_fdcs)
    return sfdcs

sfdc(sim_fdc, obs_fdc)

Compute the scalar flow duration curve (exceedance probabilities) from two flow duration curves

Parameters:

Name Type Description Default
sim_fdc pd.DataFrame

simulated flow duration curve

required
obs_fdc pd.DataFrame

observed flow duration curve

required

Returns:

Type Description
pd.DataFrame

pd.DataFrame with index (exceedance probabilities) and a column of scalars

Source code in saber/fdc.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def sfdc(sim_fdc: pd.DataFrame, obs_fdc: pd.DataFrame) -> pd.DataFrame:
    """
    Compute the scalar flow duration curve (simulated flow / observed flow at each exceedance
    probability) from two flow duration curves

    Args:
        sim_fdc: simulated flow duration curve; its index is reused for the result
        obs_fdc: observed flow duration curve with the same number of rows as sim_fdc

    Returns:
        pd.DataFrame with the index of sim_fdc (exceedance probabilities) and a single column
        'scalars'; rows whose ratio is infinite or NaN (e.g. observed flow of 0) are dropped
    """
    # flatten both curves to 1D so the division is element-wise by position; dividing the
    # DataFrame itself by a 1D array makes pandas align the array with the columns instead
    sim_flows = np.asarray(sim_fdc).ravel()
    obs_flows = np.asarray(obs_fdc).ravel()

    # division by zero is expected and handled below, so silence the numpy warnings
    with np.errstate(divide='ignore', invalid='ignore'):
        ratios = np.divide(sim_flows, obs_flows)

    scalars_df = pd.DataFrame(ratios, columns=['scalars', ], index=sim_fdc.index)
    # x/0 gives +/-inf and 0/0 gives NaN; none of them are usable scalars
    scalars_df = scalars_df.replace([np.inf, -np.inf], np.nan).dropna()
    return scalars_df