Skip to content

saber.assign

assign_gauged(df)

Assigns every basin that contains a gauge to its own gauge for correction

Parameters:

Name Type Description Default
df pd.DataFrame

the assignments table dataframe

required

Returns:

Type Description
pd.DataFrame

The same dataframe df, modified in place, with assignments made for the gauged basins

Source code in saber/assign.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def assign_gauged(df: pd.DataFrame) -> pd.DataFrame:
    """
    Marks every basin that contains a gauge as assigned to its own gauge

    Rows whose COL_GID value is not null have their own COL_MID and COL_GID values written to
    COL_ASN_MID and COL_ASN_GID, and their COL_ASN_REASON set to 'gauged'. All other rows are
    left untouched.

    Args:
        df: the assignments table dataframe

    Returns:
        df itself (modified in place, not a copy) with assignments made for the gauged basins
    """
    has_gauge = df[COL_GID].notna()
    # a gauged basin is corrected with its own model id and its own gauge id
    for assigned_col, source_col in ((COL_ASN_MID, COL_MID), (COL_ASN_GID, COL_GID)):
        df.loc[has_gauge, assigned_col] = df[source_col]
    df.loc[has_gauge, COL_ASN_REASON] = 'gauged'
    return df

mp_assign(df=None)

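Runs the assignment steps in order: first assigns gauged basins to their own gauge (assign_gauged), then assigns the remaining basins within each cluster using multiprocessing (mp_assign_ungauged). When df is None, the assignments table is read with read_table('assign_table').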

Parameters:

Name Type Description Default
df pd.DataFrame

the assignments table dataframe

None

Returns:

Type Description
pd.DataFrame

The assignments table dataframe with assignments made

Source code in saber/assign.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def mp_assign(df: pd.DataFrame = None) -> pd.DataFrame:
    """
    Runs the assignment steps in order: gauged basins first, then the remaining basins by cluster

    Args:
        df: the assignments table dataframe. When None, it is loaded with read_table('assign_table')

    Returns:
        the assignments table after assign_gauged and mp_assign_ungauged have been applied
    """
    if df is None:
        df = read_table('assign_table')
    df = assign_gauged(df)
    # mp_assign_ungauged accepts only the dataframe and reads 'n_processes' from the state itself;
    # passing the process count as a second positional argument raises a TypeError
    df = mp_assign_ungauged(df)
    return df

mp_assign_ungauged(df)

Parameters:

Name Type Description Default
df pd.DataFrame

the assignments table dataframe with the clustering labels already applied

required

Returns:

Type Description
pd.DataFrame

the assignments table dataframe after the unassigned basins in each cluster have been processed

Source code in saber/assign.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def mp_assign_ungauged(df: pd.DataFrame) -> pd.DataFrame:
    """
    Assigns the unassigned basins of each cluster using a multiprocessing Pool

    For every cluster number from 0 through the largest value in COL_CID, each basin in that cluster whose
    COL_ASN_REASON is 'unassigned' is passed to _map_assign_ungauged together with the full table and the
    cluster's gauged basins. The results are concatenated with the rows of all other basins.
    Clusters with no unassigned basins are skipped.

    Args:
        df: the assignments table dataframe with the clustering labels already applied

    Returns:
        pd.DataFrame: the assignments table. Whenever at least one cluster was processed the index is
        reset to 0..n-1 and the row order may differ from the input.
    """
    with Pool(get_state('n_processes')) as p:
        logger.info('Assign Basins within Clusters')
        for cluster_number in range(df[COL_CID].max() + 1):
            logger.info(f'Assigning basins in cluster {cluster_number}')
            # limit to the basins in this cluster
            cluster_df = df[df[COL_CID] == cluster_number]
            # List the unassigned basins BEFORE narrowing to gauged basins: assign_gauged marks every
            # basin that has a gauge as 'gauged', so looking for 'unassigned' rows in the gauged-only
            # subset finds nothing once assign_gauged has run.
            mids = cluster_df[cluster_df[COL_ASN_REASON] == 'unassigned'][COL_MID].values
            if len(mids) == 0:
                # nothing to assign here; pd.concat raises ValueError when given an empty list
                continue
            # filter the cluster to only its gauged basins, the candidates handed to the worker
            c_df = cluster_df[cluster_df[COL_GID].notna()]
            # NOTE(review): _map_assign_ungauged is presumed to return the updated row(s) for one
            # model id as a DataFrame - confirm against its definition
            df = pd.concat([
                pd.concat(p.starmap(_map_assign_ungauged, [(df, c_df, x) for x in mids])),
                df[~df[COL_MID].isin(mids)]
            ]).reset_index(drop=True)

    return df