`saber.io`

`get_dir(dir_name)`

Get the path to a directory within the workspace

Parameters:

Name	Type	Description	Default
`dir_name`	`str`	name of the directory	required

Returns:

Type	Description
`str`	path to the directory

Source code in saber/io.py

def get_dir(dir_name: str) -> str:
    """
    Get the path to a directory within the workspace

    Args:
        dir_name: name of the directory. Must be one of DIR_TABLES, DIR_GIS, DIR_CLUSTERS or DIR_VALID

    Returns:
        path to the directory (`dir_name` joined onto the module-level `workdir`). The path is returned
        even when it does not exist on disk; in that case a warning is logged.

    Raises:
        ValueError: if dir_name is not one of the recognized workspace directory names
    """
    # Raise explicitly rather than `assert`: assertions are removed when Python runs with -O,
    # which would silently disable this validation.
    if dir_name not in (DIR_TABLES, DIR_GIS, DIR_CLUSTERS, DIR_VALID):
        raise ValueError(f'"{dir_name}" is not a valid directory name')
    dir_path = os.path.join(workdir, dir_name)
    if not os.path.exists(dir_path):
        # deliberately only a warning: the caller still receives the path and decides what to do
        logger.warning(f'"{dir_name}" directory does not exist. Error imminent: {dir_path}')
    return dir_path

`get_state(prop)`

Get a state variable provided by the config or a controlled global variable

Parameters:

Name	Type	Description	Default
`prop`	`str`	name of the global variable	required

Returns:

Type	Description
	value of the global variable

Source code in saber/io.py

def get_state(prop: str) -> int or str:
    """
    Get a state variable provided by the config or a controlled global variable

    The lookup is done against this module's global namespace, so any module-level name is accepted.

    Args:
        prop: name of the global variable

    Returns:
        value of the global variable

    Raises:
        ValueError: if prop is not the name of a module-level variable
    """
    # `assert cond, ValueError(...)` raised AssertionError (and nothing at all under -O);
    # raise the intended ValueError explicitly instead.
    try:
        return globals()[prop]
    except KeyError:
        raise ValueError(f'"{prop}" is not a recognized project state key') from None

`init_workdir(path=None, overwrite=False)`

Creates the correct directories for a Saber project within the specified directory

Parameters:

Name	Type	Description	Default
`path`	`str`	the path to a directory where you want to create workdir subdirectories	`None`
`overwrite`	`bool`	if True, delete any existing directories and files, then recreate the directory structure	`False`

Returns:

Type	Description
`None`	None

Raises:

Type	Description
`NotADirectoryError`	if the path is not a directory

Source code in saber/io.py

def init_workdir(path: str = None, overwrite: bool = False) -> None:
    """
    Creates the correct directories for a Saber project within the specified directory

    Args:
        path: the path to a directory where you want to create workdir subdirectories.
            Defaults to the module-level `workdir` when None.
        overwrite: if True and the directory already exists, delete it (and everything in it)
            and recreate the directory structure

    Returns:
        None

    Raises:
        NotADirectoryError: if the path exists but is not a directory
    """
    if path is None:
        path = workdir

    # Fail up front with the documented exception instead of failing part-way through
    # rmtree/mkdir with a platform-dependent error.
    if os.path.exists(path) and not os.path.isdir(path):
        raise NotADirectoryError(f'Provided path to workdir is not a directory: {path}')

    if not os.path.exists(path):
        logger.warning(f'Provided path to workdir does not exist. Attempting to create: {path}')
        os.makedirs(path)
    elif overwrite:
        # Report the directory that is actually deleted; it differs from the global
        # `workdir` whenever an explicit `path` was passed.
        logger.warning(f'overwrite=True, Deleting existing workdir: {path}')
        shutil.rmtree(path)
        os.makedirs(path)

    # create any project subdirectory that is missing; existing ones are left untouched
    for d in DIR_LIST:
        p = os.path.join(path, d)
        if not os.path.exists(p):
            os.mkdir(p)

`list_cluster_files(n_clusters='all')`

Find all the kmeans model files in the project directory.

Parameters:

Name	Type	Description	Default
`n_clusters`	`int or Iterable`	the number of clusters to find models for. If 'all', all models will be returned	`'all'`

Returns:

Type	Description
`List[str]`	List of paths to the kmeans model files

Raises:

Type	Description
`TypeError`	if n_clusters is not an int, iterable of int, or 'all'

Source code in saber/io.py

def list_cluster_files(n_clusters: int or Iterable = 'all') -> List[str]:
    """
    Find all the kmeans model files in the project directory.

    Args:
        n_clusters: the number of clusters to find models for. If 'all', all models will be returned.
            May be a single int or an iterable of ints.

    Returns:
        List of paths to the kmeans model files. For 'all' and for a single int only files that
        exist on disk are returned (glob); for an iterable the expected paths are built without
        checking that they exist.

    Raises:
        TypeError: if n_clusters is not an int, iterable of int, or 'all'
    """
    # A str is itself an Iterable, so without this guard a value such as '12' would be expanded
    # character by character into kmeans-1.pickle and kmeans-2.pickle instead of being rejected.
    if isinstance(n_clusters, str) and n_clusters != 'all':
        raise TypeError(f'n_clusters should be of type int, an iterable of int, or "all". Got "{n_clusters}"')

    kmeans_dir = os.path.join(workdir, DIR_CLUSTERS)
    if n_clusters == 'all':
        # natural sort so that kmeans-10 comes after kmeans-2
        return natsorted(glob.glob(os.path.join(kmeans_dir, 'kmeans-*.pickle')))
    elif isinstance(n_clusters, int):
        return glob.glob(os.path.join(kmeans_dir, f'kmeans-{n_clusters}.pickle'))
    elif isinstance(n_clusters, Iterable):
        return natsorted([os.path.join(kmeans_dir, f'kmeans-{i}.pickle') for i in n_clusters])
    else:
        raise TypeError('n_clusters should be of type int or an iterable')

`read_config(config)`

Read the config file to set paths and values

Parameters:

Name	Type	Description	Default
`config`	`str`	path to the config file	required

Returns:

Type	Description
`None`	None

Source code in saber/io.py

def read_config(config: str) -> None:
    """
    Load project settings from a YAML config file into this module's global variables

    Keys that are not listed in VALID_YAML_KEYS are reported with an error log and skipped.
    After loading, the configured paths are checked and a warning is logged for each one
    that is missing; missing paths do not raise.

    Args:
        config: path to the config file

    Returns:
        None

    Raises:
        ValueError: if the config file parses to nothing (empty file)
    """
    # parse the YAML document
    with open(config, 'r') as config_file:
        settings = yaml.safe_load(config_file)

    if settings is None:
        raise ValueError('Config file is empty')

    # publish every recognized option as a module-level global
    for option, setting in settings.items():
        if option in VALID_YAML_KEYS:
            logger.info(f'Config: {option} = {setting}')
            globals()[option] = setting
        else:
            logger.error(f'Ignored invalid key in config file: "{option}". Consult docs for valid keys.')

    # sanity-check the configured locations; these are warnings only
    if not os.path.isdir(workdir):
        logger.warning(f'Workspace directory does not exist: {workdir}')
    if not os.path.exists(drain_gis):
        logger.warning(f'Drainage network GIS file does not exist: {drain_gis}')
    if not os.path.exists(gauge_gis):
        logger.warning(f'Gauge network GIS file does not exist: {gauge_gis}')
    if not os.path.isdir(gauge_data):
        logger.warning(f'Gauge data directory does not exist: {gauge_data}')
    # hindcast_zarr is treated as a glob pattern: warn when it matches nothing
    if not glob.glob(hindcast_zarr):
        logger.warning(f'Hindcast zarr directory does not exist or is empty: {hindcast_zarr}')

`read_gis(name)`

Read a GIS file from the project directory by name.

Parameters:

Name	Type	Description	Default
`name`	`str`	name of the GIS file to read	required

Returns:

Type	Description
`gpd.GeoDataFrame`	gpd.GeoDataFrame

Raises:

Type	Description
`ValueError`	if the GIS format is not recognized

Source code in saber/io.py

def read_gis(name: str) -> gpd.GeoDataFrame:
    """
    Read a GIS file from the project directory by name.

    Args:
        name: name of the GIS file to read. Must be in VALID_GIS_NAMES or be a key of
            GENERATE_GIS_NAMES_MAP

    Returns:
        gpd.GeoDataFrame read from the path resolved for `name`

    Raises:
        ValueError: if name is not a recognized GIS dataset name
    """
    # Raise the documented ValueError explicitly; the previous `assert ..., ValueError(...)`
    # raised AssertionError and was skipped entirely under `python -O`.
    if name not in VALID_GIS_NAMES and name not in GENERATE_GIS_NAMES_MAP:
        raise ValueError(f'"{name}" is not a recognized GIS dataset name')
    return gpd.read_file(_get_gis_path(name))

`read_table(table_name)`

Read a table from the project directory by name.

Parameters:

Name	Type	Description	Default
`table_name`	`str`	name of the table to read	required

Returns:

Type	Description
`pd.DataFrame`	pd.DataFrame

Raises:

Type	Description
`FileNotFoundError`	if the table does not exist in the correct directory with the correct name
`ValueError`	if the table format is not recognized

Source code in saber/io.py

def read_table(table_name: str) -> pd.DataFrame:
    """
    Load one of the project's tables, picking the reader from the file extension

    Supported extensions are .parquet (read with the fastparquet engine), .feather and
    .csv (every column read as str).

    Args:
        table_name: name of the table to read

    Returns:
        pd.DataFrame

    Raises:
        FileNotFoundError: if the table does not exist in the correct directory with the correct name
        ValueError: if the table format is not recognized
    """
    path = _get_table_path(table_name)
    if not os.path.exists(path):
        raise FileNotFoundError(f'Table does not exist: {path}')

    _, extension = os.path.splitext(path)
    # one zero-argument loader per supported extension; only the selected one is executed
    loaders = {
        '.parquet': lambda: pd.read_parquet(path, engine='fastparquet'),
        '.feather': lambda: pd.read_feather(path),
        '.csv': lambda: pd.read_csv(path, dtype=str),
    }
    if extension not in loaders:
        raise ValueError(f'Unknown table format: {extension}')
    return loaders[extension]()

`write_gis(gdf, name)`

Write a GIS file to the correct location in the project directory

Parameters:

Name	Type	Description	Default
`gdf`	`gpd.GeoDataFrame`	the geopandas GeoDataFrame to write to disk	required
`name`	`str`	the name of the GIS file	required

Returns:

Type	Description
`None`	None

Raises:

Type	Description
`ValueError`	if the GIS dataset name is not recognized

Source code in saber/io.py

def write_gis(gdf: gpd.GeoDataFrame, name: str) -> None:
    """
    Write a GIS file to the correct location in the project directory

    The file is always written with the GPKG (GeoPackage) driver.

    Args:
        gdf: the geopandas GeoDataFrame to write to disk
        name: the name of the GIS file. Must be in VALID_GIS_NAMES or be a key of
            GENERATE_GIS_NAMES_MAP

    Returns:
        None

    Raises:
        ValueError: if the GIS dataset name is not recognized
    """
    # Raise the documented ValueError explicitly; the previous `assert ..., ValueError(...)`
    # raised AssertionError and was skipped entirely under `python -O`.
    if name not in VALID_GIS_NAMES and name not in GENERATE_GIS_NAMES_MAP:
        raise ValueError(f'"{name}" is not a recognized GIS dataset name')
    gdf.to_file(_get_gis_path(name), driver='GPKG')

`write_table(df, name)`

Write a table to the correct location in the project directory

Parameters:

Name	Type	Description	Default
`df`	`pd.DataFrame`	the pandas DataFrame to write	required
`name`	`str`	the name of the table to write	required

Returns:

Type	Description
`None`	None

Raises:

Type	Description
`ValueError`	if the table format is not recognized

Source code in saber/io.py

def write_table(df: pd.DataFrame, name: str) -> None:
    """
    Save a DataFrame as one of the project's tables, picking the writer from the file extension

    Supported extensions are .parquet, .feather and .csv (written without the index).

    Args:
        df: the pandas DataFrame to write
        name: the name of the table to write

    Returns:
        None

    Raises:
        ValueError: if the table format is not recognized
    """
    destination = _get_table_path(name)
    _, extension = os.path.splitext(destination)
    # one zero-argument writer per supported extension; only the selected one is executed
    writers = {
        '.parquet': lambda: df.to_parquet(destination),
        '.feather': lambda: df.to_feather(destination),
        '.csv': lambda: df.to_csv(destination, index=False),
    }
    if extension not in writers:
        raise ValueError(f'Unknown table format: {extension}')
    return writers[extension]()