Skip to content

saber.io

get_dir(dir_name)

Get the path to a directory within the workspace

Parameters:

Name Type Description Default
dir_name str

name of the directory

required

Returns:

Type Description
str

path to the directory

Source code in saber/io.py
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
def get_dir(dir_name: str) -> str:
    """
    Resolve one of the workspace's known subdirectory names to a path under the working directory

    Args:
        dir_name: must equal one of DIR_TABLES, DIR_GIS, DIR_CLUSTERS, or DIR_VALID

    Returns:
        the working directory joined with dir_name; returned even if nothing exists at that path
    """
    assert dir_name in (DIR_TABLES, DIR_GIS, DIR_CLUSTERS, DIR_VALID), \
        f'"{dir_name}" is not a valid directory name'

    dir_path = os.path.join(workdir, dir_name)
    if os.path.exists(dir_path):
        return dir_path

    # a missing directory is only reported; the caller still receives the path
    logger.warning(f'"{dir_name}" directory does not exist. Error imminent: {dir_path}')
    return dir_path

get_state(prop)

Get a state variable provided by the config or a controlled global variable

Parameters:

Name Type Description Default
prop

name of the global variable

required

Returns:

Type Description

value of the global variable

Source code in saber/io.py
207
208
209
210
211
212
213
214
215
216
217
218
def get_state(prop) -> int or str:
    """
    Get a state variable provided by the config or a controlled global variable

    Args:
        prop: name of the global variable

    Returns:
        value of the global variable

    Raises:
        ValueError: if prop is not the name of a variable in this module's globals
    """
    # Raise explicitly rather than assert: an assert reports AssertionError (with the
    # ValueError merely as its message) and is removed entirely when Python runs with -O.
    try:
        return globals()[prop]
    except KeyError:
        raise ValueError(f'"{prop}" is not a recognized project state key') from None

init_workdir(path=None, overwrite=False)

Creates the correct directories for a Saber project within the specified directory

Parameters:

Name Type Description Default
path str

the path to a directory where you want to create workdir subdirectories

None
overwrite bool

if True, delete any existing directories and files, then recreate the directory structure

False

Returns:

Type Description
None

None

Raises:

Type Description
NotADirectoryError

if the path is not a directory

Source code in saber/io.py
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
def init_workdir(path: str = None, overwrite: bool = False) -> None:
    """
    Creates the correct directories for a Saber project within the specified directory

    Args:
        path: the path to a directory where you want to create workdir subdirectories.
            When None, the module-level workdir is used.
        overwrite: if True and the directory already exists, delete it with all of its contents and
            recreate it before creating the subdirectories

    Returns:
        None

    Raises:
        NotADirectoryError: if the path exists but is not a directory
    """
    if path is None:
        path = workdir

    if not os.path.exists(path):
        logger.warning(f'Provided path to workdir does not exist. Attempting to create: {path}')
        os.makedirs(path)
    elif not os.path.isdir(path):
        # fail up front with the documented error instead of relying on a later mkdir/rmtree call to fail
        raise NotADirectoryError(f'Provided path to workdir is not a directory: {path}')
    elif overwrite:
        # report the directory that is actually removed, which may differ from the global workdir
        logger.warning(f'overwrite=True, Deleting existing workdir: {path}')
        shutil.rmtree(path)
        os.makedirs(path)

    # create whichever of the expected subdirectories are missing; existing ones are left untouched
    for d in DIR_LIST:
        p = os.path.join(path, d)
        if not os.path.exists(p):
            os.mkdir(p)

list_cluster_files(n_clusters='all')

Find all the kmeans model files in the project directory.

Parameters:

Name Type Description Default
n_clusters

the number of clusters to find models for. If 'all', all models will be returned

'all'

Returns:

Type Description
List[str]

List of paths to the kmeans model files

Raises:

Type Description
TypeError

if n_clusters is not an int, iterable of int, or 'all'

Source code in saber/io.py
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
def list_cluster_files(n_clusters: int or Iterable = 'all') -> List[str]:
    """
    Find all the kmeans model files in the project directory.

    Args:
        n_clusters: the number of clusters to find models for. If 'all', all models will be returned

    Returns:
        List of paths to the kmeans model files. For 'all' and for a single int the list holds only
        files matched by glob; for an iterable, one path is built per item without checking that the
        file exists.

    Raises:
        TypeError: if n_clusters is not an int, iterable of int, or 'all'
    """
    kmeans_dir = os.path.join(workdir, DIR_CLUSTERS)
    # Check the type before comparing to 'all': an array-like argument compared with == can produce
    # an elementwise result whose truth value is ambiguous and raises inside the `if`.
    if isinstance(n_clusters, str) and n_clusters == 'all':
        return natsorted(glob.glob(os.path.join(kmeans_dir, 'kmeans-*.pickle')))
    elif isinstance(n_clusters, int):
        return glob.glob(os.path.join(kmeans_dir, f'kmeans-{n_clusters}.pickle'))
    elif isinstance(n_clusters, Iterable):
        # strings other than 'all' are iterable too and still reach this branch
        return natsorted([os.path.join(kmeans_dir, f'kmeans-{i}.pickle') for i in n_clusters])
    else:
        raise TypeError('n_clusters should be of type int or an iterable')

read_config(config)

Read the config file to set paths and values

Parameters:

Name Type Description Default
config str

path to the config file

required

Returns:

Type Description
None

None

Source code in saber/io.py
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def read_config(config: str) -> None:
    """
    Load a YAML config file and copy its recognized keys into this module's global variables

    Args:
        config: path to the config file

    Returns:
        None

    Raises:
        ValueError: if the config file parses to nothing
    """
    with open(config, 'r') as stream:
        settings = yaml.safe_load(stream)

    if settings is None:
        raise ValueError('Config file is empty')

    # recognized keys become module-level state; anything else is reported and skipped
    module_state = globals()
    for key, value in settings.items():
        if key in VALID_YAML_KEYS:
            logger.info(f'Config: {key} = {value}')
            module_state[key] = value
        else:
            logger.error(f'Ignored invalid key in config file: "{key}". Consult docs for valid keys.')

    # sanity checks on the configured locations: problems are logged as warnings, never raised here
    workdir_found = os.path.isdir(workdir)
    if not workdir_found:
        logger.warning(f'Workspace directory does not exist: {workdir}')

    drain_gis_found = os.path.exists(drain_gis)
    if not drain_gis_found:
        logger.warning(f'Drainage network GIS file does not exist: {drain_gis}')

    gauge_gis_found = os.path.exists(gauge_gis)
    if not gauge_gis_found:
        logger.warning(f'Gauge network GIS file does not exist: {gauge_gis}')

    gauge_data_found = os.path.isdir(gauge_data)
    if not gauge_data_found:
        logger.warning(f'Gauge data directory does not exist: {gauge_data}')

    # hindcast_zarr is treated as a glob pattern, so "no matches" covers both missing and empty
    hindcast_matches = glob.glob(hindcast_zarr)
    if not hindcast_matches:
        logger.warning(f'Hindcast zarr directory does not exist or is empty: {hindcast_zarr}')

read_gis(name)

Read a GIS file from the project directory by name.

Parameters:

Name Type Description Default
name str

name of the GIS file to read

required

Returns:

Type Description
gpd.GeoDataFrame

gpd.GeoDataFrame

Raises:

Type Description
ValueError

if the GIS dataset name is not recognized

Source code in saber/io.py
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
def read_gis(name: str) -> gpd.GeoDataFrame:
    """
    Read a GIS file from the project directory by name.

    Args:
        name: name of the GIS file to read; must be in VALID_GIS_NAMES or GENERATE_GIS_NAMES_MAP

    Returns:
        gpd.GeoDataFrame

    Raises:
        ValueError: if the GIS dataset name is not recognized
    """
    # Raise explicitly rather than assert: an assert reports AssertionError instead of the documented
    # ValueError and is removed entirely when Python runs with -O.
    if name not in VALID_GIS_NAMES and name not in GENERATE_GIS_NAMES_MAP:
        raise ValueError(f'"{name}" is not a recognized GIS dataset name')
    return gpd.read_file(_get_gis_path(name))

read_table(table_name)

Read a table from the project directory by name.

Parameters:

Name Type Description Default
table_name str

name of the table to read

required

Returns:

Type Description
pd.DataFrame

pd.DataFrame

Raises:

Type Description
FileNotFoundError

if the table does not exist in the correct directory with the correct name

ValueError

if the table format is not recognized

Source code in saber/io.py
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
def read_table(table_name: str) -> pd.DataFrame:
    """
    Load a named project table into a DataFrame, choosing the reader from the file extension

    Args:
        table_name: name of the table to read

    Returns:
        pd.DataFrame

    Raises:
        FileNotFoundError: if no file exists at the path resolved for the table name
        ValueError: if the file extension is not .parquet, .feather, or .csv
    """
    source = _get_table_path(table_name)
    if not os.path.exists(source):
        raise FileNotFoundError(f'Table does not exist: {source}')

    _, extension = os.path.splitext(source)

    # one lazy reader per supported extension; only the selected one is ever called
    readers = {
        '.parquet': lambda: pd.read_parquet(source, engine='fastparquet'),
        '.feather': lambda: pd.read_feather(source),
        '.csv': lambda: pd.read_csv(source, dtype=str),  # every csv column is read as text
    }
    reader = readers.get(extension)
    if reader is None:
        raise ValueError(f'Unknown table format: {extension}')
    return reader()

write_gis(gdf, name)

Write a GIS file to the correct location in the project directory

Parameters:

Name Type Description Default
gdf gpd.GeoDataFrame

the geopandas GeoDataFrame to write to disk

required
name str

the name of the GIS file

required

Returns:

Type Description
None

None

Raises:

Type Description
ValueError

if the GIS dataset name is not recognized

Source code in saber/io.py
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
def write_gis(gdf: gpd.GeoDataFrame, name: str) -> None:
    """
    Write a GIS file to the correct location in the project directory

    Args:
        gdf: the geopandas GeoDataFrame to write to disk
        name: the name of the GIS file; must be in VALID_GIS_NAMES or GENERATE_GIS_NAMES_MAP

    Returns:
        None

    Raises:
        ValueError: if the GIS dataset name is not recognized
    """
    # Raise explicitly rather than assert: an assert reports AssertionError instead of the documented
    # ValueError and is removed entirely when Python runs with -O.
    if name not in VALID_GIS_NAMES and name not in GENERATE_GIS_NAMES_MAP:
        raise ValueError(f'"{name}" is not a recognized GIS dataset name')
    # always written with the GeoPackage driver, whatever the resolved path looks like
    gdf.to_file(_get_gis_path(name), driver='GPKG')

write_table(df, name)

Write a table to the correct location in the project directory

Parameters:

Name Type Description Default
df pd.DataFrame

the pandas DataFrame to write

required
name str

the name of the table to write

required

Returns:

Type Description
None

None

Raises:

Type Description
ValueError

if the table format is not recognized

Source code in saber/io.py
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
def write_table(df: pd.DataFrame, name: str) -> None:
    """
    Save a DataFrame as a named project table, choosing the writer from the file extension

    Args:
        df: the pandas DataFrame to write
        name: the name of the table to write

    Returns:
        None

    Raises:
        ValueError: if the file extension is not .parquet, .feather, or .csv
    """
    destination = _get_table_path(name)
    _, extension = os.path.splitext(destination)

    # one lazy writer per supported extension; only the selected one is ever called
    writers = {
        '.parquet': lambda: df.to_parquet(destination),
        '.feather': lambda: df.to_feather(destination),
        '.csv': lambda: df.to_csv(destination, index=False),  # the index is not written to csv
    }
    writer = writers.get(extension)
    if writer is None:
        raise ValueError(f'Unknown table format: {extension}')
    return writer()