oceanum.datamesh package#

Submodules#

oceanum.datamesh.catalog module#

class oceanum.datamesh.catalog.Catalog(json)[source]#

Datamesh catalog. This class behaves like an immutable dictionary with the datasource ids as keys

property ids#

Return a list of datasource ids

keys()[source]#

Return a list of datasource ids

load(id)[source]#

Load datasource

Parameters

id – Datasource id

Returns

The datasource container

Return type

Union[pandas.DataFrame, geopandas.GeoDataFrame, xarray.Dataset]

async load_async(id)[source]#

Load datasource asynchronously

Parameters

id – Datasource id

Returns

The datasource container

Return type

Coroutine<Union[pandas.DataFrame, geopandas.GeoDataFrame, xarray.Dataset]>

query(query)[source]#

Make a query on the catalog

Parameters

query (Union[oceanum.datamesh.Query, dict]) – Datamesh query as a query object or a valid query dictionary

Returns

The datasource container

Return type

Union[pandas.DataFrame, geopandas.GeoDataFrame, xarray.Dataset]

Raises

IndexError – Datasource not in catalog

async query_async(query)[source]#

Make an asynchronous query on the catalog

Parameters

query (Union[oceanum.datamesh.Query, dict]) – Datamesh query as a query object or a valid query dictionary

Returns

The datasource container

Return type

Coroutine<Union[pandas.DataFrame, geopandas.GeoDataFrame, xarray.Dataset]>

Raises

IndexError – Datasource not in catalog

oceanum.datamesh.connection module#

class oceanum.datamesh.connection.Connector(token=None, service='https://datamesh.oceanum.io', gateway=None)[source]#

Bases: object

Datamesh connector class.

All datamesh operations are methods of this class

delete_datasource(datasource_id)[source]#

Delete a datasource from datamesh. This will delete the datamesh registration and any stored data.

Parameters

datasource_id (string) – Unique datasource id

Returns

Return True for successfully deleted datasource

Return type

boolean

async delete_datasource_async(datasource_id)[source]#

Asynchronously delete a datasource from datamesh. This will delete the datamesh registration and any stored data.

Parameters

datasource_id (string) – Unique datasource id

Returns

Return True for successfully deleted datasource

Return type

boolean

get_catalog(search=None, timefilter=None, geofilter=None)[source]#

Get datamesh catalog

Parameters
  • search (string, optional) – Search string for filtering datasources

  • timefilter (Union[oceanum.datamesh.query.TimeFilter, list], Optional) – Time filter as valid Query TimeFilter or list of [start,end]

  • geofilter (Union[oceanum.datamesh.query.GeoFilter, dict, shapely.geometry], Optional) – Spatial filter as valid Query Geofilter or geojson geometry as dict or shapely Geometry

Returns

A datamesh catalog instance

Return type

oceanum.datamesh.Catalog

async get_catalog_async(filter={})[source]#

Get datamesh catalog asynchronously

Parameters
  • filter (dict, optional) – Set of filters to apply. Defaults to {}.

  • loop – event loop. default=None will use asyncio.get_running_loop()

  • executor – concurrent.futures.Executor instance. default=None will use the default executor

Returns

A datamesh catalog instance

Return type

Coroutine<oceanum.datamesh.Catalog>

get_datasource(datasource_id)[source]#

Get a Datasource instance from the datamesh. This does not load the actual data.

Parameters

datasource_id (string) – Unique datasource id

Returns

A datasource instance

Return type

oceanum.datamesh.Datasource

Raises

DatameshConnectError – Datasource cannot be found or is not authorized for the datamesh key

async get_datasource_async(datasource_id)[source]#

Get a Datasource instance from the datamesh asynchronously. This does not load the actual data.

Parameters
  • datasource_id (string) – Unique datasource id

  • loop – event loop. default=None will use asyncio.get_running_loop()

  • executor – concurrent.futures.Executor instance. default=None will use the default executor

Returns

A datasource instance

Return type

Coroutine<oceanum.datamesh.Datasource>

Raises

DatameshConnectError – Datasource cannot be found or is not authorized for the datamesh key

property host#

Datamesh host

Returns

Datamesh server host

Return type

string

load_datasource(datasource_id, parameters={}, use_dask=True)[source]#

Load a datasource into the work environment. For datasources which load into DataFrames or GeoDataFrames, this returns an in memory instance of the DataFrame. For datasources which load into an xarray Dataset, an open zarr backed dataset is returned.

Parameters
  • datasource_id (string) – Unique datasource id

  • parameters (dict) – Additional datasource parameters

  • use_dask (bool, optional) – Load datasource as a dask enabled datasource if possible. Defaults to True.

Returns

The datasource container

Return type

Union[pandas.DataFrame, geopandas.GeoDataFrame, xarray.Dataset]

async load_datasource_async(datasource_id, parameters={}, use_dask=True)[source]#

Load a datasource asynchronously into the work environment

Parameters
  • datasource_id (string) – Unique datasource id

  • use_dask (bool, optional) – Load datasource as a dask enabled datasource if possible. Defaults to True.

  • loop – event loop. default=None will use asyncio.get_running_loop()

  • executor – concurrent.futures.Executor instance. default=None will use the default executor

Returns

The datasource container

Return type

Coroutine<Union[pandas.DataFrame, geopandas.GeoDataFrame, xarray.Dataset]>

query(query=None, *, use_dask=True, **query_keys)[source]#

Make a datamesh query

Parameters

query (Union[oceanum.datamesh.Query, dict]) – Datamesh query as a query object or a valid query dictionary

Kwargs:

use_dask (bool, optional): Load datasource as a dask enabled datasource if possible. Defaults to True.
**query_keys: Keyword form of query, for example datamesh.query(datasource=”my_datasource”)

Returns

The datasource container

Return type

Union[pandas.DataFrame, geopandas.GeoDataFrame, xarray.Dataset]

async query_async(query, *, use_dask=True, **query_keys)[source]#

Make a datamesh query asynchronously

Parameters

query (Union[oceanum.datamesh.Query, dict]) – Datamesh query as a query object or a valid query dictionary

Kwargs:

use_dask (bool, optional): Load datasource as a dask enabled datasource if possible. Defaults to True.
loop: event loop. default=None will use asyncio.get_running_loop()
executor: concurrent.futures.Executor instance. default=None will use the default executor
**query_keys: Keyword form of query, for example datamesh.query(datasource=”my_datasource”)

Returns

The datasource container

Return type

Coroutine<Union[pandas.DataFrame, geopandas.GeoDataFrame, xarray.Dataset]>

write_datasource(datasource_id, data, geometry=None, append=None, overwrite=False, **properties)[source]#

Write a datasource to datamesh from the work environment

Parameters
  • datasource_id (string) – Unique datasource id

  • data (Union[pandas.DataFrame, geopandas.GeoDataFrame, xarray.Dataset, None]) – The data to be written to datamesh. If data is None, just update metadata properties.

  • geometry (oceanum.datasource.Geometry, optional) – GeoJSON geometry of the datasource

  • append (string, optional) – Coordinate to append on. default=None

  • overwrite (bool, optional) – Overwrite existing datasource. default=False

  • **properties – Additional properties for the datasource - see oceanum.datamesh.Datasource

Returns

The datasource instance that was written to

Return type

oceanum.datamesh.Datasource

async write_datasource_async(datasource_id, data, append=None, overwrite=False, **properties)[source]#

Write a datasource to datamesh from the work environment asynchronously

Parameters
  • datasource_id (string) – Unique datasource id

  • data (Union[pandas.DataFrame, geopandas.GeoDataFrame, xarray.Dataset, None]) – The data to be written to datamesh. If data is None, just update metadata properties.

  • geometry (oceanum.datasource.Geometry) – GeoJSON geometry of the datasource

  • append (string, optional) – Coordinate to append on. default=None

  • overwrite (bool, optional) – Overwrite existing datasource. default=False

  • **properties – Additional properties for the datasource - see oceanum.datamesh.Datasource constructor

Returns

The datasource instance that was written to

Return type

Coroutine<oceanum.datamesh.Datasource>

exception oceanum.datamesh.connection.DatameshConnectError[source]#

Bases: Exception

exception oceanum.datamesh.connection.DatameshQueryError[source]#

Bases: Exception

exception oceanum.datamesh.connection.DatameshWriteError[source]#

Bases: Exception

oceanum.datamesh.connection.asyncwrapper(func)[source]#
oceanum.datamesh.connection.json_serial(obj)[source]#

JSON serializer for objects not serializable by default json code

oceanum.datamesh.datasource module#

pydantic model oceanum.datamesh.datasource.Datasource[source]#

Datasource

Show JSON schema
{
   "title": "Datasource",
   "description": "Datasource",
   "type": "object",
   "properties": {
      "id": {
         "title": "Datasource ID",
         "description": "Unique ID for the datasource",
         "maxLength": 80,
         "minLength": 3,
         "pattern": "^[a-z0-9-_]+$",
         "strip_whitespace": true,
         "to_lower": true,
         "type": "string"
      },
      "name": {
         "title": "Datasource name",
         "description": "Human readable name for the datasource",
         "maxLength": 64,
         "type": "string"
      },
      "description": {
         "title": "Datasource description",
         "description": "Description of datasource",
         "default": "",
         "maxLength": 1500,
         "type": "string"
      },
      "parameters": {
         "title": "Datasource parameters",
         "description": "Additional parameters for accessing datasource",
         "default": {},
         "type": "object"
      },
      "geom": {
         "title": "Geom"
      },
      "tstart": {
         "title": "Start time of datasource",
         "description": "Earliest time in datasource. Must be a valid ISO8601 datetime string",
         "type": "string",
         "format": "date-time"
      },
      "tend": {
         "title": "End time of datasource",
         "description": "Latest time in datasource. Must be a valid ISO8601 datetime string",
         "type": "string",
         "format": "date-time"
      },
      "parchive": {
         "title": "Datasource rolling archive period",
         "description": "Duration of a rolling archive (time before present). Must be a valid ISO8601 interval string or None.",
         "type": "number",
         "format": "time-delta"
      },
      "tags": {
         "title": "Datasource tags",
         "description": "Metadata keyword tags related to the datasource",
         "default": [],
         "type": "array",
         "items": {}
      },
      "info": {
         "title": "Datasource metadata",
         "description": "Additional datasource descriptive metadata",
         "default": "",
         "type": "object"
      },
      "schema": {
         "title": "Schema",
         "description": "Datasource schema",
         "default": {
            "attrs": {},
            "dims": {},
            "coords": {},
            "data_vars": {}
         },
         "allOf": [
            {
               "$ref": "#/definitions/Schema"
            }
         ]
      },
      "coordinates": {
         "title": "Coordinate keys",
         "description": "\n        Coordinates in datasource, referenced by standard coordinate keys. The dictionary keys map to coordinates variables in the datasource.\n            Ensemble: \"e\"\n            Rasterband: \"b\"\n            Category: \"c\"\n            Quantile: \"q\"\n            Season: \"s\"\n            Month: \"m\"\n            Time: \"t\"\n            Vertical: \"z\"\n            Northing: \"y\"\n            Easting: \"x\"\n            Station: \"s\"  (Locations assumed stationary, datasource with a multigeometry indexed by station coordinate)\n            Geometry: \"g\"  (Abstract coordinate - a 2 or 3D geometry that defines a feature location)\n            Frequency: \"f\"\n            Direction:\"d\"\n\n        Example {\"t\":\"time\",\"x\":\"longitude\",\"y\":\"latitude\"}\n        ",
         "type": "object",
         "additionalProperties": {
            "type": "string"
         }
      },
      "details": {
         "title": "Details",
         "description": "URL to further details about the datasource",
         "minLength": 1,
         "maxLength": 65536,
         "format": "uri",
         "type": "string"
      },
      "last_modified": {
         "title": "Last modified time",
         "description": "Last time datasource was modified",
         "default": "2023-01-10T19:39:50.742000",
         "type": "string",
         "format": "date-time"
      },
      "args": {
         "title": "Driver arguments",
         "description": "Driver arguments for datasource. These are driver dependent.",
         "type": "object"
      },
      "driver": {
         "title": "Driver",
         "type": "string"
      }
   },
   "required": [
      "id",
      "name",
      "coordinates",
      "driver"
   ],
   "definitions": {
      "Schema": {
         "title": "Schema",
         "type": "object",
         "properties": {
            "attrs": {
               "title": "Global attributes",
               "type": "object"
            },
            "dims": {
               "title": "Dimensions",
               "type": "object"
            },
            "coords": {
               "title": "Coordinates",
               "type": "object"
            },
            "data_vars": {
               "title": "Data variables",
               "type": "object"
            }
         }
      }
   }
}

Fields
field coordinates: Dict[Coordinates, str] [Required]#
Coordinates in datasource, referenced by standard coordinate keys. The dictionary keys map to coordinates variables in the datasource.

Ensemble: “e” Rasterband: “b” Category: “c” Quantile: “q” Season: “s” Month: “m” Time: “t” Vertical: “z” Northing: “y” Easting: “x” Station: “s” (Locations assumed stationary, datasource with a multigeometry indexed by station coordinate) Geometry: “g” (Abstract coordinate - a 2 or 3D geometry that defines a feature location) Frequency: “f” Direction: ”d”

Example {“t”:”time”,”x”:”longitude”,”y”:”latitude”}

field dataschema: Optional[Schema] = Schema(attrs={}, dims={}, coords={}, data_vars={}) (alias 'schema')#

Datasource schema

field description: Optional[str] = ''#

Description of datasource

Constraints
  • maxLength = 1500

field details: Optional[AnyHttpUrl] = None#

URL to further details about the datasource

Constraints
  • minLength = 1

  • maxLength = 65536

  • format = uri

field driver: str [Required]#
field driver_args: Optional[dict] = None (alias 'args')#

Driver arguments for datasource. These are driver dependent.

field geom: Geometry [Required]#

Valid shapely or geoJSON geometry describing the spatial extent of the datasource

field id: str [Required]#

Unique ID for the datasource

Constraints
  • maxLength = 80

  • minLength = 3

  • pattern = ^[a-z0-9-_]+$

field info: Optional[dict] = ''#

Additional datasource descriptive metadata

field last_modified: Optional[datetime] = datetime.datetime(2023, 1, 10, 19, 39, 50, 742000)#

Last time datasource was modified

field name: str [Required]#

Human readable name for the datasource

Constraints
  • maxLength = 64

field parameters: Optional[dict] = {}#

Additional parameters for accessing datasource

field parchive: Optional[Timeperiod] = None#

Duration of a rolling archive (time before present). Must be a valid ISO8601 interval string or None.

field tags: Optional[list] = []#

Metadata keyword tags related to the datasource

field tend: Optional[datetime] = None#

Latest time in datasource. Must be a valid ISO8601 datetime string

field tstart: Optional[datetime] = None#

Earliest time in datasource. Must be a valid ISO8601 datetime string

property attributes#

Datasource global attributes. Note that these are None (undefined) for a summary dataset.

property bounds#

Bounding box of datasource geographical extent

Type

list[float]

property geometry#
property variables#

Datasource variables (or properties). Note that these are None (undefined) for a summary dataset.

oceanum.datamesh.query module#

pydantic model oceanum.datamesh.query.Aggregate[source]#

Bases: BaseModel

Fields
field operations: List[AggregateOps] = [<AggregateOps.mean: 'mean'>]#

List of aggregation operators to apply, from mean,min,max,std,sum

field spatial: Optional[bool] = True#

Aggregate over spatial dimensions (default True)

field temporal: Optional[bool] = True#

Aggregate over temporal dimension (default True)

class oceanum.datamesh.query.AggregateOps(value)[source]#

Bases: str, Enum

An enumeration.

max = 'max'#
mean = 'mean'#
min = 'min'#
std = 'std'#
sum = 'sum'#
class oceanum.datamesh.query.Container(value)[source]#

Bases: str, Enum

An enumeration.

DataFrame = 'dataframe'#
Dataset = 'dataset'#
GeoDataFrame = 'geodataframe'#
pydantic model oceanum.datamesh.query.DatasourceGeom[source]#

Bases: BaseModel

Fields
field id: str [Required]#
field parameters: Optional[Dict] = {}#
pydantic model oceanum.datamesh.query.GeoFilter[source]#

Bases: BaseModel

GeoFilter class. Describes a spatial subset or interpolation

Fields
field geom: Union[List, Feature] [Required]#
  • For type=’feature’, geojson feature.

  • For type=’bbox’, list[x_min,y_min,x_max,y_max] in CRS units.

field resolution: Optional[float] = 0.0#

Maximum resolution of the data for downsampling in CRS units. Only works for feature datasources.

field type: GeoFilterType = GeoFilterType.bbox#
Type of the geofilter. Can be one of:
  • ‘feature’: Select with a geojson feature

  • ‘bbox’: Select with a bounding box

class oceanum.datamesh.query.GeoFilterType(value)[source]#

Bases: Enum

An enumeration.

bbox = 'bbox'#
feature = 'feature'#
pydantic model oceanum.datamesh.query.Query[source]#

Bases: BaseModel

Datamesh query

Fields
field aggregate: Optional[Aggregate] = None#

Optional aggregation operators to apply to query after filtering

field crs: Optional[Union[str, int]] = None#

Valid CRS string for returned data

field datasource: str [Required]#

Datasource ID

Constraints
  • maxLength = 80

  • minLength = 3

field description: Optional[str] = None#

Human readable description of this query

field geofilter: Optional[GeoFilter] = None#
field parameters: Optional[Dict] = {}#

Dictionary of driver parameters to pass to datasource

field timefilter: Optional[TimeFilter] = None#

Temporal filter or interpolator

field variables: Optional[List[str]] = None#

List of requested variables.

exception oceanum.datamesh.query.QueryError[source]#

Bases: Exception

class oceanum.datamesh.query.ResampleType(value)[source]#

Bases: str, Enum

An enumeration.

mean = 'mean'#
pydantic model oceanum.datamesh.query.Stage[source]#

Bases: BaseModel

Fields
field container: Container [Required]#
field coords: dict [Required]#
field dlen: int [Required]#
field formats: List[str] [Required]#
field qhash: str [Required]#
field query: Query [Required]#
field size: int [Required]#
pydantic model oceanum.datamesh.query.TimeFilter[source]#

Bases: BaseModel

TimeFilter class. Describes a temporal subset or interpolation

Fields
field resample: Optional[ResampleType] = ResampleType.mean#

Resampling method applied when reducing temporal resolution. Only valid with range type

field resolution: Optional[str] = 'native'#

Maximum resolution of the data for temporal downsampling. Only valid with range type

field times: List[Optional[Timestamp]] [Required]#
  • For type=’range’, [tstart, tend].

field type: TimeFilterType = TimeFilterType.range#
Type of the timefilter. Can be one of:
  • ‘range’: Select times within a range

class oceanum.datamesh.query.TimeFilterType(value)[source]#

Bases: str, Enum

An enumeration.

range = 'range'#
class oceanum.datamesh.query.Timestamp(ts_input=<object object>, freq=None, tz=None, unit=None, year=None, month=None, day=None, hour=None, minute=None, second=None, microsecond=None, nanosecond=None, tzinfo=None, *, fold=None)[source]#

Bases: Timestamp

classmethod validate(v)[source]#

Module contents#