sdmxabs.fetch_multi

Fetch multiple datasets from the SDMX API.

  1"""Fetch multiple datasets from the SDMX API."""
  2
  3from typing import Unpack
  4
  5import pandas as pd
  6
  7from sdmxabs.download_cache import CacheError, GetFileKwargs, HttpError
  8from sdmxabs.fetch import fetch
  9
 10
 11# --- private function
 12def extract(
 13    wanted: pd.DataFrame, *, validate: bool = False, **kwargs: Unpack[GetFileKwargs]
 14) -> tuple[pd.DataFrame, pd.DataFrame]:  # data / metadata
 15    """Extract the data and metadata for each row in the dimensions DataFrame.
 16
 17    Args:
 18        wanted (pd.DataFrame): DataFrame containing the dimensions to fetch.
 19        validate (bool): If True, the function will validate the dimensions and values
 20                         against the ABS SDMX API codelists. Defaults to False.
 21        **kwargs: Additional keyword arguments passed to the underlying data fetching function.
 22
 23    Returns:
 24        tuple[pd.DataFrame, pd.DataFrame]: A DataFrame with the fetched data and
 25                                        a DataFrame with the metadata.
 26
 27    Raises:
 28        ValueError: if any input data is not as expected.
 29
 30    Note: CacheError and HttpError are raised by the fetch function.
 31          These will be caught and reported to standard output.
 32
 33    """
 34    # --- initial setup - empty return results
 35    return_meta = {}
 36    return_data = {}
 37    counter = 0
 38
 39    # --- loop over the rows of the wanted DataFrame
 40    for _index, row in wanted.iterrows():
 41        # --- get the arguments for the fetch
 42        row_dict: dict[str, str] = row.to_dict()
 43        flow_id = row_dict.pop("flow_id", "")
 44        if not flow_id:
 45            # --- if there is no flow_id, we will skip this row
 46            print(f"Skipping row with no flow_id: {row_dict}")
 47            continue
 48
 49        # --- fetch the data and meta data for each row of the selection table
 50        try:
 51            data, meta = fetch(flow_id, dims=row_dict, validate=validate, **kwargs)
 52        except (CacheError, HttpError, ValueError) as e:
 53            # --- if there is an error, we will skip this row
 54            print(f"Error fetching {flow_id} with dimensions {row_dict}: {e}")
 55            continue
 56        if data.empty or meta.empty:
 57            # --- this should not happen, but if it does, we will skip this row
 58            print(f"No data for {flow_id} with dimensions {row_dict}")
 59            continue
 60
 61        # --- manage duplicates
 62        for col in data.columns:
 63            counter += 1
 64            save_name = col
 65            if save_name in return_data:
 66                save_name += f"_{counter:03d}"
 67            return_data[save_name] = data[col]
 68            return_meta[save_name] = meta.loc[col]
 69
 70    return pd.DataFrame(return_data), pd.DataFrame(return_meta).T
 71
 72
 73# --- public function
 74def fetch_multi(
 75    wanted: pd.DataFrame,
 76    *,
 77    validate: bool = False,
 78    **kwargs: Unpack[GetFileKwargs],
 79) -> tuple[pd.DataFrame, pd.DataFrame]:
 80    """Fetch multiple SDMX datasets based on a DataFrame of desired datasets.
 81
 82    Args:
 83        wanted: A DataFrame with rows for each desired data set (of one or more series).
 84                Each row should contain the necessary identifiers to fetch the dataset.
 85                The columns will be 'flow_id', plus the ABS dimensions relevant to the flow.
 86                The 'flow_id' column is mandatory, and the rest are optional.
 87                Note: the DataFrame index is not used in the fetching process.
 88        validate: If True, the function will validate dimensions and values against
 89                  the ABS SDMX API codelists. Defaults to False.
 90        **kwargs: Additional keyword arguments passed to the underlying data fetching function.
 91
 92    Returns:
 93        A tuple containing two DataFrames:
 94        - The first DataFrame contains the fetched data.
 95        - The second DataFrame contains metadata about the fetched datasets.
 96
 97    Raises:
 98        ValueError: If the 'flow_id' column is missing from the `wanted` DataFrame.
 99
100    Note:
101        CacheError and HttpError are raised by the fetch function.
102        These will be caught and reported to standard output.
103
104    Caution:
105        The selected data should all have the same index. You cannot mix (for example)
106        Quarterly and Monthly data in the same DataFrame.
107
108    """
109    # --- quick sanity checks
110    if wanted.empty:
111        print("wanted DataFrame is empty, returning empty DataFrames.")
112        return pd.DataFrame(), pd.DataFrame()
113    if "flow_id" not in wanted.columns:
114        raise ValueError("The 'flow_id' column is required in the 'wanted' DataFrame.")
115
116    # --- do the work
117    return extract(wanted, validate=validate, **kwargs)
def extract( wanted: pandas.core.frame.DataFrame, *, validate: bool = False, **kwargs: Unpack[sdmxabs.GetFileKwargs]) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
13def extract(
14    wanted: pd.DataFrame, *, validate: bool = False, **kwargs: Unpack[GetFileKwargs]
15) -> tuple[pd.DataFrame, pd.DataFrame]:  # data / metadata
16    """Extract the data and metadata for each row in the dimensions DataFrame.
17
18    Args:
19        wanted (pd.DataFrame): DataFrame containing the dimensions to fetch.
20        validate (bool): If True, the function will validate the dimensions and values
21                         against the ABS SDMX API codelists. Defaults to False.
22        **kwargs: Additional keyword arguments passed to the underlying data fetching function.
23
24    Returns:
25        tuple[pd.DataFrame, pd.DataFrame]: A DataFrame with the fetched data and
26                                        a DataFrame with the metadata.
27
28    Raises:
29        ValueError: if any input data is not as expected.
30
31    Note: CacheError and HttpError are raised by the fetch function.
32          These will be caught and reported to standard output.
33
34    """
35    # --- initial setup - empty return results
36    return_meta = {}
37    return_data = {}
38    counter = 0
39
40    # --- loop over the rows of the wanted DataFrame
41    for _index, row in wanted.iterrows():
42        # --- get the arguments for the fetch
43        row_dict: dict[str, str] = row.to_dict()
44        flow_id = row_dict.pop("flow_id", "")
45        if not flow_id:
46            # --- if there is no flow_id, we will skip this row
47            print(f"Skipping row with no flow_id: {row_dict}")
48            continue
49
50        # --- fetch the data and meta data for each row of the selection table
51        try:
52            data, meta = fetch(flow_id, dims=row_dict, validate=validate, **kwargs)
53        except (CacheError, HttpError, ValueError) as e:
54            # --- if there is an error, we will skip this row
55            print(f"Error fetching {flow_id} with dimensions {row_dict}: {e}")
56            continue
57        if data.empty or meta.empty:
58            # --- this should not happen, but if it does, we will skip this row
59            print(f"No data for {flow_id} with dimensions {row_dict}")
60            continue
61
62        # --- manage duplicates
63        for col in data.columns:
64            counter += 1
65            save_name = col
66            if save_name in return_data:
67                save_name += f"_{counter:03d}"
68            return_data[save_name] = data[col]
69            return_meta[save_name] = meta.loc[col]
70
71    return pd.DataFrame(return_data), pd.DataFrame(return_meta).T

Extract the data and metadata for each row in the dimensions DataFrame.

Args: wanted (pd.DataFrame): DataFrame containing the dimensions to fetch. validate (bool): If True, the function will validate the dimensions and values against the ABS SDMX API codelists. Defaults to False. **kwargs: Additional keyword arguments passed to the underlying data fetching function.

Returns: tuple[pd.DataFrame, pd.DataFrame]: A DataFrame with the fetched data and a DataFrame with the metadata.

Raises: ValueError: if any input data is not as expected.

Note: CacheError and HttpError are raised by the fetch function. These will be caught and reported to standard output.

def fetch_multi( wanted: pandas.core.frame.DataFrame, *, validate: bool = False, **kwargs: Unpack[sdmxabs.GetFileKwargs]) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
 75def fetch_multi(
 76    wanted: pd.DataFrame,
 77    *,
 78    validate: bool = False,
 79    **kwargs: Unpack[GetFileKwargs],
 80) -> tuple[pd.DataFrame, pd.DataFrame]:
 81    """Fetch multiple SDMX datasets based on a DataFrame of desired datasets.
 82
 83    Args:
 84        wanted: A DataFrame with rows for each desired data set (of one or more series).
 85                Each row should contain the necessary identifiers to fetch the dataset.
 86                The columns will be 'flow_id', plus the ABS dimensions relevant to the flow.
 87                The 'flow_id' column is mandatory, and the rest are optional.
 88                Note: the DataFrame index is not used in the fetching process.
 89        validate: If True, the function will validate dimensions and values against
 90                  the ABS SDMX API codelists. Defaults to False.
 91        **kwargs: Additional keyword arguments passed to the underlying data fetching function.
 92
 93    Returns:
 94        A tuple containing two DataFrames:
 95        - The first DataFrame contains the fetched data.
 96        - The second DataFrame contains metadata about the fetched datasets.
 97
 98    Raises:
 99        ValueError: If the 'flow_id' column is missing from the `wanted` DataFrame.
100
101    Note:
102        CacheError and HttpError are raised by the fetch function.
103        These will be caught and reported to standard output.
104
105    Caution:
106        The selected data should all have the same index. You cannot mix (for example)
107        Quarterly and Monthly data in the same DataFrame.
108
109    """
110    # --- quick sanity checks
111    if wanted.empty:
112        print("wanted DataFrame is empty, returning empty DataFrames.")
113        return pd.DataFrame(), pd.DataFrame()
114    if "flow_id" not in wanted.columns:
115        raise ValueError("The 'flow_id' column is required in the 'wanted' DataFrame.")
116
117    # --- do the work
118    return extract(wanted, validate=validate, **kwargs)

Fetch multiple SDMX datasets based on a DataFrame of desired datasets.

Args: wanted: A DataFrame with rows for each desired data set (of one or more series). Each row should contain the necessary identifiers to fetch the dataset. The columns will be 'flow_id', plus the ABS dimensions relevant to the flow. The 'flow_id' column is mandatory, and the rest are optional. Note: the DataFrame index is not used in the fetching process. validate: If True, the function will validate dimensions and values against the ABS SDMX API codelists. Defaults to False. **kwargs: Additional keyword arguments passed to the underlying data fetching function.

Returns: A tuple containing two DataFrames: - The first DataFrame contains the fetched data. - The second DataFrame contains metadata about the fetched datasets.

Raises: ValueError: If the 'flow_id' column is missing from the `wanted` DataFrame.

Note: CacheError and HttpError are raised by the fetch function. These will be caught and reported to standard output.

Caution: The selected data should all have the same index. You cannot mix (for example) Quarterly and Monthly data in the same DataFrame.