readabs.read_abs_series

Get specific ABS data series by their ABS series identifiers.

  1"""Get specific ABS data series by their ABS series identifiers."""
  2
  3from collections.abc import Sequence
  4from typing import Unpack, cast
  5
  6from pandas import DataFrame, Index, PeriodIndex, concat
  7
  8from readabs.abs_meta_data import metacol
  9from readabs.read_abs_cat import read_abs_cat
 10from readabs.read_support import ReadArgs, check_kwargs, get_args
 11
 12
 13# --- functions
 14def read_abs_series(
 15    cat: str,
 16    series_id: str | Sequence[str],
 17    **kwargs: Unpack[ReadArgs],
 18) -> tuple[DataFrame, DataFrame]:
 19    """Get specific ABS data series by their ABS catalogue and series identifiers.
 20
 21    Parameters
 22    ----------
 23    cat : str
 24        The ABS catalogue ID.
 25
 26    series_id : str | Sequence[str]
 27        An ABS series ID or a sequence of ABS series IDs.
 28
 29    **kwargs : Any
 30        Keyword arguments for the read_abs_series function,
 31        which are the same as the keyword arguments for the
 32        read_abs_cat function.
 33
 34    Returns
 35    -------
 36    tuple[DataFrame, DataFrame]
 37        A tuple of two DataFrames, one for the primary data and one for the metadata.
 38
 39    Example
 40    -------
 41
 42    ```python
 43    import readabs as ra
 44    from pandas import DataFrame
 45    cat_num = "6202.0"  # The ABS labour force survey
 46    unemployment_rate = "A84423050A"
 47    seo = "6202001"  # The ABS table name
 48    data, meta = ra.read_abs_series(
 49        cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
 50    )
 51    ```
 52
 53    """
 54    # check for unexpected keyword arguments/get defaults
 55    check_kwargs(kwargs, "read_abs_series")
 56    args = get_args(kwargs, "read_abs_series")
 57
 58    # read the ABS category data
 59    cat_data, cat_meta = read_abs_cat(cat, **args)
 60
 61    # drop repeated series_ids in the meta data,
 62    # make unique series_ids the index
 63    cat_meta.index = Index(cat_meta[metacol.id])
 64    cat_meta = cat_meta.groupby(cat_meta.index).first()
 65
 66    # get the ABS series data
 67    if isinstance(series_id, str):
 68        series_id = [series_id]
 69    return_data, return_meta = DataFrame(), DataFrame()
 70    for identifier in series_id:
 71        # confirm that the series ID is in the catalogue
 72        if identifier not in cat_meta.index:
 73            if args["verbose"]:
 74                print(f"Series ID {identifier} not found in ABS catalogue ID {cat}")
 75            if args["ignore_errors"]:
 76                continue
 77            raise ValueError(f"Series ID {identifier} not found in catalogue {cat}")
 78
 79        # confirm thay the index of the series is compatible
 80        table = str(cat_meta.loc[identifier, metacol.table])  # str for mypy
 81        data_series = cat_data[table][identifier]
 82        if (
 83            len(return_data) > 0
 84            and cast("PeriodIndex", return_data.index).freq != cast("PeriodIndex", data_series.index).freq
 85        ):
 86            if args["verbose"]:
 87                print(f"Frequency mismatch for series ID {identifier}")
 88            if args["ignore_errors"]:
 89                continue
 90            raise ValueError(f"Frequency mismatch for series ID {identifier}")
 91
 92        # add the series data and meta data to the return values
 93        if len(return_data) > 0:
 94            return_data = return_data.reindex(return_data.index.union(data_series.index))
 95        return_data[identifier] = data_series
 96        return_meta = concat([return_meta, cat_meta.loc[identifier]], axis=1)
 97
 98    return return_data, return_meta.T
 99
100
if __name__ == "__main__":

    def simple_test() -> None:
        """Fetch one CPI series with read_abs_series and print the results."""
        # Trimmed Mean - through the year CPI growth - seasonally adjusted
        catalogue, series, workbook = "6401.0", "A3604511X", "640106"
        series_data, series_meta = read_abs_series(
            catalogue,
            series,
            single_excel_only=workbook,
        )

        # show the most recent observations, then the metadata with the
        # series as a column, then a completion marker
        recent_rows = series_data.tail()
        print(recent_rows)
        transposed_meta = series_meta.T
        print(transposed_meta)
        print("Done")

    simple_test()
def read_abs_series( cat: str, series_id: str | Sequence[str], **kwargs: Unpack[readabs.ReadArgs]) -> tuple[pandas.DataFrame, pandas.DataFrame]:
def read_abs_series(
    cat: str,
    series_id: str | Sequence[str],
    **kwargs: Unpack[ReadArgs],
) -> tuple[DataFrame, DataFrame]:
    """Get specific ABS data series by their ABS catalogue and series identifiers.

    Parameters
    ----------
    cat : str
        The ABS catalogue ID.

    series_id : str | Sequence[str]
        An ABS series ID or a sequence of ABS series IDs. A repeated ID is
        only processed once, at the position of its first occurrence.

    **kwargs : Unpack[ReadArgs]
        Keyword arguments for the read_abs_series function,
        which are the same as the keyword arguments for the
        read_abs_cat function.

    Returns
    -------
    tuple[DataFrame, DataFrame]
        A tuple of two DataFrames, one for the primary data (one column per
        series ID collected) and one for the metadata (one row per series ID
        collected).

    Raises
    ------
    ValueError
        If a series ID is not in the catalogue metadata, or if the frequency
        of its index differs from that of the series already collected, and
        the "ignore_errors" argument is not truthy.

    Example
    -------

    ```python
    import readabs as ra
    cat_num = "6202.0"  # The ABS labour force survey
    unemployment_rate = "A84423050A"
    seo = "6202001"  # The ABS table name
    data, meta = ra.read_abs_series(
        cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
    )
    ```

    """
    # check for unexpected keyword arguments/get defaults
    check_kwargs(kwargs, "read_abs_series")
    args = get_args(kwargs, "read_abs_series")

    # read the ABS category data
    cat_data, cat_meta = read_abs_cat(cat, **args)

    # drop repeated series_ids in the meta data,
    # make unique series_ids the index
    cat_meta.index = Index(cat_meta[metacol.id])
    cat_meta = cat_meta.groupby(cat_meta.index).first()

    # normalise the request to an ordered list of unique series IDs: a
    # repeated ID would overwrite its single data column below but still
    # append a second metadata column, leaving data and metadata out of step
    requested = [series_id] if isinstance(series_id, str) else series_id
    unique_ids = list(dict.fromkeys(requested))

    # get the ABS series data
    return_data, return_meta = DataFrame(), DataFrame()
    for identifier in unique_ids:
        # confirm that the series ID is in the catalogue
        if identifier not in cat_meta.index:
            if args["verbose"]:
                print(f"Series ID {identifier} not found in ABS catalogue ID {cat}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Series ID {identifier} not found in catalogue {cat}")

        # confirm that the index of the series is compatible
        table = str(cat_meta.loc[identifier, metacol.table])  # str for mypy
        data_series = cat_data[table][identifier]
        if (
            len(return_data) > 0
            and cast("PeriodIndex", return_data.index).freq != cast("PeriodIndex", data_series.index).freq
        ):
            if args["verbose"]:
                print(f"Frequency mismatch for series ID {identifier}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Frequency mismatch for series ID {identifier}")

        # add the series data and meta data to the return values
        if len(return_data) > 0:
            # widen the index first so periods only present in the new
            # series are not dropped when the column is assigned
            return_data = return_data.reindex(return_data.index.union(data_series.index))
        return_data[identifier] = data_series
        return_meta = concat([return_meta, cat_meta.loc[identifier]], axis=1)

    # metadata was accumulated one column per series; transpose to one row per series
    return return_data, return_meta.T

Get specific ABS data series by their ABS catalogue and series identifiers.

Parameters

cat : str — The ABS catalogue ID.

series_id : str | Sequence[str] — An ABS series ID or a sequence of ABS series IDs.

**kwargs : Any — Optional keyword arguments; read_abs_series accepts the same keyword arguments as the read_abs_cat function.

Returns

tuple[DataFrame, DataFrame] — A tuple of two DataFrames: the first holds the primary data and the second holds the metadata.

Example

import readabs as ra
from pandas import DataFrame
cat_num = "6202.0"  # The ABS labour force survey
unemployment_rate = "A84423050A"
seo = "6202001"  # The ABS table name
data, meta = ra.read_abs_series(
    cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
)