sdmxabs.fetch_pop

Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.

  1"""Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts."""
  2
  3from typing import Literal, Unpack
  4
  5import numpy as np
  6import pandas as pd
  7
  8from sdmxabs.download_cache import GetFileKwargs
  9from sdmxabs.fetch_gdp import fetch_gdp
 10from sdmxabs.fetch_selection import MatchType as Mt
 11from sdmxabs.fetch_selection import fetch_selection, match_item
 12
 13# --- constants
  14QUARTERS_IN_YEAR = 4  # lag (in quarters) for the annual growth rate, and the root used to turn it into a per-quarter factor
  15LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4  # no projection is made when the latest observation is more than this many quarters behind the current quarter
 16
 17
 18# --- private functions
 19def _erp_population(
 20    parameters: dict[str, str] | None,
 21    *,
 22    validate: bool,
 23    **kwargs: Unpack[GetFileKwargs],
 24) -> tuple[pd.DataFrame, pd.DataFrame]:
 25    """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API."""
 26    flow_id = "ERP_COMP_Q"
 27    selection_criteria = []
 28    selection_criteria.append(match_item("Estimated Resident Population", "MEASURE", Mt.EXACT))
 29    selection_criteria.append(match_item("Australia", "REGION", Mt.EXACT))
 30    selection_criteria.append(match_item("Q", "FREQ", Mt.EXACT))
 31    d, m = fetch_selection(flow_id, selection_criteria, parameters, validate=validate, **kwargs)
 32    d.columns = m.index = pd.Index(["Estimated Resident Population"])
 33    return d, m
 34
 35
 36def _na_population(
 37    parameters: dict[str, str] | None,
 38    *,
 39    validate: bool,
 40    **kwargs: Unpack[GetFileKwargs],
 41) -> tuple[pd.DataFrame, pd.DataFrame]:
 42    """Extrapolate population from the National Accounts data from the ABS SDMX API."""
 43    # --- Fetch GDP data
 44    gdp, _ = fetch_gdp(
 45        seasonality="o",
 46        price_measure="cp",
 47        parameters=parameters,
 48        validate=validate,
 49        **kwargs,
 50    )
 51
 52    # --- Fetch GDP per capita data
 53    selection_criteria = []
 54    selection_criteria.append(match_item("Original", "TSEST", Mt.EXACT))
 55    selection_criteria.append(match_item("Current prices", "MEASURE", Mt.EXACT))
 56    selection_criteria.append(match_item("GDP per capita", "DATA_ITEM", Mt.EXACT))
 57    flow_id = "ANA_AGG"
 58    d, m = fetch_selection(flow_id, selection_criteria, parameters, validate=validate, **kwargs)
 59
 60    # --- Extrapolate population from the above two series, Fudge meta-data
 61    name = "Implicit Population from GDP"
 62    gdp_s = gdp[gdp.columns[0]].astype(float)
 63    gdppc_s = d[d.columns[0]].astype(float)
 64    pop_s = gdp_s.div(gdppc_s) * 1_000
 65    d = pd.DataFrame(pop_s)
 66    d.columns = m.index = pd.Index([name])
 67    for k, v in {"UNIT_MEASURE": "NUM", "UNIT_MULT": "3", "DATA_ITEM": name}.items():
 68        if k not in m.columns:
 69            continue
 70        m.loc[name, k] = v
 71    return d, m
 72
 73
 74def _make_projection(data: pd.DataFrame) -> pd.DataFrame:
 75    """Make a naive projection of the population data forward to the current quarter.
 76
 77    Return original data if (for example) the data is empty or too old for a reasonable
 78    projection. The projection is based on the annual growth over the latest quarters.
 79
 80    """
 81    # --- validation/preparation
 82    if data.empty:
 83        return data  # No data to project
 84    current_quarter = pd.Timestamp.now().to_period("Q")
 85    last_period = data.index[-1]
 86    if last_period >= current_quarter:
 87        return data  # No projection needed
 88    if last_period < current_quarter - LAST_QUARTER_TOO_OLD_FOR_PROJECTION:
 89        return data  # Too old for projection
 90    annual_growth: float = data[data.columns[0]].astype(float).pct_change(QUARTERS_IN_YEAR).iloc[-1]
 91    if np.isnan(annual_growth):
 92        return data  # No valid growth rate
 93    new_periods = pd.period_range(start=last_period + 1, end=current_quarter, freq="Q")
 94    if new_periods.empty:
 95        return data  # No new periods to project
 96
 97    # --- Make the projection
 98    compound_q_growth_factor = (1 + annual_growth) ** (1 / QUARTERS_IN_YEAR)
 99    new_data = pd.Series(
100        data.iloc[-1, 0] * (compound_q_growth_factor ** np.arange(1, len(new_periods) + 1)), index=new_periods
101    )
102    return pd.DataFrame(data[data.columns[0]].combine_first(new_data))
103
104
105# --- public functions
def fetch_pop(
    source: Literal["erp", "na"] = "erp",
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch Australian population data from the ABS SDMX API.

    Args:
        source (str): Which population series to return:
            - "erp": ABS published Estimated Resident Population (default)
            - "na": Population implied by the ABS National Accounts
        parameters (dict[str, str] | None): Additional parameters for the API
            request, such as 'startPeriod'.
        projection (bool, optional): If True, pass the fetched data through
            _make_projection(), which extends sufficiently recent data forward
            to the current quarter. Defaults to False.
        validate (bool, optional): If True, validate the selection against the
            flow's required dimensions when generating the URL key.
            Defaults to False.
        **kwargs: Additional arguments passed on to the underlying fetch
            functions. A truthy 'verbose' entry also makes this function print
            the arguments it was called with.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: The population data and its metadata.
            Only the data is affected by `projection`.

    Raises:
        ValueError: If `source` is neither "erp" nor "na".

    """
    # echo the call when the caller asked for verbose output
    if kwargs.get("verbose", False):
        print(f"fetch_pop(): {source=} {validate=} {kwargs=}")

    # dispatch to the fetcher for the requested source
    if source == "erp":
        data, meta = _erp_population(parameters, validate=validate, **kwargs)
    elif source == "na":
        data, meta = _na_population(parameters, validate=validate, **kwargs)
    else:
        raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']")

    # optionally extend the data forward to the current quarter
    if not projection:
        return data, meta
    return _make_projection(data), meta
151
152
if __name__ == "__main__":

    def test_fetch_pop() -> None:
        """Smoke test: fetch the National Accounts-implied population with projection on."""
        frame, metadata = fetch_pop(
            source="na",
            parameters={"startPeriod": "2024-Q4"},
            projection=True,
            verbose=False,
        )
        # metadata is transposed so each attribute prints on its own row
        print(frame, "\n", metadata.T)

    test_fetch_pop()
# Lag (in quarters) for the annual growth rate, and the root used to turn that
# growth into a per-quarter factor.
QUARTERS_IN_YEAR = 4
# No projection is made when the latest observation is more than this many
# quarters behind the current quarter.
LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4
def fetch_pop( source: Literal['erp', 'na'] = 'erp', parameters: dict[str, str] | None = None, *, projection: bool = False, validate: bool = False, **kwargs: Unpack[sdmxabs.GetFileKwargs]) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
def fetch_pop(
    source: Literal["erp", "na"] = "erp",
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch Australian population data from the ABS SDMX API.

    Args:
        source (str): Which population series to return:
            - "erp": ABS published Estimated Resident Population (default)
            - "na": Population implied by the ABS National Accounts
        parameters (dict[str, str] | None): Additional parameters for the API
            request, such as 'startPeriod'.
        projection (bool, optional): If True, pass the fetched data through
            _make_projection(), which extends sufficiently recent data forward
            to the current quarter. Defaults to False.
        validate (bool, optional): If True, validate the selection against the
            flow's required dimensions when generating the URL key.
            Defaults to False.
        **kwargs: Additional arguments passed on to the underlying fetch
            functions. A truthy 'verbose' entry also makes this function print
            the arguments it was called with.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: The population data and its metadata.
            Only the data is affected by `projection`.

    Raises:
        ValueError: If `source` is neither "erp" nor "na".

    """
    # echo the call when the caller asked for verbose output
    if kwargs.get("verbose", False):
        print(f"fetch_pop(): {source=} {validate=} {kwargs=}")

    # dispatch to the fetcher for the requested source
    if source == "erp":
        data, meta = _erp_population(parameters, validate=validate, **kwargs)
    elif source == "na":
        data, meta = _na_population(parameters, validate=validate, **kwargs)
    else:
        raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']")

    # optionally extend the data forward to the current quarter
    if not projection:
        return data, meta
    return _make_projection(data), meta

Fetch Australian population data from the ABS SDMX API: either the published Estimated Resident Population (ERP) or the population implied by the National Accounts.

Args:

- source (str): Source of the population data:
    - "erp": ABS published Estimated Resident Population (default)
    - "na": Implied population from the ABS National Accounts
- parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'.
- projection (bool, optional): If True, and data is available for the most recent year, make a projection forward to the current quarter, based on growth over the last 4 quarters.
- validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False.
- **kwargs: Additional arguments passed to the fetch_selection() function

Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata