sdmxabs.fetch_pop

Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.

  1"""Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts."""
  2
  3from typing import Literal, Unpack
  4
  5import numpy as np
  6import pandas as pd
  7
  8from sdmxabs.download_cache import GetFileKwargs
  9from sdmxabs.fetch_gdp import fetch_gdp
 10from sdmxabs.fetch_selection import MatchType as Mt
 11from sdmxabs.fetch_selection import fetch_selection
 12from sdmxabs.flow_metadata import code_list_for, structure_ident
 13
 14# --- constants
  15FLOW_ID = "ERP_COMP_Q"  # dataflow identifier passed to fetch_selection() for ERP requests
  16STRUCTURE_ID = structure_ident(FLOW_ID)  # structure identifier for FLOW_ID; used to look up the REGION code list
  17QUARTERS_IN_YEAR = 4  # lag for the annual growth rate, and the root used to turn it into a quarterly factor
  18LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4  # no projection if the latest observation is more than this many quarters old
 19
 20
 21# --- private functions
 22def _erp_population(
 23    state: str,
 24    parameters: dict[str, str] | None,
 25    *,
 26    validate: bool,
 27    **kwargs: Unpack[GetFileKwargs],
 28) -> tuple[pd.DataFrame, pd.DataFrame]:
 29    """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API."""
 30    selection_criteria = [
 31        ("Estimated Resident Population", "MEASURE", Mt.EXACT),
 32        ("Q", "FREQ", Mt.EXACT),
 33    ]
 34    if state:
 35        selection_criteria.append((state, "REGION", Mt.EXACT))
 36    d, m = fetch_selection(FLOW_ID, selection_criteria, parameters, validate=validate, **kwargs)
 37    return d, m
 38
 39
 40def _na_population(
 41    parameters: dict[str, str] | None,
 42    *,
 43    validate: bool,
 44    **kwargs: Unpack[GetFileKwargs],
 45) -> tuple[pd.DataFrame, pd.DataFrame]:
 46    """Extrapolate Australian population from the National Accounts data from the ABS SDMX API."""
 47    # --- Fetch GDP data
 48    gdp, _ = fetch_gdp(
 49        seasonality="o",
 50        price_measure="cp",
 51        parameters=parameters,
 52        validate=validate,
 53        **kwargs,
 54    )
 55
 56    # --- Fetch GDP per capita data
 57    selection_criteria = [
 58        ("Original", "TSEST", Mt.EXACT),
 59        ("Current prices", "MEASURE", Mt.EXACT),
 60        ("GDP per capita", "DATA_ITEM", Mt.EXACT),
 61    ]
 62    flow_id = "ANA_AGG"
 63    d, m = fetch_selection(flow_id, selection_criteria, parameters, validate=validate, **kwargs)
 64
 65    # --- Extrapolate population from the above two series, Fudge meta-data
 66    name = "Implicit Population from GDP"
 67    gdp_s = gdp[gdp.columns[0]].astype(float)
 68    gdppc_s = d[d.columns[0]].astype(float)
 69    pop_s = gdp_s.div(gdppc_s) * 1_000
 70    d = pd.DataFrame(pop_s)
 71    d.columns = m.index = pd.Index([name])
 72    for k, v in {"UNIT_MEASURE": "NUM", "UNIT_MULT": "3", "DATA_ITEM": name}.items():
 73        if k not in m.columns:
 74            continue
 75        m.loc[name, k] = v
 76    return d, m
 77
 78
 79def _make_projection(data: pd.DataFrame) -> pd.DataFrame:
 80    """Make a naive projection of the population data forward to the current quarter.
 81
 82    Return original data if (for example) the data is empty or too old for a reasonable
 83    projection. The projection is based on the annual growth over the latest quarters.
 84
 85    """
 86    # --- validation/preparation
 87    if data.empty:
 88        return data  # No data to project
 89    current_quarter = pd.Timestamp.now().to_period("Q")
 90    last_period = data.index[-1]
 91    if last_period >= current_quarter:
 92        return data  # No projection needed
 93    if last_period < current_quarter - LAST_QUARTER_TOO_OLD_FOR_PROJECTION:
 94        return data  # Too old for projection
 95    annual_growth: float = data[data.columns[0]].astype(float).pct_change(QUARTERS_IN_YEAR).iloc[-1]
 96    if np.isnan(annual_growth):
 97        return data  # No valid growth rate
 98    new_periods = pd.period_range(start=last_period + 1, end=current_quarter, freq="Q")
 99    if new_periods.empty:
100        return data  # No new periods to project
101
102    # --- Make the projection
103    compound_q_growth_factor = (1 + annual_growth) ** (1 / QUARTERS_IN_YEAR)
104    new_data = pd.Series(
105        data.iloc[-1, 0] * (compound_q_growth_factor ** np.arange(1, len(new_periods) + 1)), index=new_periods
106    )
107    return pd.DataFrame(data[data.columns[0]].combine_first(new_data))
108
109
def _state_name_from_abbrev(state: str) -> str:
    """Resolve a state/territory abbreviation to its full region name.

    Matching of abbreviations ignores case and surrounding whitespace. Input
    that is not a known abbreviation is used as given (whitespace-stripped).
    The resolved name must appear in the "name" column of the REGION code
    list for STRUCTURE_ID.

    Args:
        state: Abbreviation (e.g. "NSW", "vic", "Aus") or a full region name.

    Returns:
        The resolved region name.

    Raises:
        ValueError: If the resolved name is not in the REGION code list.

    """
    abbrev_to_name = {
        "nsw": "New South Wales",
        "vic": "Victoria",
        "qld": "Queensland",
        "sa": "South Australia",
        "wa": "Western Australia",
        "tas": "Tasmania",
        "nt": "Northern Territory",
        "act": "Australian Capital Territory",
        # several spellings are accepted for the national total
        **dict.fromkeys(("aust", "aus", "au"), "Australia"),
    }

    key = state.lower().strip()
    state_name = abbrev_to_name[key] if key in abbrev_to_name else state.strip()

    # Validate against the names the flow actually publishes.
    state_names = pd.DataFrame(code_list_for(STRUCTURE_ID, "REGION")).T
    if state_name in state_names["name"].to_numpy():
        return state_name
    raise ValueError(f"Invalid state '{state_name}'. Available: {list(state_names['name'].unique())}")
132
133
134# --- public functions
def fetch_pop(
    source: Literal["erp", "na"] = "erp",
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch Australian population data from the ABS SDMX API.

    Args:
        source (str): Source of the population data:
            - "erp": ABS published Estimated Resident Population (default)
            - "na": Implied population from the ABS National Accounts
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, pass the data through the naive projection
            to the current quarter (growth over the last 4 quarters). The projection
            leaves the data unchanged when it is empty, current, or too old.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed to the fetch_selection() function

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If source is neither "erp" nor "na".

    """
    # echo the call when the caller asked for verbose output
    if kwargs.get("verbose", False):
        print(f"fetch_pop(): {source=} {validate=} {kwargs=}")

    # dispatch on the requested source
    if source == "erp":
        data, meta = _erp_population("Australia", parameters, validate=validate, **kwargs)
    elif source == "na":
        data, meta = _na_population(parameters, validate=validate, **kwargs)
    else:
        raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']")

    # optionally extend the series to the current quarter
    result = _make_projection(data) if projection else data
    return result, meta
180
181
def fetch_state_pop(
    state: str,
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch state-level ERP population data from the ABS SDMX API.

    Args:
        state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.).
            [Note: Use "" or "all" for the population estimates for all states.]
            Surrounding whitespace is ignored.
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, make a projection forward to the current quarter
            based on growth over the last 4 quarters.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed to the fetch_selection() function

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If a specific state is requested and it cannot be resolved
            to a known region name.

    """
    # report the parameters used if requested
    if kwargs.get("verbose", False):
        print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}")

    # Strip before testing for the "all states" sentinel: the abbreviation
    # lookup already ignores surrounding whitespace, so " all " must not be
    # rejected as an invalid state name.
    if state.strip().lower() in ("", "all"):
        full_state_name: str = ""  # empty name => no REGION restriction
    else:
        full_state_name = _state_name_from_abbrev(state)

    data, meta = _erp_population(full_state_name, parameters, validate=validate, **kwargs)

    if projection:
        data = _make_projection(data)

    return data, meta
223
224
if __name__ == "__main__":

    def test_fetch_pop() -> None:
        """Smoke-test fetch_pop() for each source, with and without projection."""
        request_parameters = {"startPeriod": "2023-Q4"}
        sources: tuple[Literal["erp", "na"], ...] = ("erp", "na")
        for source in sources:
            for with_projection in (False, True):
                pop_data, _ = fetch_pop(
                    source,
                    parameters=request_parameters,
                    projection=with_projection,
                    verbose=False,
                )
                print(f"{source} --> fetch_pop(): {pop_data.index[-1]} = {pop_data.tail(1).iloc[0, 0]:,.0f}")

    def test_fetch_state_pop() -> None:
        """Smoke-test abbreviation lookup and fetch_state_pop()."""
        # abbreviation resolution
        for state in ("AUS", "VIC", "QLD"):
            print(f"{state} --> {_state_name_from_abbrev(state)}")

        # a single state, without projection
        data, _ = fetch_state_pop("SA", projection=False, validate=False)
        print(f"SA: {data.index[-1]} = {data.tail(1).iloc[0, 0]:,.0f}")

        # the same state, projected to the current quarter
        data, _ = fetch_state_pop("SA", projection=True)
        print(f"SA with projection: {data.index[-1]} = {data.tail(1).iloc[0, 0]:,.0f}")

        # every region at once, with columns relabelled by REGION
        data, meta = fetch_state_pop("all", projection=False, validate=False)
        region_by_series = {
            series_id: region for series_id, region in zip(meta.index, meta["REGION"], strict=False)
        }
        data = data.rename(columns=region_by_series)
        print(f"All states:\n{data.tail(1).T}")

    # a separator line before each test and once more at the end
    separator = "\n" + "=" * 50
    for run_test in (test_fetch_pop, test_fetch_state_pop):
        print(separator)
        run_test()
    print(separator)
FLOW_ID = 'ERP_COMP_Q'
STRUCTURE_ID = 'ERP_COMP_Q'
QUARTERS_IN_YEAR = 4
LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4
def fetch_pop( source: Literal['erp', 'na'] = 'erp', parameters: dict[str, str] | None = None, *, projection: bool = False, validate: bool = False, **kwargs: Unpack[sdmxabs.GetFileKwargs]) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
def fetch_pop(
    source: Literal["erp", "na"] = "erp",
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch Australian population data from the ABS SDMX API.

    Args:
        source (str): Source of the population data:
            - "erp": ABS published Estimated Resident Population (default)
            - "na": Implied population from the ABS National Accounts
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, pass the data through the naive projection
            to the current quarter (growth over the last 4 quarters). The projection
            leaves the data unchanged when it is empty, current, or too old.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed to the fetch_selection() function

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If source is neither "erp" nor "na".

    """
    # echo the call when the caller asked for verbose output
    if kwargs.get("verbose", False):
        print(f"fetch_pop(): {source=} {validate=} {kwargs=}")

    # dispatch on the requested source
    if source == "erp":
        data, meta = _erp_population("Australia", parameters, validate=validate, **kwargs)
    elif source == "na":
        data, meta = _na_population(parameters, validate=validate, **kwargs)
    else:
        raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']")

    # optionally extend the series to the current quarter
    result = _make_projection(data) if projection else data
    return result, meta

Fetch Australian population data from the ABS SDMX API: either the published Estimated Resident Population (ERP) or the population implied by the National Accounts.

Args: source (str): Source of the population data: - "erp": ABS published Estimated Resident Population (default) - "na": Implied population from the ABS National Accounts parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'. projection (bool, optional): If True, and data is available for the most recent year, make a projection forward to the current quarter, based on growth over the last 4 quarters. validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False. **kwargs: Additional arguments passed to the fetch_selection() function

Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

def fetch_state_pop( state: str, parameters: dict[str, str] | None = None, *, projection: bool = False, validate: bool = False, **kwargs: Unpack[sdmxabs.GetFileKwargs]) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
def fetch_state_pop(
    state: str,
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch state-level ERP population data from the ABS SDMX API.

    Args:
        state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.).
            [Note: Use "" or "all" for the population estimates for all states.]
            Surrounding whitespace is ignored.
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, make a projection forward to the current quarter
            based on growth over the last 4 quarters.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed to the fetch_selection() function

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If a specific state is requested and it cannot be resolved
            to a known region name.

    """
    # report the parameters used if requested
    if kwargs.get("verbose", False):
        print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}")

    # Strip before testing for the "all states" sentinel: the abbreviation
    # lookup already ignores surrounding whitespace, so " all " must not be
    # rejected as an invalid state name.
    if state.strip().lower() in ("", "all"):
        full_state_name: str = ""  # empty name => no REGION restriction
    else:
        full_state_name = _state_name_from_abbrev(state)

    data, meta = _erp_population(full_state_name, parameters, validate=validate, **kwargs)

    if projection:
        data = _make_projection(data)

    return data, meta

Fetch state-level ERP population data from the ABS SDMX API.

Args: state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.). [Note: Use "" or "all" for the population estimates for all states.] parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'. projection (bool, optional): If True, make a projection forward to the current quarter based on growth over the last 4 quarters. validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False. **kwargs: Additional arguments passed to the fetch_selection() function

Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata