sdmxabs.fetch_pop

Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.

  1"""Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts."""
  2
  3from typing import Literal, Unpack
  4
  5import numpy as np
  6import pandas as pd
  7
  8from sdmxabs.download_cache import GetFileKwargs
  9from sdmxabs.fetch_gdp import fetch_gdp
 10from sdmxabs.fetch_selection import MatchType as Mt
 11from sdmxabs.fetch_selection import fetch_selection
 12from sdmxabs.flow_metadata import code_list_for_dim
 13
 14# --- constants
 15FLOW_ID = "ERP_COMP_Q"
 16QUARTERS_IN_YEAR = 4
 17LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4
 18
 19
 20# --- private functions
 21def _erp_population(
 22    state: str,
 23    parameters: dict[str, str] | None,
 24    *,
 25    validate: bool,
 26    **kwargs: Unpack[GetFileKwargs],
 27) -> tuple[pd.DataFrame, pd.DataFrame]:
 28    """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API."""
 29    selection_criteria = [
 30        ("Estimated Resident Population", "MEASURE", Mt.EXACT),
 31        ("Q", "FREQ", Mt.EXACT),
 32    ]
 33    if state:
 34        selection_criteria.append((state, "REGION", Mt.EXACT))
 35    d, m = fetch_selection(FLOW_ID, selection_criteria, parameters, validate=validate, **kwargs)
 36    return d, m
 37
 38
 39def _na_population(
 40    parameters: dict[str, str] | None,
 41    *,
 42    validate: bool,
 43    **kwargs: Unpack[GetFileKwargs],
 44) -> tuple[pd.DataFrame, pd.DataFrame]:
 45    """Extrapolate Australian population from the National Accounts data from the ABS SDMX API."""
 46    # --- Fetch GDP data
 47    gdp, _ = fetch_gdp(
 48        seasonality="o",
 49        price_measure="cp",
 50        parameters=parameters,
 51        validate=validate,
 52        **kwargs,
 53    )
 54
 55    # --- Fetch GDP per capita data
 56    selection_criteria = [
 57        ("Original", "TSEST", Mt.EXACT),
 58        ("Current prices", "MEASURE", Mt.EXACT),
 59        ("GDP per capita", "DATA_ITEM", Mt.EXACT),
 60    ]
 61    flow_id = "ANA_AGG"
 62    d, m = fetch_selection(flow_id, selection_criteria, parameters, validate=validate, **kwargs)
 63
 64    # --- Extrapolate population from the above two series, Fudge meta-data
 65    name = "Implicit Population from GDP"
 66    gdp_s = gdp[gdp.columns[0]].astype(float)
 67    gdppc_s = d[d.columns[0]].astype(float)
 68    pop_s = gdp_s.div(gdppc_s) * 1_000
 69    d = pd.DataFrame(pop_s)
 70    d.columns = m.index = pd.Index([name])
 71    for k, v in {"UNIT_MEASURE": "NUM", "UNIT_MULT": "3", "DATA_ITEM": name}.items():
 72        if k not in m.columns:
 73            continue
 74        m.loc[name, k] = v
 75    return d, m
 76
 77
 78def _make_projection(data: pd.DataFrame) -> pd.DataFrame:
 79    """Make a naive projection of the population data forward to the current quarter.
 80
 81    Return original data if (for example) the data is empty or too old for a reasonable
 82    projection. The projection is based on the annual growth over the latest quarters.
 83
 84    """
 85    # --- validation/preparation
 86    if data.empty:
 87        return data  # No data to project
 88    current_quarter = pd.Timestamp.now().to_period("Q")
 89    last_period = data.index[-1]
 90    if last_period >= current_quarter:
 91        return data  # No projection needed
 92    if last_period < current_quarter - LAST_QUARTER_TOO_OLD_FOR_PROJECTION:
 93        return data  # Too old for projection
 94    annual_growth: float = data[data.columns[0]].astype(float).pct_change(QUARTERS_IN_YEAR).iloc[-1]
 95    if np.isnan(annual_growth):
 96        return data  # No valid growth rate
 97    new_periods = pd.period_range(start=last_period + 1, end=current_quarter, freq="Q")
 98    if new_periods.empty:
 99        return data  # No new periods to project
100
101    # --- Make the projection
102    compound_q_growth_factor = (1 + annual_growth) ** (1 / QUARTERS_IN_YEAR)
103    new_data = pd.Series(
104        data.iloc[-1, 0] * (compound_q_growth_factor ** np.arange(1, len(new_periods) + 1)), index=new_periods
105    )
106    return pd.DataFrame(data[data.columns[0]].combine_first(new_data))
107
108
def _state_name_from_abbrev(state: str) -> str:
    """Resolve a state/territory abbreviation (or name) to a valid REGION name.

    Args:
        state: A case-insensitive abbreviation such as "NSW" or "aus", or a
            region name. Surrounding whitespace is ignored. Anything that is
            not a known abbreviation is used as given (stripped).

    Returns:
        The resolved region name.

    Raises:
        ValueError: If the resolved name is not among the "name" values of the
            REGION code list for the ERP flow.

    """
    # Known abbreviations; several spellings map to the national total.
    abbrev_to_name = {
        "nsw": "New South Wales",
        "vic": "Victoria",
        "qld": "Queensland",
        "sa": "South Australia",
        "wa": "Western Australia",
        "tas": "Tasmania",
        "nt": "Northern Territory",
        "act": "Australian Capital Territory",
        **dict.fromkeys(("aust", "aus", "au"), "Australia"),
    }

    key = state.lower().strip()
    state_name = abbrev_to_name.get(key, state.strip())

    # Validate against the names published in the flow's REGION code list.
    region_codes = pd.DataFrame(code_list_for_dim(FLOW_ID, "REGION")).T
    valid_names = region_codes["name"]
    if state_name in valid_names.to_numpy():
        return state_name
    raise ValueError(f"Invalid state '{state_name}'. Available: {list(valid_names.unique())}")
131
132
133# --- public functions
def fetch_pop(
    source: Literal["erp", "na"] = "erp",
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch Australian population data from the ABS SDMX API.

    Args:
        source (str): Source of the population data:
            - "erp": ABS published Estimated Resident Population (default)
            - "na": Implied population from the ABS National Accounts
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, and the data is recent enough, extend it
            to the current quarter using the growth over the last 4 quarters.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed on to the underlying fetch functions.
            If "verbose" is truthy, the call's arguments are printed.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If source is neither "erp" nor "na".

    """
    # Echo the call when verbose output was requested.
    if kwargs.get("verbose", False):
        print(f"fetch_pop(): {source=} {validate=} {kwargs=}")

    # Dispatch on the requested source.
    if source == "erp":
        data, meta = _erp_population("Australia", parameters, validate=validate, **kwargs)
    elif source == "na":
        data, meta = _na_population(parameters, validate=validate, **kwargs)
    else:
        raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']")

    # Optionally extend the series to the current quarter.
    return (_make_projection(data) if projection else data), meta
179
180
def fetch_state_pop(
    state: str,
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch state-level ERP population data from the ABS SDMX API.

    Args:
        state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.).
            Surrounding whitespace is ignored.
            [Note: Use "" or "all" for the population estimates for all states.]
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, make a projection forward to the current quarter
            based on growth over the last 4 quarters.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed to the fetch_selection() function.
            If "verbose" is truthy, the call's arguments are printed.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If the state cannot be resolved to a valid region name.

    """
    # report the parameters used if requested
    if kwargs.get("verbose", False):
        print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}")

    # Strip before testing for the "all regions" request so that padded input
    # (e.g. " all ") is treated the same way padded abbreviations already are.
    requested = state.strip()
    if requested.lower() in ("", "all"):
        full_state_name: str = ""  # empty name means no REGION filter
    else:
        full_state_name = _state_name_from_abbrev(requested)

    data, meta = _erp_population(full_state_name, parameters, validate=validate, **kwargs)

    if projection:
        data = _make_projection(data)

    return data, meta
222
223
if __name__ == "__main__":

    def test_fetch_pop() -> None:
        """Print the latest population figure for each source, with and without projection."""
        start = {"startPeriod": "2023-Q4"}
        sources: list[Literal["erp", "na"]] = ["erp", "na"]
        for source in sources:
            for with_projection in (False, True):
                frame, _ = fetch_pop(source, parameters=start, projection=with_projection, verbose=False)
                latest_period = frame.index[-1]
                latest_value = frame.tail(1).iloc[0, 0]
                print(f"{source} --> fetch_pop(): {latest_period} = {latest_value:,.0f}")

    def test_fetch_state_pop() -> None:
        """Exercise abbreviation lookup and the state-level fetch, printing the results."""
        # Abbreviation lookup
        for state in ("AUS", "VIC", "QLD"):
            print(f"{state} --> {_state_name_from_abbrev(state)}")

        # Single state, no projection
        frame, _ = fetch_state_pop("SA", projection=False, validate=False)
        print(f"SA: {frame.index[-1]} = {frame.tail(1).iloc[0, 0]:,.0f}")

        # Single state, with projection
        frame, _ = fetch_state_pop("SA", projection=True)
        print(f"SA with projection: {frame.index[-1]} = {frame.tail(1).iloc[0, 0]:,.0f}")

        # All regions: relabel the columns with their REGION metadata value
        frame, meta = fetch_state_pop("all", projection=False, validate=False)
        region_by_column = dict(zip(meta.index, meta["REGION"], strict=False))
        frame = frame.rename(columns=region_by_column)
        print(f"All states:\n{frame.tail(1).T}")

    separator = "\n" + "=" * 50
    for run_test in (test_fetch_pop, test_fetch_state_pop):
        print(separator)
        run_test()
    print(separator)
FLOW_ID = 'ERP_COMP_Q'
QUARTERS_IN_YEAR = 4
LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4
def fetch_pop( source: Literal['erp', 'na'] = 'erp', parameters: dict[str, str] | None = None, *, projection: bool = False, validate: bool = False, **kwargs: Unpack[sdmxabs.GetFileKwargs]) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
def fetch_pop(
    source: Literal["erp", "na"] = "erp",
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch Australian population data from the ABS SDMX API.

    Args:
        source (str): Source of the population data:
            - "erp": ABS published Estimated Resident Population (default)
            - "na": Implied population from the ABS National Accounts
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, and the data is recent enough, extend it
            to the current quarter using the growth over the last 4 quarters.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed on to the underlying fetch functions.
            If "verbose" is truthy, the call's arguments are printed.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If source is neither "erp" nor "na".

    """
    # Echo the call when verbose output was requested.
    if kwargs.get("verbose", False):
        print(f"fetch_pop(): {source=} {validate=} {kwargs=}")

    # Dispatch on the requested source.
    if source == "erp":
        data, meta = _erp_population("Australia", parameters, validate=validate, **kwargs)
    elif source == "na":
        data, meta = _na_population(parameters, validate=validate, **kwargs)
    else:
        raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']")

    # Optionally extend the series to the current quarter.
    return (_make_projection(data) if projection else data), meta

Fetch Australian population data from the ABS SDMX API: either the published Estimated Resident Population (ERP) or the population implied by the National Accounts.

Args:

- source (str): Source of the population data:
    - "erp": ABS published Estimated Resident Population (default)
    - "na": Implied population from the ABS National Accounts
- parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'.
- projection (bool, optional): If True, and data is available for the most recent year, make a projection forward to the current quarter, based on growth over the last 4 quarters.
- validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False.
- **kwargs: Additional arguments passed to the fetch_selection() function.

Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

def fetch_state_pop( state: str, parameters: dict[str, str] | None = None, *, projection: bool = False, validate: bool = False, **kwargs: Unpack[sdmxabs.GetFileKwargs]) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
def fetch_state_pop(
    state: str,
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch state-level ERP population data from the ABS SDMX API.

    Args:
        state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.).
            Surrounding whitespace is ignored.
            [Note: Use "" or "all" for the population estimates for all states.]
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, make a projection forward to the current quarter
            based on growth over the last 4 quarters.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed to the fetch_selection() function.
            If "verbose" is truthy, the call's arguments are printed.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If the state cannot be resolved to a valid region name.

    """
    # report the parameters used if requested
    if kwargs.get("verbose", False):
        print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}")

    # Strip before testing for the "all regions" request so that padded input
    # (e.g. " all ") is treated the same way padded abbreviations already are.
    requested = state.strip()
    if requested.lower() in ("", "all"):
        full_state_name: str = ""  # empty name means no REGION filter
    else:
        full_state_name = _state_name_from_abbrev(requested)

    data, meta = _erp_population(full_state_name, parameters, validate=validate, **kwargs)

    if projection:
        data = _make_projection(data)

    return data, meta

Fetch state-level ERP population data from the ABS SDMX API.

Args:

- state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.). [Note: Use "" or "all" for the population estimates for all states.]
- parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'.
- projection (bool, optional): If True, make a projection forward to the current quarter based on growth over the last 4 quarters.
- validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False.
- **kwargs: Additional arguments passed to the fetch_selection() function.

Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata