sdmxabs.fetch_pop

Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.

  1"""Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts."""
  2
  3from typing import Literal, Unpack
  4
  5import numpy as np
  6import pandas as pd
  7
  8from sdmxabs.download_cache import GetFileKwargs
  9from sdmxabs.fetch_gdp import fetch_gdp
 10from sdmxabs.fetch_selection import MatchType as Mt
 11from sdmxabs.fetch_selection import fetch_selection
 12from sdmxabs.flow_metadata import code_list_for_dim
 13
 14
 15# --- constants
  16FLOW_ID = "ERP_COMP_Q"  # dataflow id handed to fetch_selection() and code_list_for_dim() for ERP lookups
  17QUARTERS_IN_YEAR = 4  # lag used for the annual growth rate, and the root taken to get a quarterly factor
  18LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4  # no projection when the latest data is more than this many quarters old
 19
 20
 21# --- private functions
 22def _erp_population(
 23    state: str,
 24    parameters: dict[str, str] | None,
 25    *,
 26    validate: bool,
 27    **kwargs: Unpack[GetFileKwargs],
 28) -> tuple[pd.DataFrame, pd.DataFrame]:
 29    """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API."""
 30    selection_criteria = [
 31        ("Estimated Resident Population", "MEASURE", Mt.EXACT),
 32        (state, "REGION", Mt.EXACT),
 33        ("Q", "FREQ", Mt.EXACT),
 34    ]
 35    d, m = fetch_selection(FLOW_ID, selection_criteria, parameters, validate=validate, **kwargs)
 36    if len(m) != 1:
 37        raise ValueError(f"Expected 1 match for {state}, found {len(m)}")
 38    d.columns = m.index = pd.Index(["Estimated Resident Population"])
 39    return d, m
 40
 41
 42def _na_population(
 43    parameters: dict[str, str] | None,
 44    *,
 45    validate: bool,
 46    **kwargs: Unpack[GetFileKwargs],
 47) -> tuple[pd.DataFrame, pd.DataFrame]:
 48    """Extrapolate population from the National Accounts data from the ABS SDMX API."""
 49    # --- Fetch GDP data
 50    gdp, _ = fetch_gdp(
 51        seasonality="o",
 52        price_measure="cp",
 53        parameters=parameters,
 54        validate=validate,
 55        **kwargs,
 56    )
 57
 58    # --- Fetch GDP per capita data
 59    selection_criteria = [
 60        ("Original", "TSEST", Mt.EXACT),
 61        ("Current prices", "MEASURE", Mt.EXACT),
 62        ("GDP per capita", "DATA_ITEM", Mt.EXACT),
 63    ]
 64    flow_id = "ANA_AGG"
 65    d, m = fetch_selection(flow_id, selection_criteria, parameters, validate=validate, **kwargs)
 66
 67    # --- Extrapolate population from the above two series, Fudge meta-data
 68    name = "Implicit Population from GDP"
 69    gdp_s = gdp[gdp.columns[0]].astype(float)
 70    gdppc_s = d[d.columns[0]].astype(float)
 71    pop_s = gdp_s.div(gdppc_s) * 1_000
 72    d = pd.DataFrame(pop_s)
 73    d.columns = m.index = pd.Index([name])
 74    for k, v in {"UNIT_MEASURE": "NUM", "UNIT_MULT": "3", "DATA_ITEM": name}.items():
 75        if k not in m.columns:
 76            continue
 77        m.loc[name, k] = v
 78    return d, m
 79
 80
 81def _make_projection(data: pd.DataFrame) -> pd.DataFrame:
 82    """Make a naive projection of the population data forward to the current quarter.
 83
 84    Return original data if (for example) the data is empty or too old for a reasonable
 85    projection. The projection is based on the annual growth over the latest quarters.
 86
 87    """
 88    # --- validation/preparation
 89    if data.empty:
 90        return data  # No data to project
 91    current_quarter = pd.Timestamp.now().to_period("Q")
 92    last_period = data.index[-1]
 93    if last_period >= current_quarter:
 94        return data  # No projection needed
 95    if last_period < current_quarter - LAST_QUARTER_TOO_OLD_FOR_PROJECTION:
 96        return data  # Too old for projection
 97    annual_growth: float = data[data.columns[0]].astype(float).pct_change(QUARTERS_IN_YEAR).iloc[-1]
 98    if np.isnan(annual_growth):
 99        return data  # No valid growth rate
100    new_periods = pd.period_range(start=last_period + 1, end=current_quarter, freq="Q")
101    if new_periods.empty:
102        return data  # No new periods to project
103
104    # --- Make the projection
105    compound_q_growth_factor = (1 + annual_growth) ** (1 / QUARTERS_IN_YEAR)
106    new_data = pd.Series(
107        data.iloc[-1, 0] * (compound_q_growth_factor ** np.arange(1, len(new_periods) + 1)), index=new_periods
108    )
109    return pd.DataFrame(data[data.columns[0]].combine_first(new_data))
110
111
def _state_name_from_abbrev(state: str) -> str:
    """Resolve a state/territory abbreviation (or full name) to a validated region name.

    Known abbreviations are matched case-insensitively after trimming whitespace;
    anything else is used as given (trimmed). The result must appear in the "name"
    field of the REGION code list for the ERP flow.

    Args:
        state: Abbreviation such as "NSW" or "aus", or a full region name.

    Returns:
        The full region name.

    Raises:
        ValueError: If the resolved name is not in the REGION code list.

    """
    australia_aliases = dict.fromkeys(("aust", "aus", "au"), "Australia")
    full_names = {
        "nsw": "New South Wales",
        "vic": "Victoria",
        "qld": "Queensland",
        "sa": "South Australia",
        "wa": "Western Australia",
        "tas": "Tasmania",
        "nt": "Northern Territory",
        "act": "Australian Capital Territory",
    } | australia_aliases

    lookup_key = state.lower().strip()
    resolved = full_names.get(lookup_key, state.strip())

    # Validate against the names published for the REGION dimension.
    region_names = pd.DataFrame(code_list_for_dim(FLOW_ID, "REGION")).T["name"]
    if resolved in region_names.values:
        return resolved
    raise ValueError(f"Invalid state '{resolved}'. Available: {list(region_names.unique())}")
135
136
137# --- public functions
def fetch_pop(
    source: Literal["erp", "na"] = "erp",
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch national population data from the ABS SDMX API.

    Args:
        source (str): Where the population figures come from:
            - "erp": ABS published Estimated Resident Population for Australia (default)
            - "na": population implied by the ABS National Accounts
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, pass the data through _make_projection(),
            which extends it to the current quarter when the latest data is recent enough.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed on to the underlying fetch functions.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If `source` is neither "erp" nor "na".

    """
    # echo the call arguments when verbose output was requested
    if kwargs.get("verbose", False):
        print(f"fetch_pop(): {source=} {validate=} {kwargs=}")

    # fetch from the requested source
    if source == "erp":
        data, meta = _erp_population("Australia", parameters, validate=validate, **kwargs)
    elif source == "na":
        data, meta = _na_population(parameters, validate=validate, **kwargs)
    else:
        raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']")

    # optionally extend the series to the current quarter
    result = _make_projection(data) if projection else data
    return result, meta
183
184
def fetch_state_pop(
    state: str,
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch Estimated Resident Population data for one state or territory.

    Args:
        state (str): State/territory name or case-insensitive abbreviation
            (e.g., "NSW", "Vic", "qld", etc.).
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, pass the data through _make_projection(),
            which extends it to the current quarter when the latest data is recent enough.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed to the fetch_selection() function

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If `state` does not resolve to a known region name.

    """
    # echo the call arguments when verbose output was requested
    if kwargs.get("verbose", False):
        print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}")

    # resolve (and validate) the region before hitting the data API
    region = _state_name_from_abbrev(state)
    data, meta = _erp_population(region, parameters, validate=validate, **kwargs)

    # optionally extend the series to the current quarter
    result = _make_projection(data) if projection else data
    return result, meta
228
229
if __name__ == "__main__":
    # Manual smoke tests: these call the live API and print the latest observation.

    def test_fetch_pop() -> None:
        """Fetch national population from each source, with and without projection."""
        request_params = {"startPeriod": "2023-Q4"}
        combinations = [(src, proj) for src in ("erp", "na") for proj in (False, True)]
        for source, proj in combinations:
            pop_data, _pop_meta = fetch_pop(source, parameters=request_params, projection=proj, verbose=False)
            latest_period = pop_data.index[-1]
            latest_value = pop_data.tail(1).iloc[0, 0]
            print(f"{source} --> fetch_pop(): {latest_period} = {latest_value:,.0f}")

    def test_fetch_state_pop() -> None:
        """Resolve a few abbreviations, then fetch one state with and without projection."""
        # abbreviation lookup
        for abbrev in ("AUS", "VIC", "QLD"):
            print(f"{abbrev} --> {_state_name_from_abbrev(abbrev)}")

        # plain fetch
        frame, _meta = fetch_state_pop("SA", projection=False, validate=False)
        print(f"SA: {frame.index[-1]} = {frame.tail(1).iloc[0, 0]:,.0f}")

        # fetch with projection
        frame, _meta = fetch_state_pop("SA", projection=True)
        print(f"SA with projection: {frame.index[-1]} = {frame.tail(1).iloc[0, 0]:,.0f}")

    separator = "\n" + "=" * 50
    print(separator)
    test_fetch_pop()
    print(separator)
    test_fetch_state_pop()
    print(separator)
FLOW_ID = 'ERP_COMP_Q'  # dataflow id handed to fetch_selection() and code_list_for_dim() for ERP lookups
QUARTERS_IN_YEAR = 4  # lag used for the annual growth rate, and the root taken to get a quarterly factor
LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4  # no projection when the latest data is more than this many quarters old
def fetch_pop( source: Literal['erp', 'na'] = 'erp', parameters: dict[str, str] | None = None, *, projection: bool = False, validate: bool = False, **kwargs: Unpack[sdmxabs.GetFileKwargs]) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
def fetch_pop(
    source: Literal["erp", "na"] = "erp",
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch national population data from the ABS SDMX API.

    Args:
        source (str): Where the population figures come from:
            - "erp": ABS published Estimated Resident Population for Australia (default)
            - "na": population implied by the ABS National Accounts
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, pass the data through _make_projection(),
            which extends it to the current quarter when the latest data is recent enough.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed on to the underlying fetch functions.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If `source` is neither "erp" nor "na".

    """
    # echo the call arguments when verbose output was requested
    if kwargs.get("verbose", False):
        print(f"fetch_pop(): {source=} {validate=} {kwargs=}")

    # fetch from the requested source
    if source == "erp":
        data, meta = _erp_population("Australia", parameters, validate=validate, **kwargs)
    elif source == "na":
        data, meta = _na_population(parameters, validate=validate, **kwargs)
    else:
        raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']")

    # optionally extend the series to the current quarter
    result = _make_projection(data) if projection else data
    return result, meta

Fetch Australian population data from the ABS SDMX API: either the published Estimated Resident Population (ERP) or the population implied by the National Accounts.

Args:
- source (str): Source of the population data:
  - "erp": ABS published Estimated Resident Population (default)
  - "na": Implied population from the ABS National Accounts
- parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'.
- projection (bool, optional): If True, and the latest observation is no more than four quarters behind the current quarter, project the series forward to the current quarter, based on growth over the last 4 quarters.
- validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False.
- **kwargs: Additional arguments passed to the fetch_selection() function.

Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

def fetch_state_pop( state: str, parameters: dict[str, str] | None = None, *, projection: bool = False, validate: bool = False, **kwargs: Unpack[sdmxabs.GetFileKwargs]) -> tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
def fetch_state_pop(
    state: str,
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch Estimated Resident Population data for one state or territory.

    Args:
        state (str): State/territory name or case-insensitive abbreviation
            (e.g., "NSW", "Vic", "qld", etc.).
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, pass the data through _make_projection(),
            which extends it to the current quarter when the latest data is recent enough.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed to the fetch_selection() function

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If `state` does not resolve to a known region name.

    """
    # echo the call arguments when verbose output was requested
    if kwargs.get("verbose", False):
        print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}")

    # resolve (and validate) the region before hitting the data API
    region = _state_name_from_abbrev(state)
    data, meta = _erp_population(region, parameters, validate=validate, **kwargs)

    # optionally extend the series to the current quarter
    result = _make_projection(data) if projection else data
    return result, meta

Fetch state-level ERP population data from the ABS SDMX API.

Args:
- state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.).
- parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'.
- projection (bool, optional): If True, make a projection forward to the current quarter based on growth over the last 4 quarters.
- validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False.
- **kwargs: Additional arguments passed to the fetch_selection() function.

Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata