sdmxabs.fetch_pop
Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.
1"""Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.""" 2 3from typing import Literal, Unpack 4 5import numpy as np 6import pandas as pd 7 8from sdmxabs.download_cache import GetFileKwargs 9from sdmxabs.fetch_gdp import fetch_gdp 10from sdmxabs.fetch_selection import MatchType as Mt 11from sdmxabs.fetch_selection import fetch_selection, match_item 12 13# --- constants 14QUARTERS_IN_YEAR = 4 15LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4 16 17 18# --- private functions 19def _erp_population( 20 parameters: dict[str, str] | None, 21 *, 22 validate: bool, 23 **kwargs: Unpack[GetFileKwargs], 24) -> tuple[pd.DataFrame, pd.DataFrame]: 25 """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API.""" 26 flow_id = "ERP_COMP_Q" 27 selection_criteria = [] 28 selection_criteria.append(match_item("Estimated Resident Population", "MEASURE", Mt.EXACT)) 29 selection_criteria.append(match_item("Australia", "REGION", Mt.EXACT)) 30 selection_criteria.append(match_item("Q", "FREQ", Mt.EXACT)) 31 d, m = fetch_selection(flow_id, selection_criteria, parameters, validate=validate, **kwargs) 32 d.columns = m.index = pd.Index(["Estimated Resident Population"]) 33 return d, m 34 35 36def _na_population( 37 parameters: dict[str, str] | None, 38 *, 39 validate: bool, 40 **kwargs: Unpack[GetFileKwargs], 41) -> tuple[pd.DataFrame, pd.DataFrame]: 42 """Extrapolate population from the National Accounts data from the ABS SDMX API.""" 43 # --- Fetch GDP data 44 gdp, _ = fetch_gdp( 45 seasonality="o", 46 price_measure="cp", 47 parameters=parameters, 48 validate=validate, 49 **kwargs, 50 ) 51 52 # --- Fetch GDP per capita data 53 selection_criteria = [] 54 selection_criteria.append(match_item("Original", "TSEST", Mt.EXACT)) 55 selection_criteria.append(match_item("Current prices", "MEASURE", Mt.EXACT)) 56 selection_criteria.append(match_item("GDP per capita", "DATA_ITEM", Mt.EXACT)) 57 flow_id = "ANA_AGG" 58 d, m = fetch_selection(flow_id, 
selection_criteria, parameters, validate=validate, **kwargs) 59 60 # --- Extrapolate population from the above two series, Fudge meta-data 61 name = "Implicit Population from GDP" 62 gdp_s = gdp[gdp.columns[0]].astype(float) 63 gdppc_s = d[d.columns[0]].astype(float) 64 pop_s = gdp_s.div(gdppc_s) * 1_000 65 d = pd.DataFrame(pop_s) 66 d.columns = m.index = pd.Index([name]) 67 for k, v in {"UNIT_MEASURE": "NUM", "UNIT_MULT": "3", "DATA_ITEM": name}.items(): 68 if k not in m.columns: 69 continue 70 m.loc[name, k] = v 71 return d, m 72 73 74def _make_projection(data: pd.DataFrame) -> pd.DataFrame: 75 """Make a naive projection of the population data forward to the current quarter. 76 77 Return original data if (for example) the data is empty or too old for a reasonable 78 projection. The projection is based on the annual growth over the latest quarters. 79 80 """ 81 # --- validation/preparation 82 if data.empty: 83 return data # No data to project 84 current_quarter = pd.Timestamp.now().to_period("Q") 85 last_period = data.index[-1] 86 if last_period >= current_quarter: 87 return data # No projection needed 88 if last_period < current_quarter - LAST_QUARTER_TOO_OLD_FOR_PROJECTION: 89 return data # Too old for projection 90 annual_growth: float = data[data.columns[0]].astype(float).pct_change(QUARTERS_IN_YEAR).iloc[-1] 91 if np.isnan(annual_growth): 92 return data # No valid growth rate 93 new_periods = pd.period_range(start=last_period + 1, end=current_quarter, freq="Q") 94 if new_periods.empty: 95 return data # No new periods to project 96 97 # --- Make the projection 98 compound_q_growth_factor = (1 + annual_growth) ** (1 / QUARTERS_IN_YEAR) 99 new_data = pd.Series( 100 data.iloc[-1, 0] * (compound_q_growth_factor ** np.arange(1, len(new_periods) + 1)), index=new_periods 101 ) 102 return pd.DataFrame(data[data.columns[0]].combine_first(new_data)) 103 104 105# --- public functions 106def fetch_pop( 107 source: Literal["erp", "na"] = "erp", 108 parameters: dict[str, 
str] | None = None, 109 *, 110 projection: bool = False, 111 validate: bool = False, 112 **kwargs: Unpack[GetFileKwargs], 113) -> tuple[pd.DataFrame, pd.DataFrame]: 114 """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API. 115 116 Args: 117 source (str): Source of the population data: 118 - "erp": ABS published Estimated Resident Population (default) 119 - "na": Implied population from the ABS National Accounts 120 parameters (dict[str, str] | None): Additional parameters for the API request, 121 such as 'startPeriod'. 122 projection (bool, optional): If True, and data is available for the most recent year, 123 make a projection forward to the current quarter, based on growth over the last 4 quarters. 124 validate (bool, optional): If True, validate the selection against the flow's 125 required dimensions when generating the URL key. Defaults to False. 126 **kwargs: Additional arguments passed to the fetch_selection() function 127 128 Returns: 129 tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata 130 131 """ 132 # report the parameters used if requested 133 verbose = kwargs.get("verbose", False) 134 if verbose: 135 print(f"fetch_pop(): {source=} {validate=} {kwargs=}") 136 137 # build a selection criteria and fetch the relevant data 138 match source: 139 case "erp": 140 data, meta = _erp_population(parameters, validate=validate, **kwargs) 141 case "na": 142 data, meta = _na_population(parameters, validate=validate, **kwargs) 143 case _: 144 raise ValueError(f"Invalid source '{source}'. 
Must be one of: ['erp', 'na']") 145 146 # if requested, make a projection of the data 147 if projection: 148 data = _make_projection(data) 149 150 return data, meta 151 152 153if __name__ == "__main__": 154 155 def test_fetch_pop() -> None: 156 """Test function to fetch population data.""" 157 parameters = {"startPeriod": "2024-Q4"} 158 pop_data, pop_meta = fetch_pop(source="na", parameters=parameters, projection=True, verbose=False) 159 print(pop_data, "\n", pop_meta.T) 160 161 test_fetch_pop()
107def fetch_pop( 108 source: Literal["erp", "na"] = "erp", 109 parameters: dict[str, str] | None = None, 110 *, 111 projection: bool = False, 112 validate: bool = False, 113 **kwargs: Unpack[GetFileKwargs], 114) -> tuple[pd.DataFrame, pd.DataFrame]: 115 """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API. 116 117 Args: 118 source (str): Source of the population data: 119 - "erp": ABS published Estimated Resident Population (default) 120 - "na": Implied population from the ABS National Accounts 121 parameters (dict[str, str] | None): Additional parameters for the API request, 122 such as 'startPeriod'. 123 projection (bool, optional): If True, and data is available for the most recent year, 124 make a projection forward to the current quarter, based on growth over the last 4 quarters. 125 validate (bool, optional): If True, validate the selection against the flow's 126 required dimensions when generating the URL key. Defaults to False. 127 **kwargs: Additional arguments passed to the fetch_selection() function 128 129 Returns: 130 tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata 131 132 """ 133 # report the parameters used if requested 134 verbose = kwargs.get("verbose", False) 135 if verbose: 136 print(f"fetch_pop(): {source=} {validate=} {kwargs=}") 137 138 # build a selection criteria and fetch the relevant data 139 match source: 140 case "erp": 141 data, meta = _erp_population(parameters, validate=validate, **kwargs) 142 case "na": 143 data, meta = _na_population(parameters, validate=validate, **kwargs) 144 case _: 145 raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']") 146 147 # if requested, make a projection of the data 148 if projection: 149 data = _make_projection(data) 150 151 return data, meta
Fetch Australian population data from the ABS SDMX API: either the published Estimated Resident Population (ERP) or the population implied by the National Accounts.
Args:
    source (str): Source of the population data:
        - "erp": ABS published Estimated Resident Population (default)
        - "na": Implied population from the ABS National Accounts
    parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'.
    projection (bool, optional): If True, and data is available for the most recent year, make a projection forward to the current quarter, based on growth over the last 4 quarters. Defaults to False.
    validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False.
    **kwargs: Additional arguments passed to the fetch_selection() function
Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata