sdmxabs.fetch_pop
Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.
1"""Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.""" 2 3from typing import Literal, Unpack 4 5import numpy as np 6import pandas as pd 7 8from sdmxabs.download_cache import GetFileKwargs 9from sdmxabs.fetch_gdp import fetch_gdp 10from sdmxabs.fetch_selection import MatchType as Mt 11from sdmxabs.fetch_selection import fetch_selection 12from sdmxabs.flow_metadata import code_list_for_dim 13 14 15# --- constants 16FLOW_ID = "ERP_COMP_Q" 17QUARTERS_IN_YEAR = 4 18LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4 19 20 21# --- private functions 22def _erp_population( 23 state: str, 24 parameters: dict[str, str] | None, 25 *, 26 validate: bool, 27 **kwargs: Unpack[GetFileKwargs], 28) -> tuple[pd.DataFrame, pd.DataFrame]: 29 """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API.""" 30 selection_criteria = [ 31 ("Estimated Resident Population", "MEASURE", Mt.EXACT), 32 (state, "REGION", Mt.EXACT), 33 ("Q", "FREQ", Mt.EXACT), 34 ] 35 d, m = fetch_selection(FLOW_ID, selection_criteria, parameters, validate=validate, **kwargs) 36 if len(m) != 1: 37 raise ValueError(f"Expected 1 match for {state}, found {len(m)}") 38 d.columns = m.index = pd.Index(["Estimated Resident Population"]) 39 return d, m 40 41 42def _na_population( 43 parameters: dict[str, str] | None, 44 *, 45 validate: bool, 46 **kwargs: Unpack[GetFileKwargs], 47) -> tuple[pd.DataFrame, pd.DataFrame]: 48 """Extrapolate population from the National Accounts data from the ABS SDMX API.""" 49 # --- Fetch GDP data 50 gdp, _ = fetch_gdp( 51 seasonality="o", 52 price_measure="cp", 53 parameters=parameters, 54 validate=validate, 55 **kwargs, 56 ) 57 58 # --- Fetch GDP per capita data 59 selection_criteria = [ 60 ("Original", "TSEST", Mt.EXACT), 61 ("Current prices", "MEASURE", Mt.EXACT), 62 ("GDP per capita", "DATA_ITEM", Mt.EXACT), 63 ] 64 flow_id = "ANA_AGG" 65 d, m = fetch_selection(flow_id, selection_criteria, parameters, validate=validate, **kwargs) 66 67 
# --- Extrapolate population from the above two series, Fudge meta-data 68 name = "Implicit Population from GDP" 69 gdp_s = gdp[gdp.columns[0]].astype(float) 70 gdppc_s = d[d.columns[0]].astype(float) 71 pop_s = gdp_s.div(gdppc_s) * 1_000 72 d = pd.DataFrame(pop_s) 73 d.columns = m.index = pd.Index([name]) 74 for k, v in {"UNIT_MEASURE": "NUM", "UNIT_MULT": "3", "DATA_ITEM": name}.items(): 75 if k not in m.columns: 76 continue 77 m.loc[name, k] = v 78 return d, m 79 80 81def _make_projection(data: pd.DataFrame) -> pd.DataFrame: 82 """Make a naive projection of the population data forward to the current quarter. 83 84 Return original data if (for example) the data is empty or too old for a reasonable 85 projection. The projection is based on the annual growth over the latest quarters. 86 87 """ 88 # --- validation/preparation 89 if data.empty: 90 return data # No data to project 91 current_quarter = pd.Timestamp.now().to_period("Q") 92 last_period = data.index[-1] 93 if last_period >= current_quarter: 94 return data # No projection needed 95 if last_period < current_quarter - LAST_QUARTER_TOO_OLD_FOR_PROJECTION: 96 return data # Too old for projection 97 annual_growth: float = data[data.columns[0]].astype(float).pct_change(QUARTERS_IN_YEAR).iloc[-1] 98 if np.isnan(annual_growth): 99 return data # No valid growth rate 100 new_periods = pd.period_range(start=last_period + 1, end=current_quarter, freq="Q") 101 if new_periods.empty: 102 return data # No new periods to project 103 104 # --- Make the projection 105 compound_q_growth_factor = (1 + annual_growth) ** (1 / QUARTERS_IN_YEAR) 106 new_data = pd.Series( 107 data.iloc[-1, 0] * (compound_q_growth_factor ** np.arange(1, len(new_periods) + 1)), index=new_periods 108 ) 109 return pd.DataFrame(data[data.columns[0]].combine_first(new_data)) 110 111 112def _state_name_from_abbrev(state: str) -> str: 113 """Convert a state abbreviation to its full name.""" 114 115 # Abbreviation to full name mapping 116 abbrev_to_name = { 
117 "nsw": "New South Wales", 118 "vic": "Victoria", 119 "qld": "Queensland", 120 "sa": "South Australia", 121 "wa": "Western Australia", 122 "tas": "Tasmania", 123 "nt": "Northern Territory", 124 "act": "Australian Capital Territory", 125 } 126 for abbrev in ("aust", "aus", "au"): 127 abbrev_to_name[abbrev] = "Australia" 128 129 lower_case_abbrev = state.lower().strip() 130 state_name = abbrev_to_name.get(lower_case_abbrev, state.strip()) 131 state_names = pd.DataFrame(code_list_for_dim(FLOW_ID, "REGION")).T 132 if state_name not in state_names["name"].values: 133 raise ValueError(f"Invalid state '{state_name}'. Available: {list(state_names['name'].unique())}") 134 return state_name 135 136 137# --- public functions 138def fetch_pop( 139 source: Literal["erp", "na"] = "erp", 140 parameters: dict[str, str] | None = None, 141 *, 142 projection: bool = False, 143 validate: bool = False, 144 **kwargs: Unpack[GetFileKwargs], 145) -> tuple[pd.DataFrame, pd.DataFrame]: 146 """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API. 147 148 Args: 149 source (str): Source of the population data: 150 - "erp": ABS published Estimated Resident Population (default) 151 - "na": Implied population from the ABS National Accounts 152 parameters (dict[str, str] | None): Additional parameters for the API request, 153 such as 'startPeriod'. 154 projection (bool, optional): If True, and data is available for the most recent year, 155 make a projection forward to the current quarter, based on growth over the last 4 quarters. 156 validate (bool, optional): If True, validate the selection against the flow's 157 required dimensions when generating the URL key. Defaults to False. 
158 **kwargs: Additional arguments passed to the fetch_selection() function 159 160 Returns: 161 tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata 162 163 """ 164 # report the parameters used if requested 165 verbose = kwargs.get("verbose", False) 166 if verbose: 167 print(f"fetch_pop(): {source=} {validate=} {kwargs=}") 168 169 # build a selection criteria and fetch the relevant data 170 match source: 171 case "erp": 172 data, meta = _erp_population("Australia", parameters, validate=validate, **kwargs) 173 case "na": 174 data, meta = _na_population(parameters, validate=validate, **kwargs) 175 case _: 176 raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']") 177 178 # if requested, make a projection of the data 179 if projection: 180 data = _make_projection(data) 181 182 return data, meta 183 184 185def fetch_state_pop( 186 state: str, 187 parameters: dict[str, str] | None = None, 188 *, 189 projection: bool = False, 190 validate: bool = False, 191 **kwargs: Unpack[GetFileKwargs], 192) -> tuple[pd.DataFrame, pd.DataFrame]: 193 """Fetch state-level ERP population data from the ABS SDMX API. 194 195 Args: 196 state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.). 197 parameters (dict[str, str] | None): Additional parameters for the API request, 198 such as 'startPeriod'. 199 projection (bool, optional): If True, make a projection forward to the current quarter 200 based on growth over the last 4 quarters. 201 validate (bool, optional): If True, validate the selection against the flow's 202 required dimensions when generating the URL key. Defaults to False. 
203 **kwargs: Additional arguments passed to the fetch_selection() function 204 205 Returns: 206 tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata 207 208 """ 209 # report the parameters used if requested 210 verbose = kwargs.get("verbose", False) 211 if verbose: 212 print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}") 213 214 215 full_state_name = _state_name_from_abbrev(state) 216 217 data, meta = _erp_population( 218 full_state_name, 219 parameters, 220 validate=validate, 221 **kwargs 222 ) 223 224 if projection: 225 data = _make_projection(data) 226 227 return data, meta 228 229 230if __name__ == "__main__": 231 232 def test_fetch_pop() -> None: 233 """Test function to fetch population data.""" 234 parameters = {"startPeriod": "2023-Q4"} 235 for source in ["erp", "na"]: 236 for proj in [False, True]: 237 pop_data, _pop_meta = fetch_pop(source, parameters=parameters, projection=proj, verbose=False) 238 print(f"{source} --> fetch_pop(): {pop_data.index[-1]} = {pop_data.tail(1).iloc[0, 0]:,.0f}") 239 240 def test_fetch_state_pop() -> None: 241 """Test function to fetch state population data.""" 242 243 # Test abbreviations 244 for state in ["AUS", "VIC", "QLD"]: 245 print(f"{state} --> {_state_name_from_abbrev(state)}") 246 247 # Test fetch_state_pop 248 data, _meta = fetch_state_pop("SA", projection=False, validate=False) 249 print(f"SA: {data.index[-1]} = {data.tail(1).iloc[0, 0]:,.0f}") 250 251 # Test projection 252 data, _meta = fetch_state_pop("SA", projection=True) 253 print(f"SA with projection: {data.index[-1]} = {data.tail(1).iloc[0, 0]:,.0f}") 254 255 print("\n" + "="*50) 256 test_fetch_pop() 257 print("\n" + "="*50) 258 test_fetch_state_pop() 259 print("\n" + "="*50)
def fetch_pop(
    source: Literal["erp", "na"] = "erp",
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch Australian population data from the ABS SDMX API.

    The series is either the published Estimated Resident Population (ERP) or the
    population implied by the National Accounts, depending on `source`.

    Args:
        source (str): Which population series to fetch:
            - "erp": ABS published Estimated Resident Population (default)
            - "na": Implied population from the ABS National Accounts
        parameters (dict[str, str] | None): Extra parameters for the API request,
            for example 'startPeriod'.
        projection (bool, optional): When True, the fetched data is passed through
            _make_projection() to extend it towards the current quarter.
        validate (bool, optional): When True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Further arguments forwarded to the underlying fetch helpers.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: The population data and its metadata.

    Raises:
        ValueError: If `source` is neither "erp" nor "na".

    """
    # Echo the call arguments when the caller asked for verbose output.
    if kwargs.get("verbose", False):
        print(f"fetch_pop(): {source=} {validate=} {kwargs=}")

    # Dispatch on the requested source.
    if source == "erp":
        population, metadata = _erp_population("Australia", parameters, validate=validate, **kwargs)
    elif source == "na":
        population, metadata = _na_population(parameters, validate=validate, **kwargs)
    else:
        raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']")

    # Only the data is projected; the metadata is returned untouched.
    return (_make_projection(population) if projection else population), metadata
Fetch Australian population data from the ABS SDMX API: either the published Estimated Resident Population (ERP) or the population implied by the National Accounts.
Args: source (str): Source of the population data: - "erp": ABS published Estimated Resident Population (default) - "na": Implied population from the ABS National Accounts parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'. projection (bool, optional): If True, and the latest observation is no more than four quarters old, make a projection forward to the current quarter, based on growth over the last 4 quarters. Defaults to False. validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False. **kwargs: Additional arguments passed to the fetch_selection() function (and, for the "na" source, to fetch_gdp())
Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata
def fetch_state_pop(
    state: str,
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch ERP population data for one state or territory from the ABS SDMX API.

    Args:
        state (str): State/territory name, or a case-insensitive abbreviation
            such as "NSW", "Vic" or "qld".
        parameters (dict[str, str] | None): Extra parameters for the API request,
            for example 'startPeriod'.
        projection (bool, optional): When True, the fetched data is passed through
            _make_projection() to extend it towards the current quarter.
        validate (bool, optional): When True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Further arguments forwarded to the underlying fetch helper.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: The population data and its metadata.

    """
    # Echo the call arguments when the caller asked for verbose output.
    if kwargs.get("verbose", False):
        print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}")

    # Resolve the abbreviation first so the ERP query uses the full region name.
    region = _state_name_from_abbrev(state)
    population, metadata = _erp_population(region, parameters, validate=validate, **kwargs)

    if not projection:
        return population, metadata
    return _make_projection(population), metadata
Fetch state-level ERP population data from the ABS SDMX API.
Args: state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld"). parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'. projection (bool, optional): If True, and the latest observation is no more than four quarters old, make a projection forward to the current quarter based on growth over the last 4 quarters. Defaults to False. validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False. **kwargs: Additional arguments passed to the fetch_selection() function
Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata