sdmxabs.fetch_pop
Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.
1"""Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.""" 2 3from typing import Literal, Unpack 4 5import numpy as np 6import pandas as pd 7 8from sdmxabs.download_cache import GetFileKwargs 9from sdmxabs.fetch_gdp import fetch_gdp 10from sdmxabs.fetch_selection import MatchType as Mt 11from sdmxabs.fetch_selection import fetch_selection 12from sdmxabs.flow_metadata import code_list_for, structure_ident 13 14# --- constants 15FLOW_ID = "ERP_COMP_Q" 16STRUCTURE_ID = structure_ident(FLOW_ID) 17QUARTERS_IN_YEAR = 4 18LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4 19 20 21# --- private functions 22def _erp_population( 23 state: str, 24 parameters: dict[str, str] | None, 25 *, 26 validate: bool, 27 **kwargs: Unpack[GetFileKwargs], 28) -> tuple[pd.DataFrame, pd.DataFrame]: 29 """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API.""" 30 selection_criteria = [ 31 ("Estimated Resident Population", "MEASURE", Mt.EXACT), 32 ("Q", "FREQ", Mt.EXACT), 33 ] 34 if state: 35 selection_criteria.append((state, "REGION", Mt.EXACT)) 36 d, m = fetch_selection(FLOW_ID, selection_criteria, parameters, validate=validate, **kwargs) 37 return d, m 38 39 40def _na_population( 41 parameters: dict[str, str] | None, 42 *, 43 validate: bool, 44 **kwargs: Unpack[GetFileKwargs], 45) -> tuple[pd.DataFrame, pd.DataFrame]: 46 """Extrapolate Australian population from the National Accounts data from the ABS SDMX API.""" 47 # --- Fetch GDP data 48 gdp, _ = fetch_gdp( 49 seasonality="o", 50 price_measure="cp", 51 parameters=parameters, 52 validate=validate, 53 **kwargs, 54 ) 55 56 # --- Fetch GDP per capita data 57 selection_criteria = [ 58 ("Original", "TSEST", Mt.EXACT), 59 ("Current prices", "MEASURE", Mt.EXACT), 60 ("GDP per capita", "DATA_ITEM", Mt.EXACT), 61 ] 62 flow_id = "ANA_AGG" 63 d, m = fetch_selection(flow_id, selection_criteria, parameters, validate=validate, **kwargs) 64 65 # --- Extrapolate population from the above two series, 
Fudge meta-data 66 name = "Implicit Population from GDP" 67 gdp_s = gdp[gdp.columns[0]].astype(float) 68 gdppc_s = d[d.columns[0]].astype(float) 69 pop_s = gdp_s.div(gdppc_s) * 1_000 70 d = pd.DataFrame(pop_s) 71 d.columns = m.index = pd.Index([name]) 72 for k, v in {"UNIT_MEASURE": "NUM", "UNIT_MULT": "3", "DATA_ITEM": name}.items(): 73 if k not in m.columns: 74 continue 75 m.loc[name, k] = v 76 return d, m 77 78 79def _make_projection(data: pd.DataFrame) -> pd.DataFrame: 80 """Make a naive projection of the population data forward to the current quarter. 81 82 Return original data if (for example) the data is empty or too old for a reasonable 83 projection. The projection is based on the annual growth over the latest quarters. 84 85 """ 86 # --- validation/preparation 87 if data.empty: 88 return data # No data to project 89 current_quarter = pd.Timestamp.now().to_period("Q") 90 last_period = data.index[-1] 91 if last_period >= current_quarter: 92 return data # No projection needed 93 if last_period < current_quarter - LAST_QUARTER_TOO_OLD_FOR_PROJECTION: 94 return data # Too old for projection 95 annual_growth: float = data[data.columns[0]].astype(float).pct_change(QUARTERS_IN_YEAR).iloc[-1] 96 if np.isnan(annual_growth): 97 return data # No valid growth rate 98 new_periods = pd.period_range(start=last_period + 1, end=current_quarter, freq="Q") 99 if new_periods.empty: 100 return data # No new periods to project 101 102 # --- Make the projection 103 compound_q_growth_factor = (1 + annual_growth) ** (1 / QUARTERS_IN_YEAR) 104 new_data = pd.Series( 105 data.iloc[-1, 0] * (compound_q_growth_factor ** np.arange(1, len(new_periods) + 1)), index=new_periods 106 ) 107 return pd.DataFrame(data[data.columns[0]].combine_first(new_data)) 108 109 110def _state_name_from_abbrev(state: str) -> str: 111 """Convert a state abbreviation to its full name.""" 112 # Abbreviation to full name mapping 113 abbrev_to_name = { 114 "nsw": "New South Wales", 115 "vic": "Victoria", 116 
"qld": "Queensland", 117 "sa": "South Australia", 118 "wa": "Western Australia", 119 "tas": "Tasmania", 120 "nt": "Northern Territory", 121 "act": "Australian Capital Territory", 122 } 123 for abbrev in ("aust", "aus", "au"): 124 abbrev_to_name[abbrev] = "Australia" 125 126 lower_case_abbrev = state.lower().strip() 127 state_name = abbrev_to_name.get(lower_case_abbrev, state.strip()) 128 state_names = pd.DataFrame(code_list_for(STRUCTURE_ID, "REGION")).T 129 if state_name not in state_names["name"].to_numpy(): 130 raise ValueError(f"Invalid state '{state_name}'. Available: {list(state_names['name'].unique())}") 131 return state_name 132 133 134# --- public functions 135def fetch_pop( 136 source: Literal["erp", "na"] = "erp", 137 parameters: dict[str, str] | None = None, 138 *, 139 projection: bool = False, 140 validate: bool = False, 141 **kwargs: Unpack[GetFileKwargs], 142) -> tuple[pd.DataFrame, pd.DataFrame]: 143 """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API. 144 145 Args: 146 source (str): Source of the population data: 147 - "erp": ABS published Estimated Resident Population (default) 148 - "na": Implied population from the ABS National Accounts 149 parameters (dict[str, str] | None): Additional parameters for the API request, 150 such as 'startPeriod'. 151 projection (bool, optional): If True, and data is available for the most recent year, 152 make a projection forward to the current quarter, based on growth over the last 4 quarters. 153 validate (bool, optional): If True, validate the selection against the flow's 154 required dimensions when generating the URL key. Defaults to False. 
155 **kwargs: Additional arguments passed to the fetch_selection() function 156 157 Returns: 158 tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata 159 160 """ 161 # report the parameters used if requested 162 verbose = kwargs.get("verbose", False) 163 if verbose: 164 print(f"fetch_pop(): {source=} {validate=} {kwargs=}") 165 166 # build a selection criteria and fetch the relevant data 167 match source: 168 case "erp": 169 data, meta = _erp_population("Australia", parameters, validate=validate, **kwargs) 170 case "na": 171 data, meta = _na_population(parameters, validate=validate, **kwargs) 172 case _: 173 raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']") 174 175 # if requested, make a projection of the data 176 if projection: 177 data = _make_projection(data) 178 179 return data, meta 180 181 182def fetch_state_pop( 183 state: str, 184 parameters: dict[str, str] | None = None, 185 *, 186 projection: bool = False, 187 validate: bool = False, 188 **kwargs: Unpack[GetFileKwargs], 189) -> tuple[pd.DataFrame, pd.DataFrame]: 190 """Fetch state-level ERP population data from the ABS SDMX API. 191 192 Args: 193 state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.). 194 [Note: Use "" or "all" for the population estimates for all states.] 195 parameters (dict[str, str] | None): Additional parameters for the API request, 196 such as 'startPeriod'. 197 projection (bool, optional): If True, make a projection forward to the current quarter 198 based on growth over the last 4 quarters. 199 validate (bool, optional): If True, validate the selection against the flow's 200 required dimensions when generating the URL key. Defaults to False. 
201 **kwargs: Additional arguments passed to the fetch_selection() function 202 203 Returns: 204 tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata 205 206 """ 207 # report the parameters used if requested 208 verbose = kwargs.get("verbose", False) 209 if verbose: 210 print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}") 211 212 if state.lower() in ("", "all"): 213 full_state_name: str = "" 214 else: 215 full_state_name = _state_name_from_abbrev(state) 216 217 data, meta = _erp_population(full_state_name, parameters, validate=validate, **kwargs) 218 219 if projection: 220 data = _make_projection(data) 221 222 return data, meta 223 224 225if __name__ == "__main__": 226 227 def test_fetch_pop() -> None: 228 """Test function to fetch population data.""" 229 parameters = {"startPeriod": "2023-Q4"} 230 sources: list[Literal["erp", "na"]] = ["erp", "na"] 231 for source in sources: 232 for proj in [False, True]: 233 pop_data, _pop_meta = fetch_pop(source, parameters=parameters, projection=proj, verbose=False) 234 print(f"{source} --> fetch_pop(): {pop_data.index[-1]} = {pop_data.tail(1).iloc[0, 0]:,.0f}") 235 236 def test_fetch_state_pop() -> None: 237 """Test function to fetch state population data.""" 238 # Test abbreviations 239 for state in ["AUS", "VIC", "QLD"]: 240 print(f"{state} --> {_state_name_from_abbrev(state)}") 241 242 # Test fetch_state_pop 243 data, _meta = fetch_state_pop("SA", projection=False, validate=False) 244 print(f"SA: {data.index[-1]} = {data.tail(1).iloc[0, 0]:,.0f}") 245 246 # Test projection 247 data, _meta = fetch_state_pop("SA", projection=True) 248 print(f"SA with projection: {data.index[-1]} = {data.tail(1).iloc[0, 0]:,.0f}") 249 250 # Test getting all state populations 251 data, meta = fetch_state_pop("all", projection=False, validate=False) 252 rename = dict(zip(meta.index, meta["REGION"], strict=False)) 253 data = data.rename(columns=rename) 254 print(f"All states:\n{data.tail(1).T}") 255 256 
print("\n" + "=" * 50) 257 test_fetch_pop() 258 print("\n" + "=" * 50) 259 test_fetch_state_pop() 260 print("\n" + "=" * 50)
def fetch_pop(
    source: Literal["erp", "na"] = "erp",
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch national population data from the ABS SDMX API.

    Args:
        source (str): Which population series to return:
            - "erp": the published Estimated Resident Population for Australia (default)
            - "na": the population implied by the National Accounts
        parameters (dict[str, str] | None): Extra parameters for the API request,
            for example 'startPeriod'.
        projection (bool, optional): When True, the fetched data is handed to the
            projection helper, which extends it to the current quarter where it can.
        validate (bool, optional): Passed through to the fetch helpers. Defaults to False.
        **kwargs: Further keyword arguments passed through to the fetch helpers.
            A truthy 'verbose' entry also prints the call arguments.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: The population data and its metadata.

    Raises:
        ValueError: If source is neither "erp" nor "na".

    """
    # Echo the call arguments when asked to be verbose.
    if kwargs.get("verbose", False):
        print(f"fetch_pop(): {source=} {validate=} {kwargs=}")

    # Dispatch on the requested source.
    if source == "erp":
        population, metadata = _erp_population("Australia", parameters, validate=validate, **kwargs)
    elif source == "na":
        population, metadata = _na_population(parameters, validate=validate, **kwargs)
    else:
        raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']")

    # Only the data is projected; the metadata is returned untouched.
    return (_make_projection(population) if projection else population), metadata
Fetch Australian population data from the ABS SDMX API: either the published Estimated Resident Population (ERP) or the population implied by the National Accounts.
Args: source (str): Source of the population data: - "erp": ABS published Estimated Resident Population (default) - "na": Implied population from the ABS National Accounts parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'. projection (bool, optional): If True, and data is available for the most recent year, make a projection forward to the current quarter, based on growth over the last 4 quarters. validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False. **kwargs: Additional arguments passed to the fetch_selection() function
Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata
def fetch_state_pop(
    state: str,
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch state-level ERP population data from the ABS SDMX API.

    Args:
        state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.).
            Surrounding whitespace is ignored.
            [Note: Use "" or "all" for the population estimates for all states.]
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, make a projection forward to the current quarter
            based on growth over the last 4 quarters. When several series are returned
            (e.g. state="all"), each one is projected separately.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed to the fetch_selection() function

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: If state is not "", "all", a known abbreviation or a valid region name.

    """
    # report the parameters used if requested
    if kwargs.get("verbose", False):
        print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}")

    # Strip before testing for the "all states" request so that e.g. " all " is
    # not mistaken for a (non-existent) region name.
    requested = state.strip()
    full_state_name = "" if requested.lower() in ("", "all") else _state_name_from_abbrev(requested)

    data, meta = _erp_population(full_state_name, parameters, validate=validate, **kwargs)

    if projection:
        if data.shape[1] > 1:
            # Project each series on its own so that every column is kept and
            # is extended using its own growth rate.
            data = pd.concat(
                [_make_projection(data.iloc[:, [i]]) for i in range(data.shape[1])],
                axis=1,
            )
        else:
            data = _make_projection(data)

    return data, meta
Fetch state-level ERP population data from the ABS SDMX API.
Args: state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.). [Note: Use "" or "all" for the population estimates for all states.] parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'. projection (bool, optional): If True, make a projection forward to the current quarter based on growth over the last 4 quarters. validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False. **kwargs: Additional arguments passed to the fetch_selection() function
Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata