sdmxabs.fetch_pop
Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.
1"""Fetch Australian population data from the ABS SDMX API, either ERP or implied from National Accounts.""" 2 3from typing import Literal, Unpack 4 5import numpy as np 6import pandas as pd 7 8from sdmxabs.download_cache import GetFileKwargs 9from sdmxabs.fetch_gdp import fetch_gdp 10from sdmxabs.fetch_selection import MatchType as Mt 11from sdmxabs.fetch_selection import fetch_selection 12from sdmxabs.flow_metadata import code_list_for_dim 13 14# --- constants 15FLOW_ID = "ERP_COMP_Q" 16QUARTERS_IN_YEAR = 4 17LAST_QUARTER_TOO_OLD_FOR_PROJECTION = 4 18 19 20# --- private functions 21def _erp_population( 22 state: str, 23 parameters: dict[str, str] | None, 24 *, 25 validate: bool, 26 **kwargs: Unpack[GetFileKwargs], 27) -> tuple[pd.DataFrame, pd.DataFrame]: 28 """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API.""" 29 selection_criteria = [ 30 ("Estimated Resident Population", "MEASURE", Mt.EXACT), 31 ("Q", "FREQ", Mt.EXACT), 32 ] 33 if state: 34 selection_criteria.append((state, "REGION", Mt.EXACT)) 35 d, m = fetch_selection(FLOW_ID, selection_criteria, parameters, validate=validate, **kwargs) 36 return d, m 37 38 39def _na_population( 40 parameters: dict[str, str] | None, 41 *, 42 validate: bool, 43 **kwargs: Unpack[GetFileKwargs], 44) -> tuple[pd.DataFrame, pd.DataFrame]: 45 """Extrapolate Australian population from the National Accounts data from the ABS SDMX API.""" 46 # --- Fetch GDP data 47 gdp, _ = fetch_gdp( 48 seasonality="o", 49 price_measure="cp", 50 parameters=parameters, 51 validate=validate, 52 **kwargs, 53 ) 54 55 # --- Fetch GDP per capita data 56 selection_criteria = [ 57 ("Original", "TSEST", Mt.EXACT), 58 ("Current prices", "MEASURE", Mt.EXACT), 59 ("GDP per capita", "DATA_ITEM", Mt.EXACT), 60 ] 61 flow_id = "ANA_AGG" 62 d, m = fetch_selection(flow_id, selection_criteria, parameters, validate=validate, **kwargs) 63 64 # --- Extrapolate population from the above two series, Fudge meta-data 65 name = "Implicit Population from 
GDP" 66 gdp_s = gdp[gdp.columns[0]].astype(float) 67 gdppc_s = d[d.columns[0]].astype(float) 68 pop_s = gdp_s.div(gdppc_s) * 1_000 69 d = pd.DataFrame(pop_s) 70 d.columns = m.index = pd.Index([name]) 71 for k, v in {"UNIT_MEASURE": "NUM", "UNIT_MULT": "3", "DATA_ITEM": name}.items(): 72 if k not in m.columns: 73 continue 74 m.loc[name, k] = v 75 return d, m 76 77 78def _make_projection(data: pd.DataFrame) -> pd.DataFrame: 79 """Make a naive projection of the population data forward to the current quarter. 80 81 Return original data if (for example) the data is empty or too old for a reasonable 82 projection. The projection is based on the annual growth over the latest quarters. 83 84 """ 85 # --- validation/preparation 86 if data.empty: 87 return data # No data to project 88 current_quarter = pd.Timestamp.now().to_period("Q") 89 last_period = data.index[-1] 90 if last_period >= current_quarter: 91 return data # No projection needed 92 if last_period < current_quarter - LAST_QUARTER_TOO_OLD_FOR_PROJECTION: 93 return data # Too old for projection 94 annual_growth: float = data[data.columns[0]].astype(float).pct_change(QUARTERS_IN_YEAR).iloc[-1] 95 if np.isnan(annual_growth): 96 return data # No valid growth rate 97 new_periods = pd.period_range(start=last_period + 1, end=current_quarter, freq="Q") 98 if new_periods.empty: 99 return data # No new periods to project 100 101 # --- Make the projection 102 compound_q_growth_factor = (1 + annual_growth) ** (1 / QUARTERS_IN_YEAR) 103 new_data = pd.Series( 104 data.iloc[-1, 0] * (compound_q_growth_factor ** np.arange(1, len(new_periods) + 1)), index=new_periods 105 ) 106 return pd.DataFrame(data[data.columns[0]].combine_first(new_data)) 107 108 109def _state_name_from_abbrev(state: str) -> str: 110 """Convert a state abbreviation to its full name.""" 111 # Abbreviation to full name mapping 112 abbrev_to_name = { 113 "nsw": "New South Wales", 114 "vic": "Victoria", 115 "qld": "Queensland", 116 "sa": "South Australia", 117 
"wa": "Western Australia", 118 "tas": "Tasmania", 119 "nt": "Northern Territory", 120 "act": "Australian Capital Territory", 121 } 122 for abbrev in ("aust", "aus", "au"): 123 abbrev_to_name[abbrev] = "Australia" 124 125 lower_case_abbrev = state.lower().strip() 126 state_name = abbrev_to_name.get(lower_case_abbrev, state.strip()) 127 state_names = pd.DataFrame(code_list_for_dim(FLOW_ID, "REGION")).T 128 if state_name not in state_names["name"].to_numpy(): 129 raise ValueError(f"Invalid state '{state_name}'. Available: {list(state_names['name'].unique())}") 130 return state_name 131 132 133# --- public functions 134def fetch_pop( 135 source: Literal["erp", "na"] = "erp", 136 parameters: dict[str, str] | None = None, 137 *, 138 projection: bool = False, 139 validate: bool = False, 140 **kwargs: Unpack[GetFileKwargs], 141) -> tuple[pd.DataFrame, pd.DataFrame]: 142 """Fetch Estimated Resident Population (ERP) data from the ABS SDMX API. 143 144 Args: 145 source (str): Source of the population data: 146 - "erp": ABS published Estimated Resident Population (default) 147 - "na": Implied population from the ABS National Accounts 148 parameters (dict[str, str] | None): Additional parameters for the API request, 149 such as 'startPeriod'. 150 projection (bool, optional): If True, and data is available for the most recent year, 151 make a projection forward to the current quarter, based on growth over the last 4 quarters. 152 validate (bool, optional): If True, validate the selection against the flow's 153 required dimensions when generating the URL key. Defaults to False. 
154 **kwargs: Additional arguments passed to the fetch_selection() function 155 156 Returns: 157 tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata 158 159 """ 160 # report the parameters used if requested 161 verbose = kwargs.get("verbose", False) 162 if verbose: 163 print(f"fetch_pop(): {source=} {validate=} {kwargs=}") 164 165 # build a selection criteria and fetch the relevant data 166 match source: 167 case "erp": 168 data, meta = _erp_population("Australia", parameters, validate=validate, **kwargs) 169 case "na": 170 data, meta = _na_population(parameters, validate=validate, **kwargs) 171 case _: 172 raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']") 173 174 # if requested, make a projection of the data 175 if projection: 176 data = _make_projection(data) 177 178 return data, meta 179 180 181def fetch_state_pop( 182 state: str, 183 parameters: dict[str, str] | None = None, 184 *, 185 projection: bool = False, 186 validate: bool = False, 187 **kwargs: Unpack[GetFileKwargs], 188) -> tuple[pd.DataFrame, pd.DataFrame]: 189 """Fetch state-level ERP population data from the ABS SDMX API. 190 191 Args: 192 state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.). 193 [Note: Use "" or "all" for the population estimates for all states.] 194 parameters (dict[str, str] | None): Additional parameters for the API request, 195 such as 'startPeriod'. 196 projection (bool, optional): If True, make a projection forward to the current quarter 197 based on growth over the last 4 quarters. 198 validate (bool, optional): If True, validate the selection against the flow's 199 required dimensions when generating the URL key. Defaults to False. 
200 **kwargs: Additional arguments passed to the fetch_selection() function 201 202 Returns: 203 tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata 204 205 """ 206 # report the parameters used if requested 207 verbose = kwargs.get("verbose", False) 208 if verbose: 209 print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}") 210 211 if state.lower() in ("", "all"): 212 full_state_name: str = "" 213 else: 214 full_state_name = _state_name_from_abbrev(state) 215 216 data, meta = _erp_population(full_state_name, parameters, validate=validate, **kwargs) 217 218 if projection: 219 data = _make_projection(data) 220 221 return data, meta 222 223 224if __name__ == "__main__": 225 226 def test_fetch_pop() -> None: 227 """Test function to fetch population data.""" 228 parameters = {"startPeriod": "2023-Q4"} 229 sources: list[Literal["erp", "na"]] = ["erp", "na"] 230 for source in sources: 231 for proj in [False, True]: 232 pop_data, _pop_meta = fetch_pop(source, parameters=parameters, projection=proj, verbose=False) 233 print(f"{source} --> fetch_pop(): {pop_data.index[-1]} = {pop_data.tail(1).iloc[0, 0]:,.0f}") 234 235 def test_fetch_state_pop() -> None: 236 """Test function to fetch state population data.""" 237 # Test abbreviations 238 for state in ["AUS", "VIC", "QLD"]: 239 print(f"{state} --> {_state_name_from_abbrev(state)}") 240 241 # Test fetch_state_pop 242 data, _meta = fetch_state_pop("SA", projection=False, validate=False) 243 print(f"SA: {data.index[-1]} = {data.tail(1).iloc[0, 0]:,.0f}") 244 245 # Test projection 246 data, _meta = fetch_state_pop("SA", projection=True) 247 print(f"SA with projection: {data.index[-1]} = {data.tail(1).iloc[0, 0]:,.0f}") 248 249 # Test getting all state populations 250 data, meta = fetch_state_pop("all", projection=False, validate=False) 251 rename = dict(zip(meta.index, meta["REGION"], strict=False)) 252 data = data.rename(columns=rename) 253 print(f"All states:\n{data.tail(1).T}") 254 255 
print("\n" + "=" * 50) 256 test_fetch_pop() 257 print("\n" + "=" * 50) 258 test_fetch_state_pop() 259 print("\n" + "=" * 50)
def fetch_pop(
    source: Literal["erp", "na"] = "erp",
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch national population data from the ABS SDMX API.

    Args:
        source (str): Which population series to return:
            - "erp": the published Estimated Resident Population for Australia (default)
            - "na": the population implied by the National Accounts
        parameters (dict[str, str] | None): Extra parameters for the API request,
            for example 'startPeriod'.
        projection (bool, optional): When True, the fetched data is passed through
            _make_projection() before being returned. Defaults to False.
        validate (bool, optional): Forwarded to the underlying fetch helpers.
            Defaults to False.
        **kwargs: Forwarded to the underlying fetch helpers; a truthy 'verbose'
            entry also prints the call arguments.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: The population data and its metadata.

    Raises:
        ValueError: If source is neither "erp" nor "na".

    """
    # Echo the call arguments when the caller asked for verbose output.
    if kwargs.get("verbose", False):
        print(f"fetch_pop(): {source=} {validate=} {kwargs=}")

    # Dispatch on the requested source.
    if source == "erp":
        fetched = _erp_population("Australia", parameters, validate=validate, **kwargs)
    elif source == "na":
        fetched = _na_population(parameters, validate=validate, **kwargs)
    else:
        raise ValueError(f"Invalid source '{source}'. Must be one of: ['erp', 'na']")
    data, meta = fetched

    # Optionally extend the series forward to the current quarter.
    return (_make_projection(data) if projection else data), meta
Fetch Australian population data from the ABS SDMX API: either the published Estimated Resident Population (ERP) or the population implied by the National Accounts.
Args: source (str): Source of the population data: - "erp": ABS published Estimated Resident Population (default) - "na": Implied population from the ABS National Accounts parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'. projection (bool, optional): If True, and data is available for the most recent year, make a projection forward to the current quarter, based on growth over the last 4 quarters. validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False. **kwargs: Additional arguments passed to the fetch_selection() function
Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata
def fetch_state_pop(
    state: str,
    parameters: dict[str, str] | None = None,
    *,
    projection: bool = False,
    validate: bool = False,
    **kwargs: Unpack[GetFileKwargs],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch state-level ERP population data from the ABS SDMX API.

    Args:
        state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.).
            [Note: Use "" or "all" for the population estimates for all states.]
        parameters (dict[str, str] | None): Additional parameters for the API request,
            such as 'startPeriod'.
        projection (bool, optional): If True, make a projection forward to the current quarter
            based on growth over the last 4 quarters. When several regions are returned,
            each column is projected separately.
        validate (bool, optional): If True, validate the selection against the flow's
            required dimensions when generating the URL key. Defaults to False.
        **kwargs: Additional arguments passed to the fetch_selection() function

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata

    Raises:
        ValueError: Propagated from _state_name_from_abbrev() for an unrecognised state.

    """
    # report the parameters used if requested
    verbose = kwargs.get("verbose", False)
    if verbose:
        print(f"fetch_state_pop(): {state=} {validate=} {kwargs=}")

    # Strip before comparing so that " all " is treated the same as "all".
    if state.strip().lower() in ("", "all"):
        full_state_name: str = ""
    else:
        full_state_name = _state_name_from_abbrev(state)

    data, meta = _erp_population(full_state_name, parameters, validate=validate, **kwargs)

    if projection:
        if data.shape[1] > 1:
            # Project region by region: each column gets its own growth rate and
            # no column is dropped from the result. Positional slicing keeps this
            # safe even if column labels repeat.
            data = pd.concat(
                [_make_projection(data.iloc[:, [i]]) for i in range(data.shape[1])],
                axis=1,
            )
        else:
            data = _make_projection(data)

    return data, meta
Fetch state-level ERP population data from the ABS SDMX API.
Args: state (str): State/territory name or case-insensitive abbreviation (e.g., "NSW", "Vic", "qld", etc.). [Note: Use "" or "all" for the population estimates for all states.] parameters (dict[str, str] | None): Additional parameters for the API request, such as 'startPeriod'. projection (bool, optional): If True, make a projection forward to the current quarter based on growth over the last 4 quarters. validate (bool, optional): If True, validate the selection against the flow's required dimensions when generating the URL key. Defaults to False. **kwargs: Additional arguments passed to the fetch_selection() function
Returns: tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the population data and metadata