readabs.read_abs_series
Get specific ABS data series by their ABS series identifiers.
"""Get specific ABS data series by their ABS series identifiers."""

from collections.abc import Sequence
from typing import Unpack, cast

from pandas import DataFrame, Index, PeriodIndex, concat

from readabs.abs_meta_data import metacol
from readabs.read_abs_cat import read_abs_cat
from readabs.read_support import ReadArgs, check_kwargs, get_args


# --- functions
def read_abs_series(
    cat: str,
    series_id: str | Sequence[str],
    **kwargs: Unpack[ReadArgs],
) -> tuple[DataFrame, DataFrame]:
    """Get specific ABS data series by their ABS catalogue and series identifiers.

    Parameters
    ----------
    cat : str
        The ABS catalogue ID.

    series_id : str | Sequence[str]
        An ABS series ID or a sequence of ABS series IDs. An ID that is
        listed more than once is only processed once.

    **kwargs : Unpack[ReadArgs]
        Keyword arguments for the read_abs_series function,
        which are the same as the keyword arguments for the
        read_abs_cat function.

    Returns
    -------
    tuple[DataFrame, DataFrame]
        A tuple of two DataFrames, one for the primary data (one column
        per series ID) and one for the metadata (one row per series ID).

    Raises
    ------
    ValueError
        If a series ID is not found in the catalogue, or if the frequency
        of a series' index differs from that of the data already collected.
        When the ``ignore_errors`` argument is true the offending series is
        skipped instead.

    Example
    -------

    ```python
    import readabs as ra
    from pandas import DataFrame
    cat_num = "6202.0"  # The ABS labour force survey
    unemployment_rate = "A84423050A"
    seo = "6202001"  # The ABS table name
    data, meta = ra.read_abs_series(
        cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
    )
    ```

    """
    # check for unexpected keyword arguments/get defaults
    check_kwargs(kwargs, "read_abs_series")
    args = get_args(kwargs, "read_abs_series")

    # read the ABS category data
    cat_data, cat_meta = read_abs_cat(cat, **args)

    # drop repeated series_ids in the meta data,
    # make unique series_ids the index
    cat_meta.index = Index(cat_meta[metacol.id])
    cat_meta = cat_meta.groupby(cat_meta.index).first()

    # normalise the request to a list of unique series IDs; a repeated ID
    # would otherwise yield one data column but several metadata rows.
    # dict.fromkeys() drops the repeats while keeping the requested order.
    if isinstance(series_id, str):
        series_id = [series_id]
    unique_ids = list(dict.fromkeys(series_id))

    # get the ABS series data
    return_data = DataFrame()
    meta_rows = []  # one metadata Series per accepted series ID
    for identifier in unique_ids:
        # confirm that the series ID is in the catalogue
        if identifier not in cat_meta.index:
            if args["verbose"]:
                print(f"Series ID {identifier} not found in ABS catalogue ID {cat}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Series ID {identifier} not found in catalogue {cat}")

        # confirm that the index of the series is compatible
        table = str(cat_meta.loc[identifier, metacol.table])  # str for mypy
        data_series = cat_data[table][identifier]
        if (
            len(return_data) > 0
            and cast("PeriodIndex", return_data.index).freq != cast("PeriodIndex", data_series.index).freq
        ):
            if args["verbose"]:
                print(f"Frequency mismatch for series ID {identifier}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Frequency mismatch for series ID {identifier}")

        # add the series data and meta data to the return values;
        # widen the existing index first so no periods of the new series are lost
        if len(return_data) > 0:
            return_data = return_data.reindex(return_data.index.union(data_series.index))
        return_data[identifier] = data_series
        meta_rows.append(cat_meta.loc[identifier])

    # build the metadata frame once (one row per series) rather than
    # re-concatenating on every iteration; concat() rejects an empty list
    if not meta_rows:
        return return_data, DataFrame()
    return return_data, concat(meta_rows, axis=1).T


if __name__ == "__main__":

    def simple_test() -> None:
        """Test the read_abs_series function."""
        # simple test
        # Trimmed Mean - through the year CPI growth - seasonally adjusted
        data, meta = read_abs_series("6401.0", "A3604511X", single_excel_only="640106")
        print(data.tail())
        print(meta.T)
        print("Done")

    simple_test()
def
read_abs_series( cat: str, series_id: str | Sequence[str], **kwargs: Unpack[readabs.ReadArgs]) -> tuple[pandas.DataFrame, pandas.DataFrame]:
def read_abs_series(
    cat: str,
    series_id: str | Sequence[str],
    **kwargs: Unpack[ReadArgs],
) -> tuple[DataFrame, DataFrame]:
    """Get specific ABS data series by their ABS catalogue and series identifiers.

    Parameters
    ----------
    cat : str
        The ABS catalogue ID.

    series_id : str | Sequence[str]
        An ABS series ID or a sequence of ABS series IDs. An ID that is
        listed more than once is only processed once.

    **kwargs : Unpack[ReadArgs]
        Keyword arguments for the read_abs_series function,
        which are the same as the keyword arguments for the
        read_abs_cat function.

    Returns
    -------
    tuple[DataFrame, DataFrame]
        A tuple of two DataFrames, one for the primary data (one column
        per series ID) and one for the metadata (one row per series ID).

    Raises
    ------
    ValueError
        If a series ID is not found in the catalogue, or if the frequency
        of a series' index differs from that of the data already collected.
        When the ``ignore_errors`` argument is true the offending series is
        skipped instead.

    Example
    -------

    ```python
    import readabs as ra
    from pandas import DataFrame
    cat_num = "6202.0"  # The ABS labour force survey
    unemployment_rate = "A84423050A"
    seo = "6202001"  # The ABS table name
    data, meta = ra.read_abs_series(
        cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
    )
    ```

    """
    # check for unexpected keyword arguments/get defaults
    check_kwargs(kwargs, "read_abs_series")
    args = get_args(kwargs, "read_abs_series")

    # read the ABS category data
    cat_data, cat_meta = read_abs_cat(cat, **args)

    # drop repeated series_ids in the meta data,
    # make unique series_ids the index
    cat_meta.index = Index(cat_meta[metacol.id])
    cat_meta = cat_meta.groupby(cat_meta.index).first()

    # normalise the request to a list of unique series IDs; a repeated ID
    # would otherwise yield one data column but several metadata rows.
    # dict.fromkeys() drops the repeats while keeping the requested order.
    if isinstance(series_id, str):
        series_id = [series_id]
    unique_ids = list(dict.fromkeys(series_id))

    # get the ABS series data
    return_data = DataFrame()
    meta_rows = []  # one metadata Series per accepted series ID
    for identifier in unique_ids:
        # confirm that the series ID is in the catalogue
        if identifier not in cat_meta.index:
            if args["verbose"]:
                print(f"Series ID {identifier} not found in ABS catalogue ID {cat}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Series ID {identifier} not found in catalogue {cat}")

        # confirm that the index of the series is compatible
        table = str(cat_meta.loc[identifier, metacol.table])  # str for mypy
        data_series = cat_data[table][identifier]
        if (
            len(return_data) > 0
            and cast("PeriodIndex", return_data.index).freq != cast("PeriodIndex", data_series.index).freq
        ):
            if args["verbose"]:
                print(f"Frequency mismatch for series ID {identifier}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Frequency mismatch for series ID {identifier}")

        # add the series data and meta data to the return values;
        # widen the existing index first so no periods of the new series are lost
        if len(return_data) > 0:
            return_data = return_data.reindex(return_data.index.union(data_series.index))
        return_data[identifier] = data_series
        meta_rows.append(cat_meta.loc[identifier])

    # build the metadata frame once (one row per series) rather than
    # re-concatenating on every iteration; concat() rejects an empty list
    if not meta_rows:
        return return_data, DataFrame()
    return return_data, concat(meta_rows, axis=1).T
Get specific ABS data series by their ABS catalogue and series identifiers.
Parameters
cat : str The ABS catalogue ID.
series_id : str | Sequence[str] An ABS series ID or a sequence of ABS series IDs.
**kwargs : Unpack[ReadArgs] Keyword arguments for the read_abs_series function; these are the same keyword arguments accepted by the read_abs_cat function.
Returns
tuple[DataFrame, DataFrame] A tuple of two DataFrames, one for the primary data and one for the metadata.
Example
import readabs as ra
from pandas import DataFrame
cat_num = "6202.0" # The ABS labour force survey
unemployment_rate = "A84423050A"
seo = "6202001" # The ABS table name
data, meta = ra.read_abs_series(
cat=cat_num, series_id=unemployment_rate, single_excel_only=seo
)