Coverage for preprocess/constants.py: 100%

8 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2026-02-05 06:46 -0600

1""" 

2crate_anon/preprocess/constants.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26**Constants used for lots of preprocessing functions.** 

27 

28""" 

29 

30# ----------------------------------------------------------------------------- 

31# Indexes in destination databases 

32# ----------------------------------------------------------------------------- 

33# Prefix for index names: 

34CRATE_IDX_PREFIX = "crate_idx" 

35 

36# ----------------------------------------------------------------------------- 

37# Columns for source databases 

38# ----------------------------------------------------------------------------- 

39# If our preprocessors need to create a primary key (PK) column in a table as 

40# part of preprocessing, we call it this: 

41CRATE_COL_PK = "crate_pk" 

42# Making it explicitly CRATE-related keeps it distinct from more generic 

43# names like "id" or "pk" or "RowIdentifier". 

44 

45# Text extraction from documents 

46CRATE_TABLE_EXTRACTED_TEXT = "crate_extracted_text" 

47CRATE_COL_FILE_PATH = "crate_file_path" 

48CRATE_COL_TEXT = "crate_text" 

49CRATE_COL_TEXT_LAST_EXTRACTED = "crate_text_last_extracted" 

50 

51# ----------------------------------------------------------------------------- 

52# Columns in ONS Postcode Database (from CRATE import) 

53# ----------------------------------------------------------------------------- 

54ONSPD_TABLE_POSTCODE = "postcode" 

55DEFAULT_GEOG_COLS = [ 

56 # These are geographically "blurry" areas. The most specific is likely 

57 # LSOA (or, equivalently, IMD). 

58 # For details, see postcodes.py. 

59 "bua11", # Built-up Area (BUA) 

60 "buasd11", # Built-up Area Sub-division (BUASD) 

61 "casward", # Census Area Statistics (CAS) ward 

62 "imd", # Index of Multiple Deprivation (IMD) [rank of LSOA/equivalent] 

63 "lea", # TODO: check whether still present (Local Education Authority in previous data?) 

64 "lsoa01", # 2001 Census Lower Layer Super Output Area (LSOA) or equivalent 

65 "lsoa11", # 2011 Census Lower Layer Super Output Area (LSOA) or equiv. 

66 "msoa01", # 2001 Census Middle Layer Super Output Area (MSOA) or equiv. 

67 "msoa11", # 2011 Census Middle Layer Super Output Area (MSOA) or equiv. 

68 "nuts", # EU Local Administrative Unit, level 2 

69 "oac01", # 2001 Census Output Area classification (OAC) 

70 "oac11", # 2011 Census Output Area classification (OAC) 

71 "parish", # Parish/community 

72 "pcon", # Westminster parliamentary constituency 

73 "pct", # Primary Care Trust or equivalent 

74 "ru11ind", # 2011 Census rural-urban classification 

75 "statsward", # Statistical ward 

76 "ur01ind", # 2001 Census urban/rural indicator 

77]