Coverage for preprocess/constants.py: 100%
8 statements
« prev ^ index » next coverage.py v7.8.0, created at 2026-02-05 06:46 -0600
"""
crate_anon/preprocess/constants.py

===============================================================================

    Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
    Created by Rudolf Cardinal (rnc1001@cam.ac.uk).

    This file is part of CRATE.

    CRATE is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CRATE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CRATE. If not, see <https://www.gnu.org/licenses/>.

===============================================================================

**Constants used for lots of preprocessing functions.**

"""
# -----------------------------------------------------------------------------
# Indexes in destination databases
# -----------------------------------------------------------------------------

# Prefix for the names of indexes in destination databases.
CRATE_IDX_PREFIX = "crate_idx"
# -----------------------------------------------------------------------------
# Columns for source databases
# -----------------------------------------------------------------------------

# If our preprocessors need to create a primary key (PK) column in a table as
# part of preprocessing, we call it this. Making it explicitly CRATE-related
# makes it distinctive from more generic names like "id" or "pk" or
# "RowIdentifier".
CRATE_COL_PK = "crate_pk"
# Text extraction from documents: the table name and column names used for
# extracted text.
CRATE_TABLE_EXTRACTED_TEXT = "crate_extracted_text"
CRATE_COL_FILE_PATH = "crate_file_path"
CRATE_COL_TEXT = "crate_text"
CRATE_COL_TEXT_LAST_EXTRACTED = "crate_text_last_extracted"
# -----------------------------------------------------------------------------
# Columns in ONS Postcode Database (from CRATE import)
# -----------------------------------------------------------------------------

# Name of the postcode table in the ONS Postcode Database (ONSPD).
ONSPD_TABLE_POSTCODE = "postcode"

# Default geography column names from the ONSPD.
DEFAULT_GEOG_COLS = [
    # These are geographically "blurry" areas. The most specific is likely
    # LSOA (or, equivalently, IMD).
    # For details, see postcodes.py.
    "bua11",  # Built-up Area (BUA)
    "buasd11",  # Built-up Area Sub-division (BUASD)
    "casward",  # Census Area Statistics (CAS) ward
    "imd",  # Index of Multiple Deprivation (IMD) [rank of LSOA/equivalent]
    "lea",  # todo: gone? (Local Education Authority in previous data?)
    "lsoa01",  # 2001 Census Lower Layer Super Output Area (LSOA) or equivalent
    "lsoa11",  # 2011 Census Lower Layer Super Output Area (LSOA) or equiv.
    "msoa01",  # 2001 Census Middle Layer Super Output Area (MSOA) or equiv.
    "msoa11",  # 2011 Census Middle Layer Super Output Area (MSOA) or equiv.
    "nuts",  # EU Local Administrative Unit, level 2
    "oac01",  # 2001 Census Output Area classification (OAC)
    "oac11",  # 2011 Census Output Area classification (OAC)
    "parish",  # Parish/community
    "pcon",  # Westminster parliamentary constituency
    "pct",  # Primary Care Trust or equivalent
    "ru11ind",  # 2011 Census rural-urban classification
    "statsward",  # Statistical ward
    "ur01ind",  # 2001 Census urban/rural indicator
]