Coverage for nlp_manager/cloud_config.py: 86%
37 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/nlp_manager/cloud_config.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26Config object used for cloud NLP requests.
28"""
30import logging
31import os
32from typing import TYPE_CHECKING, Dict, Tuple
34from crate_anon.nlp_manager.constants import (
35 CloudNlpConfigKeys,
36 NlpDefValues,
37 DEFAULT_CLOUD_LIMIT_BEFORE_COMMIT,
38 DEFAULT_CLOUD_MAX_CONTENT_LENGTH,
39 DEFAULT_CLOUD_MAX_RECORDS_PER_REQUEST,
40 DEFAULT_CLOUD_MAX_TRIES,
41 DEFAULT_CLOUD_RATE_LIMIT_HZ,
42 DEFAULT_CLOUD_WAIT_ON_CONN_ERR_S,
43 full_sectionname,
44 NlpConfigPrefixes,
45)
47if TYPE_CHECKING:
48 from crate_anon.nlp_manager.nlp_definition import NlpDefinition
50log = logging.getLogger(__name__)
53# =============================================================================
54# CloudConfig
55# =============================================================================
class CloudConfig:
    """
    Common config object for cloud NLP.

    Holds the settings read from one cloud NLP config section, together with
    the cloud processors belonging to the owning NLP definition.
    """

    def __init__(
        self, nlpdef: "NlpDefinition", name: str, req_data_dir: str
    ) -> None:
        """
        Reads the config from the NLP definition's config file.

        Args:
            nlpdef:
                a :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition`
            name:
                name for the cloud NLP configuration (to which a standard
                prefix will be added to get the config section name)
            req_data_dir:
                directory in which to store temporary request files
        """
        from crate_anon.nlp_manager.cloud_parser import (
            Cloud,
        )  # delayed import

        self._nlpdef = nlpdef
        self.req_data_dir = req_data_dir

        cfg = nlpdef.get_config_section(
            full_sectionname(NlpConfigPrefixes.CLOUD, name)
        )

        # Connection settings. The URL is the only option requested with
        # required=True; everything else falls back to a default.
        self.url = cfg.opt_str(CloudNlpConfigKeys.CLOUD_URL, required=True)
        self.verify_ssl = cfg.opt_bool(CloudNlpConfigKeys.VERIFY_SSL, True)
        self.compress = cfg.opt_bool(CloudNlpConfigKeys.COMPRESS, True)
        self.username = cfg.opt_str(CloudNlpConfigKeys.USERNAME, default="")
        self.password = cfg.opt_str(CloudNlpConfigKeys.PASSWORD, default="")

        # Request sizing, batching and retry settings.
        self.max_content_length = cfg.opt_int(
            CloudNlpConfigKeys.MAX_CONTENT_LENGTH,
            DEFAULT_CLOUD_MAX_CONTENT_LENGTH,
        )
        self.limit_before_commit = cfg.opt_int(
            CloudNlpConfigKeys.LIMIT_BEFORE_COMMIT,
            DEFAULT_CLOUD_LIMIT_BEFORE_COMMIT,
        )
        self.max_records_per_request = cfg.opt_int(
            CloudNlpConfigKeys.MAX_RECORDS_PER_REQUEST,
            DEFAULT_CLOUD_MAX_RECORDS_PER_REQUEST,
        )
        self.stop_at_failure = cfg.opt_bool(
            CloudNlpConfigKeys.STOP_AT_FAILURE, True
        )
        self.wait_on_conn_err = cfg.opt_int(
            CloudNlpConfigKeys.WAIT_ON_CONN_ERR,
            DEFAULT_CLOUD_WAIT_ON_CONN_ERR_S,
        )
        self.max_tries = cfg.opt_int(
            CloudNlpConfigKeys.MAX_TRIES, DEFAULT_CLOUD_MAX_TRIES
        )
        self.rate_limit_hz = cfg.opt_int(
            CloudNlpConfigKeys.RATE_LIMIT_HZ, DEFAULT_CLOUD_RATE_LIMIT_HZ
        )
        self.test_length_function_speed = cfg.opt_bool(
            CloudNlpConfigKeys.TEST_LENGTH_FUNCTION_SPEED, True
        )

        # Maps (procname, procversion) to the processor object.
        self.remote_processors: Dict[Tuple[str, str], "Cloud"] = {}
        for processor in self._nlpdef.processors:
            if not isinstance(processor, Cloud):
                # ... only add 'Cloud' processors
                log.warning(
                    f"Skipping NLP processor of non-cloud (e.g. local) "
                    f"type: {processor.friendly_name}"
                )
                continue
            self.remote_processors[
                (processor.procname, processor.procversion)
            ] = processor
            # NOTE: KEY IS A TUPLE!

        # We need the following in order to decide whether to ask to include
        # text in reply - if a processor is GATE we need to, as it does not
        # send back the content of the nlp snippet
        self.has_gate_processors = any(
            (x.format == NlpDefValues.FORMAT_GATE)
            for x in self.remote_processors.values()
        )

    @property
    def data_filename(self) -> str:
        """
        Returns the filename to be used for storing data.

        The result is the absolute path of ``request_data_<nlpname>.csv``
        inside ``req_data_dir``, where ``<nlpname>`` is the name of the NLP
        definition.
        """
        nlpname = self._nlpdef.name
        return os.path.abspath(
            os.path.join(self.req_data_dir, f"request_data_{nlpname}.csv")
        )