Coverage for nlp_manager/cloud_config.py: 86%

37 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/nlp_manager/cloud_config.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26Config object used for cloud NLP requests. 

27 

28""" 

29 

30import logging 

31import os 

32from typing import TYPE_CHECKING, Dict, Tuple 

33 

34from crate_anon.nlp_manager.constants import ( 

35 CloudNlpConfigKeys, 

36 NlpDefValues, 

37 DEFAULT_CLOUD_LIMIT_BEFORE_COMMIT, 

38 DEFAULT_CLOUD_MAX_CONTENT_LENGTH, 

39 DEFAULT_CLOUD_MAX_RECORDS_PER_REQUEST, 

40 DEFAULT_CLOUD_MAX_TRIES, 

41 DEFAULT_CLOUD_RATE_LIMIT_HZ, 

42 DEFAULT_CLOUD_WAIT_ON_CONN_ERR_S, 

43 full_sectionname, 

44 NlpConfigPrefixes, 

45) 

46 

47if TYPE_CHECKING: 

48 from crate_anon.nlp_manager.nlp_definition import NlpDefinition 

49 

50log = logging.getLogger(__name__) 

51 

52 

53# ============================================================================= 

54# CloudConfig 

55# ============================================================================= 

56 

57 

class CloudConfig:
    """
    Holds the settings shared by cloud NLP requests.
    """

    def __init__(
        self, nlpdef: "NlpDefinition", name: str, req_data_dir: str
    ) -> None:
        """
        Populates the settings from one section of the NLP definition's
        config, and collects the definition's cloud processors.

        Args:
            nlpdef:
                the
                :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition`
                whose config section and processors are read
            name:
                cloud NLP configuration name; combined with the standard
                cloud prefix to give the config section name
            req_data_dir:
                directory in which to store temporary request files
        """
        # Delayed import: done at call time rather than at module level.
        from crate_anon.nlp_manager.cloud_parser import Cloud

        self._nlpdef = nlpdef
        self.req_data_dir = req_data_dir

        section = full_sectionname(NlpConfigPrefixes.CLOUD, name)
        cfg = nlpdef.get_config_section(section)
        keys = CloudNlpConfigKeys  # short alias for the option names

        # Connection and authentication
        self.url = cfg.opt_str(keys.CLOUD_URL, required=True)
        self.verify_ssl = cfg.opt_bool(keys.VERIFY_SSL, True)
        self.compress = cfg.opt_bool(keys.COMPRESS, True)
        self.username = cfg.opt_str(keys.USERNAME, default="")
        self.password = cfg.opt_str(keys.PASSWORD, default="")

        # Request sizing and commit frequency
        self.max_content_length = cfg.opt_int(
            keys.MAX_CONTENT_LENGTH, DEFAULT_CLOUD_MAX_CONTENT_LENGTH
        )
        self.limit_before_commit = cfg.opt_int(
            keys.LIMIT_BEFORE_COMMIT, DEFAULT_CLOUD_LIMIT_BEFORE_COMMIT
        )
        self.max_records_per_request = cfg.opt_int(
            keys.MAX_RECORDS_PER_REQUEST,
            DEFAULT_CLOUD_MAX_RECORDS_PER_REQUEST,
        )

        # Failure handling, retries and rate limiting
        self.stop_at_failure = cfg.opt_bool(keys.STOP_AT_FAILURE, True)
        self.wait_on_conn_err = cfg.opt_int(
            keys.WAIT_ON_CONN_ERR, DEFAULT_CLOUD_WAIT_ON_CONN_ERR_S
        )
        self.max_tries = cfg.opt_int(keys.MAX_TRIES, DEFAULT_CLOUD_MAX_TRIES)
        self.rate_limit_hz = cfg.opt_int(
            keys.RATE_LIMIT_HZ, DEFAULT_CLOUD_RATE_LIMIT_HZ
        )
        self.test_length_function_speed = cfg.opt_bool(
            keys.TEST_LENGTH_FUNCTION_SPEED, True
        )

        # Map (procname, procversion) -> processor, for 'Cloud' processors
        # only. NOTE: KEY IS A TUPLE!
        cloud_procs = {}  # type: Dict[Tuple[str, str], 'Cloud']
        self.remote_processors = cloud_procs
        for processor in nlpdef.processors:
            if isinstance(processor, Cloud):
                proc_key = (processor.procname, processor.procversion)
                cloud_procs[proc_key] = processor
            else:
                log.warning(
                    f"Skipping NLP processor of non-cloud (e.g. local) "
                    f"type: {processor.friendly_name}"
                )

        # Used to decide whether to ask for the text to be included in the
        # reply: a GATE processor does not send back the content of the NLP
        # snippet, so we must ask for it if any processor is GATE.
        self.has_gate_processors = any(
            proc.format == NlpDefValues.FORMAT_GATE
            for proc in cloud_procs.values()
        )

    @property
    def data_filename(self) -> str:
        """
        Returns the absolute path of the CSV file used for storing request
        data: a file named after the NLP definition, inside the request data
        directory.
        """
        nlpname = self._nlpdef.name
        basename = f"request_data_{nlpname}.csv"
        joined = os.path.join(self.req_data_dir, basename)
        return os.path.abspath(joined)

151 )