Coverage for nlp_webserver/server_processor.py: 83%

60 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1r""" 

2crate_anon/nlp_webserver/server_processor.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26ServerProcessor class. 

27 

28""" 

29 

30from typing import Dict, Optional, Any 

31 

32from crate_anon.nlp_manager.base_nlp_parser import BaseNlpParser 

33from crate_anon.nlp_manager.processor_helpers import ( 

34 make_nlp_parser_unconfigured, 

35) 

36from crate_anon.nlprp.api import JsonObjectType, NlprpServerProcessor 

37from crate_anon.nlprp.constants import NlprpKeys, NlprpValues 

38from crate_anon.nlprp.errors import BAD_REQUEST, mkerror, no_such_proc_error 

39from crate_anon.nlp_webserver.constants import ( 

40 KEY_PROCTYPE, 

41 PROCTYPE_GATE, 

42 GATE_BASE_URL, 

43) 

44 

45 

class ServerProcessor(NlprpServerProcessor):
    """
    Adds extra information to
    :class:`crate_anon.nlprp.api.NlprpServerProcessor`.

    - For ease of finding processor info based on name and version
      (alternative would be a dictionary in which the keys were name_version
      and the values were another dictionary with the rest of the info).

    - Also used as the client-side representation.

    Every instance registers itself, on construction, in the class-level
    :attr:`processors` dictionary, keyed by its processor ID
    (``name_version``). Registering a second processor with the same ID
    replaces the first.
    """

    # Master list of all instances (processors)
    processors = {}  # type: Dict[str, "ServerProcessor"]

    # Longest processor ID accepted by the constructor. The error raised
    # refers to a database field; that field is defined elsewhere.
    _MAX_PROCESSOR_ID_LENGTH = 100

    def __init__(
        self,
        name: str,
        title: str,
        version: str,
        is_default_version: bool,
        description: str,
        schema_type: str = NlprpValues.UNKNOWN,
        sql_dialect: Optional[str] = None,
        tabular_schema: Optional[Dict[str, Any]] = None,
        proctype: Optional[str] = None,
    ) -> None:
        """
        Create a processor and register it with the class.

        Args:
            name: processor name
            title: processor title
            version: processor version
            is_default_version: is this the default version of the processor
                with this name?
            description: processor description
            schema_type: passed to the superclass
            sql_dialect: passed to the superclass
            tabular_schema: passed to the superclass
            proctype: processor type; if not specified (``None`` or empty),
                the name is used as the processor type

        Raises:
            :exc:`ValueError` if the resulting processor ID is too long.
        """
        super().__init__(
            name=name,
            title=title,
            version=version,
            is_default_version=is_default_version,
            description=description,
            schema_type=schema_type,
            sql_dialect=sql_dialect,
            tabular_schema=tabular_schema,
        )
        # Validate before registering, so a rejected processor never appears
        # in the master list.
        if len(self.processor_id) > self._MAX_PROCESSOR_ID_LENGTH:
            raise ValueError(
                f"Processor id {self.processor_id} is too "
                "long for database field"
            )

        # Only GATE processors get a base URL.
        self.base_url = GATE_BASE_URL if proctype == PROCTYPE_GATE else None

        # Not created here; see set_parser().
        self.parser = None  # type: Optional[BaseNlpParser]

        # Fall back to the processor name if no type was given.
        self.proctype = proctype or name

        # Add instance to list of processors
        ServerProcessor.processors[self.processor_id] = self

    @classmethod
    def from_nlprp_json_dict(
        cls, processor_dict: Dict[str, Any]
    ) -> "ServerProcessor":
        """
        Alternate constructor: build (and register) a processor from a
        dictionary describing it.

        Args:
            processor_dict: dictionary that must contain the
                ``NlprpKeys.NAME``, ``TITLE``, ``VERSION``,
                ``IS_DEFAULT_VERSION``, ``DESCRIPTION`` and ``SCHEMA_TYPE``
                keys; ``KEY_PROCTYPE``, ``NlprpKeys.SQL_DIALECT`` and
                ``NlprpKeys.TABULAR_SCHEMA`` are optional.

        Returns:
            the new instance (of the class this method was called on)

        Raises:
            :exc:`KeyError` if a mandatory key is missing.
        """
        pd = processor_dict  # shorthand
        # Use "cls", not a hard-coded class name, so that subclasses get
        # instances of themselves.
        return cls(
            name=pd[NlprpKeys.NAME],
            title=pd[NlprpKeys.TITLE],
            version=pd[NlprpKeys.VERSION],
            is_default_version=pd[NlprpKeys.IS_DEFAULT_VERSION],
            description=pd[NlprpKeys.DESCRIPTION],
            proctype=pd.get(KEY_PROCTYPE),  # may be None
            schema_type=pd[NlprpKeys.SCHEMA_TYPE],  # 'unknown' or 'tabular'
            sql_dialect=pd.get(NlprpKeys.SQL_DIALECT),
            tabular_schema=pd.get(NlprpKeys.TABULAR_SCHEMA),
        )  # also registers with the ServerProcessor class

    @classmethod
    def debug_remove_processor(cls, name: str, version: str) -> None:
        """
        For debugging purposes (testing). De-registers a processor.
        Does nothing if no such processor is registered.
        """
        processor_id = cls._mk_processor_id(name, version)
        cls.processors.pop(processor_id, None)  # delete if present

    @classmethod
    def _mk_processor_id(cls, name: str, version: str) -> str:
        """
        Returns the processor ID, ``name_version``, for a name and version.
        """
        return f"{name}_{version}"

    @property
    def processor_id(self) -> str:
        """
        This processor's ID (``name_version``), built from its current name
        and version.
        """
        return self._mk_processor_id(self.name, self.version)

    @classmethod
    def get_processor(
        cls, name: str, version: Optional[str] = ""
    ) -> "ServerProcessor":
        """
        Fetch a processor by name and (optionally) version.

        Args:
            name: requested processor name (case-sensitive)
            version: (optional) requested processor version; if empty or
                ``None``, the default version of the named processor is
                sought

        Returns:
            a :class:`ServerProcessor`

        Raises:
            :exc:`crate_anon.nlprp.errors.NlprpError` if no processor
            matches.
        """
        for candidate in cls.processors.values():
            # Initially coded as case-insensitive (as someone might put
            # e.g. 'CRP' instead of 'Crp'), but has to be case-sensitive
            # because some of the GATE processors have the same name as the
            # Python ones only different case.
            if name != candidate.name:
                continue
            if version:
                # Specific version requested.
                if version == candidate.version:
                    return candidate
            elif candidate.is_default_version:
                # No specific version requested.
                return candidate
        raise no_such_proc_error(name, version)

    @classmethod
    def get_processor_nlprp(
        cls, requested_processor_dict: JsonObjectType
    ) -> "ServerProcessor":
        """
        Fetch a processor, from an NLPRP dictionary specifying it.

        Args:
            requested_processor_dict: part of an NLPRP request

        Returns:
            a :class:`ServerProcessor`

        Raises:
            :exc:`crate_anon.nlprp.errors.NlprpError` if the
            ``NlprpKeys.NAME`` key is missing or no processor matches.
        """
        version = requested_processor_dict.get(NlprpKeys.VERSION)  # optional
        try:
            name = requested_processor_dict[NlprpKeys.NAME]  # may raise
        except KeyError:
            # The KeyError adds nothing to the explicit message below, so
            # suppress exception chaining.
            raise mkerror(
                BAD_REQUEST, f"Processor request has no {NlprpKeys.NAME!r} key"
            ) from None
        return cls.get_processor(name=name, version=version)

    @classmethod
    def get_processor_from_id(cls, processor_id: str) -> "ServerProcessor":
        """
        Fetch a processor, from a processor ID (a string representing name and
        version).

        Args:
            processor_id: string in the format ``name_version``. The version
                part can't contain an underscore, but the name can. If the
                string contains no underscore at all, the whole string is
                treated as the version and the name is empty.

        Returns:
            a :class:`ServerProcessor`

        Raises:
            :exc:`crate_anon.nlprp.errors.NlprpError` if no processor
            matches.
        """
        # Split on the last occurrence of '_'
        name, _, version = processor_id.rpartition("_")
        return cls.get_processor(name, version)

    def set_parser(self) -> None:
        """
        Sets 'self.parser' to an instance of a subclass of 'BaseNlpParser'
        not bound to any nlpdef or cfgsection, unless self.proctype is GATE
        (in which case, do nothing).
        """
        if self.proctype != PROCTYPE_GATE:
            # We do not have to supply a NLP definition here.
            self.parser = make_nlp_parser_unconfigured(self.proctype)
        # else: do nothing