Coverage for nlprp/api.py: 48%
242 statements
« prev ^ index » next coverage.py v7.8.0, created at 2026-01-06 10:22 -0600
« prev ^ index » next coverage.py v7.8.0, created at 2026-01-06 10:22 -0600
1r"""
2crate_anon/nlprp/api.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26Validate Natural Language Processing Request Protocol (NLPRP) objects.
28"""
30import datetime
31import json
32import gzip
33from typing import Any, Dict, List, Optional, Union
35from cardinal_pythonlib.datetimefunc import (
36 coerce_to_pendulum,
37 convert_datetime_to_utc,
38 get_now_localtz_pendulum,
39 get_now_utc_pendulum,
40 pendulum_to_datetime,
41 pendulum_to_utc_datetime_without_tz,
42)
43from cardinal_pythonlib.httpconst import HttpStatus
44from cardinal_pythonlib.json_utils.typing_helpers import (
45 JsonArrayType,
46 JsonAsStringType,
47 JsonObjectType,
48 JsonValueType,
49)
50from cardinal_pythonlib.reprfunc import auto_repr
51import pendulum
52from pendulum import DateTime as Pendulum # NB name clash with SQLAlchemy
53from semantic_version import Version
55from crate_anon.common.constants import JSON_SEPARATORS_COMPACT
56from crate_anon.nlprp.constants import (
57 NlprpKeys,
58 NlprpValues,
59 ALL_NLPRP_COMMANDS,
60)
61from crate_anon.nlprp.errors import (
62 BAD_REQUEST,
63 key_missing_error,
64 mkerror,
65)
66from crate_anon.nlprp.version import NLPRP_VERSION_STRING
69# =============================================================================
70# Constants
71# =============================================================================
73DEFAULT_SERVER_NAME = "CRATE NLPRP server"
74DEFAULT_PROTOCOL_INFO = {
75 NlprpKeys.NAME: NlprpValues.NLPRP_PROTOCOL_NAME,
76 NlprpKeys.VERSION: NLPRP_VERSION_STRING,
77}
78DEFAULT_SERVER_INFO = {
79 NlprpKeys.NAME: DEFAULT_SERVER_NAME,
80 NlprpKeys.VERSION: NLPRP_VERSION_STRING,
81}
84# =============================================================================
85# Date/time conversion to/from NLPRP format
86# =============================================================================
89def nlprp_datetime_to_pendulum(ndt: str) -> Pendulum:
90 """
91 The NLPRP date/time format is ISO-8601 with all three of: date, time,
92 timezone.
94 Example:
96 .. code-block:: none
98 "2019-08-09T17:26:20.123456+01:00"
100 Args:
101 ndt: date/time in ISO-8601 format with timezone
103 Returns:
104 :class:`pendulum.DateTime` (with timezone information)
105 """
106 return pendulum.parse(ndt)
109def nlprp_datetime_to_datetime_with_tz(ndt: str) -> datetime.datetime:
110 """
111 Converts a NLPRP date/time (see :func:`nlprp_iso_datetime_to_pendulum`) to
112 a :class:`datetime.datetime` with timezone information.
114 Args:
115 ndt: date/time in ISO-8601 format with timezone
117 Returns:
118 datetime.datetime: with timezone information
119 """
120 p = nlprp_datetime_to_pendulum(ndt)
121 return pendulum_to_datetime(p)
124def nlprp_datetime_to_datetime_utc_no_tzinfo(ndt: str) -> datetime.datetime:
125 """
126 Converts a NLPRP date/time (see :func:`nlprp_iso_datetime_to_pendulum`) to
127 a :class:`datetime.datetime` in UTC with no timezone information.
129 Args:
130 ndt: date/time in ISO-8601 format with timezone
132 Returns:
133 datetime.datetime: in UTC with no timezone information
134 """
135 p = nlprp_datetime_to_pendulum(ndt)
136 return pendulum_to_utc_datetime_without_tz(p)
139def pendulum_to_nlprp_datetime(p: Pendulum, to_utc: bool = True) -> str:
140 """
141 Converts a :class:`pendulum.Pendulum` to the ISO string format (with
142 timezone) used by the NLPRP.
143 """
144 if to_utc:
145 p = convert_datetime_to_utc(p)
146 return p.isoformat()
149def datetime_to_nlprp_datetime(
150 dt: datetime.datetime, assume_utc: bool = True
151) -> str:
152 """
153 Converts a :class:`datetime.datetime` to the ISO string format (with
154 timezone) used by the NLPRP.
156 If the datetime.datetime object has no timezone info, then assume the local
157 timezone if ``assume_local`` is true; otherwise, assume UTC.
158 """
159 p = coerce_to_pendulum(dt, assume_local=not assume_utc)
160 return pendulum_to_nlprp_datetime(p)
163def nlprp_datetime_now(as_local: bool = True) -> str:
164 """
165 Returns the time now, as a string suitable for use with NLPRP.
167 Args:
168 as_local: use local timezone? (Otherwise, use UTC.)
169 """
170 now = get_now_localtz_pendulum() if as_local else get_now_utc_pendulum()
171 return pendulum_to_nlprp_datetime(now)
174# =============================================================================
175# Get arguments from JSON objects
176# =============================================================================
179def json_get_bool(
180 x: JsonObjectType, key: str, default: bool = None, required: bool = False
181) -> bool:
182 """
183 Gets a boolean parameter from part of the JSON request.
185 Args:
186 x: a JSON object (dictionary)
187 key: the name of the key
188 default: the default value
189 required: is it mandatory, or can it be missing or ``null``?
191 Returns:
192 bool: the result, or the default
194 Raises:
195 :exc:`NlprpError` if the value is bad, or is missing and required.
196 """
197 value = x.get(key)
198 if value is None: # missing, or "null"
199 if required:
200 raise key_missing_error(key)
201 else:
202 return default
203 if not isinstance(value, bool):
204 mkerror(BAD_REQUEST, f"{key!r} parameter not Boolean")
205 return value
208def json_get_int(
209 x: JsonObjectType, key: str, default: int = None, required: bool = False
210) -> int:
211 """
212 Gets an integer parameter from part of the JSON request.
214 Args:
215 x: a JSON object (dictionary)
216 key: the name of the key
217 default: the default value
218 required: is it mandatory, or can it be missing or ``null``?
220 Returns:
221 int: the result, or the default
223 Raises:
224 :exc:`NlprpError` if the value is bad, or is missing and required.
225 """
226 value = x.get(key, default)
227 if value is None: # missing, or "null"
228 if required:
229 raise key_missing_error(key)
230 else:
231 return default
232 if not isinstance(value, int):
233 mkerror(BAD_REQUEST, f"{key!r} parameter not integer")
234 return value
237def json_get_float(
238 x: JsonObjectType, key: str, default: int = None, required: bool = False
239) -> int:
240 """
241 Gets a float (or int) parameter from part of the JSON request.
243 Args:
244 x: a JSON object (dictionary)
245 key: the name of the key
246 default: the default value
247 required: is it mandatory, or can it be missing or ``null``?
249 Returns:
250 float: the result, or the default
252 Raises:
253 :exc:`NlprpError` if the value is bad, or is missing and required.
254 """
255 value = x.get(key, default)
256 if value is None: # missing, or "null"
257 if required:
258 raise key_missing_error(key)
259 else:
260 return default
261 if not isinstance(value, (float, int)):
262 mkerror(BAD_REQUEST, f"{key!r} parameter not float")
263 return value
266def json_get_str(
267 x: JsonObjectType, key: str, default: str = None, required: bool = False
268) -> str:
269 """
270 Gets a string parameter from part of the JSON request.
272 Args:
273 x: a JSON object (dictionary)
274 key: the name of the key
275 default: the default value
276 required: is it mandatory, or can it be missing or ``null``?
278 Returns:
279 str: the result, or the default
281 Raises:
282 :exc:`NlprpError` if the value is bad, or is missing and required.
283 """
284 value = x.get(key, default)
285 if value is None: # missing, or "null"
286 if required:
287 raise key_missing_error(key)
288 else:
289 return default
290 if not isinstance(value, str):
291 mkerror(BAD_REQUEST, f"{key!r} parameter not string")
292 return value
295def json_get_array(
296 x: JsonObjectType, key: str, required: bool = False
297) -> JsonArrayType:
298 """
299 Gets a array (list) parameter from part of the JSON request.
301 Args:
302 x: a JSON object (dictionary)
303 key: the name of the key
304 required: is the array required?
306 Returns:
307 list: the result, or ``[]`` if the parameter is missing and
308 ``required == False``.
310 Raises:
311 :exc:`NlprpError` if the value is bad, or is missing and required.
312 """
313 value = x.get(key)
314 if value is None: # missing, or "null"
315 if required:
316 raise key_missing_error(key)
317 else:
318 return [] # type: JsonArrayType
319 if not isinstance(value, list):
320 mkerror(BAD_REQUEST, f"{key!r} parameter not a JSON array (list)")
321 return value
324def json_get_array_of_str(
325 x: JsonObjectType, key: str, required: bool = False
326) -> List[str]:
327 """
328 Gets an array of strings from part of the JSON request.
330 Args:
331 x: a JSON object (dictionary)
332 key: the name of the key
333 required: is the array required?
335 Returns:
336 list: the result, or ``[]`` if the parameter is missing and
337 ``required == False``.
339 Raises:
340 :exc:`NlprpError` if the value is bad, or is missing and required.
341 """
342 value = x.get(key)
343 if value is None: # missing, or "null"
344 if required:
345 raise key_missing_error(key)
346 else:
347 return [] # type: JsonArrayType
348 if not isinstance(value, list):
349 mkerror(BAD_REQUEST, f"{key!r} parameter not a JSON array (list)")
350 if not all(isinstance(x, str) for x in value):
351 mkerror(BAD_REQUEST, f"Non-string value as part of {key!r}")
352 return value
355def json_get_object(
356 x: JsonObjectType, key: str, required: bool = False
357) -> JsonObjectType:
358 """
359 Gets an object (dictionary) parameter from part of the JSON request.
361 Args:
362 x: a JSON object (dictionary)
363 key: the name of the key
364 required: is the object required?
366 Returns:
367 list: the result, or ``{}`` if the parameter is missing and
368 ``required == False``.
370 Raises:
371 :exc:`NlprpError` if the value is bad, or is missing and required.
372 """
373 value = x.get(key)
374 if value is None: # missing, or "null"
375 if required:
376 raise key_missing_error(key)
377 else:
378 return {} # type: JsonArrayType
379 if not isinstance(value, dict):
380 mkerror(
381 BAD_REQUEST, f"{key!r} parameter not a JSON object (dictionary)"
382 )
383 return value
386def json_get_value(
387 x: JsonValueType,
388 key: str,
389 default: JsonValueType = None,
390 required: bool = False,
391) -> JsonValueType:
392 """
393 Gets an JSON value (object, array, or literal) parameter from part of the
394 JSON request.
396 Args:
397 x: a JSON object (dictionary)
398 key: the name of the key
399 default: the default value
400 required: is the value required?
402 Returns:
403 the result, or the default
405 Raises:
406 :exc:`NlprpError` if the value is bad, or is missing and required.
407 """
408 value = x.get(key)
409 if value is None: # missing, or "null"
410 if required:
411 raise key_missing_error(key)
412 else:
413 return default
414 if not isinstance(value, (dict, list, str, int, float, bool)):
415 # None is covered above
416 mkerror(BAD_REQUEST, f"{key!r} parameter not a JSON value")
417 return value
420def json_get_toplevel_args(
421 nlprp_request: JsonObjectType, required: bool = True
422) -> JsonObjectType:
423 """
424 Returns the top-level arguments for a NLPRP request.
426 Args:
427 nlprp_request: the NLPRP request object
428 required: are the args required?
430 Returns:
431 dict: the result
433 Raises:
434 :exc:`NlprpError` if the value is bad, or is missing and required.
435 """
436 value = nlprp_request.get(NlprpKeys.ARGS)
437 if value is None:
438 if required:
439 raise key_missing_error(NlprpKeys.ARGS, is_args=True)
440 else:
441 return {} # type: JsonArrayType
442 if not isinstance(value, dict):
443 mkerror(
444 BAD_REQUEST,
445 f"{NlprpKeys.ARGS!r} parameter not a JSON object (dictionary)",
446 )
447 return value
450# =============================================================================
451# Validity checkers
452# =============================================================================
455def is_nlprp_protocol_valid(
456 x: JsonObjectType, min_version: Version = None, max_version: Version = None
457) -> bool:
458 """
459 Is the parameter a valid NLPRP request/response object?
461 Args:
462 x: dictionary to test
463 min_version: minimum NLPRP version to accept; None for no minimum
464 max_version: maximum NLPRP version to accept; None for no maximum
465 """
466 try:
467 protocol = x.get(NlprpKeys.PROTOCOL, None) # type: JsonObjectType
468 # ... will raise AttributeError if not a dict
469 protocol_name = protocol[NlprpKeys.NAME]
470 assert protocol_name.lower() == NlprpValues.NLPRP_PROTOCOL_NAME
471 protocol_version = Version(protocol[NlprpKeys.VERSION])
472 # ... the Version() call may raise TypeError, ValueError
473 if min_version is not None:
474 assert protocol_version >= min_version
475 if max_version is not None:
476 assert protocol_version <= max_version
477 except (AssertionError, AttributeError, KeyError, TypeError, ValueError):
478 return False
479 return True
482def is_valid_nlprp_request(
483 x: JsonObjectType, min_version: Version = None, max_version: Version = None
484) -> bool:
485 """
486 Is the parameter a valid NLPRP request (client to server)?
488 Args:
489 x: dictionary to test
490 min_version: minimum NLPRP version to accept; None for no minimum
491 max_version: maximum NLPRP version to accept; None for no maximum
492 """
493 try:
494 assert is_nlprp_protocol_valid(
495 x, min_version=min_version, max_version=max_version
496 )
497 command = x[NlprpKeys.COMMAND].lower() # case-insensitive
498 assert command in ALL_NLPRP_COMMANDS
499 except (AssertionError, AttributeError, KeyError, TypeError, ValueError):
500 return False
501 return True
504def is_valid_nlprp_response(
505 x: JsonObjectType, min_version: Version = None, max_version: Version = None
506) -> bool:
507 """
508 Is the parameter a valid NLPRP response (server to client)?
510 Args:
511 x: dictionary to test
512 min_version: minimum NLPRP version to accept; None for no minimum
513 max_version: maximum NLPRP version to accept; None for no maximum
514 """
515 try:
516 assert is_nlprp_protocol_valid(
517 x, min_version=min_version, max_version=max_version
518 )
519 except (AssertionError, AttributeError, KeyError, TypeError, ValueError):
520 return False
521 return True
524# =============================================================================
525# Dictionary creators
526# =============================================================================
529def make_nlprp_dict() -> JsonObjectType:
530 """
531 Creates the basic dictionary used by the NLPRP protocol.
532 """
533 return {NlprpKeys.PROTOCOL: DEFAULT_PROTOCOL_INFO}
536def make_nlprp_request(
537 command: str, command_args: Any = None
538) -> JsonObjectType:
539 """
540 Creates a NLPRP request (client to server) dictionary.
542 Args:
543 command: NLPRP command
544 command_args: optional argument dictionary
545 """
546 assert command in ALL_NLPRP_COMMANDS
547 d = make_nlprp_dict()
548 d[NlprpKeys.COMMAND] = command
549 if command_args:
550 d[NlprpKeys.ARGS] = command_args
551 return d
554def make_nlprp_response(
555 http_status: int,
556 reply_args: JsonObjectType = None,
557 server_info: JsonObjectType = None,
558) -> JsonObjectType:
559 """
560 Creates a NLPRP response (server to client) dictionary.
562 Args:
563 http_status: HTTP status code
564 reply_args: reply dictionary
565 server_info: ``server_info`` dictionary, or ``None`` for a default
566 """
567 assert http_status is not None
568 server_info = server_info or DEFAULT_SERVER_INFO
569 reply_args = reply_args or {} # type: JsonObjectType
570 d = make_nlprp_dict()
571 d[NlprpKeys.STATUS] = http_status
572 d[NlprpKeys.SERVER_INFO] = server_info
573 d.update(**reply_args)
574 return d
577# =============================================================================
578# Generic object
579# =============================================================================
582class NlprpMessage:
583 """
584 Represents an NLPRP (natural language processing request protocol) message,
585 be it a request (client to server) or a response (server to client).
586 """
588 def __init__(
589 self,
590 data: Union[str, bytes, JsonObjectType],
591 data_is_gzipped: bool = False,
592 ) -> None:
593 """
594 Initialize with data as either
596 - gzipped bytes, representing bytes...
597 - bytes, representing a UTF-8 encoded str...
598 - str, representing a JSON-encoded dict...
599 - a dict.
601 Args:
602 data: the data
603 data_is_gzipped: if ``data`` is of type ``bytes``, is it gzipped?
604 """
605 self._data = {} # type: JsonObjectType
606 if isinstance(data, bytes):
607 if data_is_gzipped:
608 data = gzip.decompress(data)
609 # noinspection PyTypeChecker
610 data = data.decode("utf-8") # now it's a str
611 if isinstance(data, str):
612 data = json.loads(data) # type: JsonObjectType
613 if isinstance(data, dict):
614 self._data = data
616 def __str__(self) -> str:
617 return repr(self._data)
619 @property
620 def dict(self) -> JsonObjectType:
621 """
622 Returns the underlying dictionary.
623 """
624 return self._data
626 @property
627 def data_str(self) -> JsonAsStringType:
628 """
629 Returns a JSON-encoded version of the underlying dictionary.
630 """
631 return json.dumps(self._data, separators=JSON_SEPARATORS_COMPACT)
633 @property
634 def data_bytes(self) -> bytes:
635 """
636 Returns a UTF-8 encoded version of the JSON-encoded underlying
637 dictionary.
638 """
639 return self.data_str.encode("utf-8")
641 @property
642 def data_gzipped(self) -> bytes:
643 """
644 Returns a GZIP-compressed version of ``data_bytes``.
645 """
646 return gzip.compress(self.data_bytes)
648 def protocol_valid(
649 self, min_version: Version = None, max_version: Version = None
650 ) -> bool:
651 """
652 Is the protocol valid?
654 Args:
655 min_version: minimum NLPRP version to accept; None for no minimum
656 max_version: maximum NLPRP version to accept; None for no maximum
657 """
658 return is_nlprp_protocol_valid(
659 self._data, min_version=min_version, max_version=max_version
660 )
662 def valid(
663 self, min_version: Version = None, max_version: Version = None
664 ) -> bool:
665 """
666 Is the message valid?
668 Overridden in subclasses to perform more specific checks.
670 Args:
671 min_version: minimum NLPRP version to accept; None for no minimum
672 max_version: maximum NLPRP version to accept; None for no maximum
673 """
674 return self.protocol_valid(
675 min_version=min_version, max_version=max_version
676 )
679class NlprpRequest(NlprpMessage):
680 """
681 Represents an NLPRP request (client to server).
682 """
684 def __init__(
685 self,
686 command: str = None,
687 command_args: JsonObjectType = None,
688 data: Union[str, bytes, JsonObjectType] = None,
689 data_is_gzipped: bool = False,
690 ) -> None:
691 """
692 Initialize with one of the following sets of parameters:
694 - ``command`` and optionally ``args`` -- typically used by clients
695 creating a request to send to the server
696 - ``data`` -- typically used by servers parsing a client's request
698 Args:
699 command: NLPRP command
700 command_args: optional argument dictionary
701 data: data as gzipped bytes, bytes, str, or a dict
702 data_is_gzipped: if ``data`` is used, and is of type ``bytes``,
703 is it GZIP-compressed?
704 """
705 super().__init__(data=data, data_is_gzipped=data_is_gzipped)
706 if not data:
707 # Build an NLPRP message from command/args
708 assert command, "data not specified, so must specify command"
709 self._data = make_nlprp_request(command, command_args)
711 def valid(
712 self, min_version: Version = None, max_version: Version = None
713 ) -> bool:
714 """
715 Is the request valid?
717 Args:
718 min_version: minimum NLPRP version to accept; None for no minimum
719 max_version: maximum NLPRP version to accept; None for no maximum
720 """
721 return is_valid_nlprp_request(
722 self._data, min_version=min_version, max_version=max_version
723 )
725 @property
726 def command(self) -> str:
727 """
728 Returns the NLPRP command.
729 """
730 return json_get_str(self._data, NlprpKeys.COMMAND, "")
732 @property
733 def args(self) -> JsonObjectType:
734 """
735 Returns the NLPRP command arguments.
736 """
737 return json_get_object(self._data, NlprpKeys.ARGS, required=False)
740class NlprpResponse(NlprpMessage):
741 """
742 Represents an NLPRP response (server to client).
743 """
745 def __init__(
746 self,
747 data: Union[str, bytes, JsonObjectType] = None,
748 data_is_gzipped: bool = False,
749 http_status: int = HttpStatus.OK,
750 reply_args: Dict[str, Any] = None,
751 server_info: Dict[str, Any] = None,
752 ) -> None:
753 """
754 Initialize with one of the following sets of parameters:
756 - ``data`` -- typically used by clients parsing a server's reply
757 - ``http_status`` and ``reply_args`` -- typically used by servers
758 creating a reply to send to the client
760 Args:
761 data: data as gzipped bytes, bytes, str, or a dict
762 data_is_gzipped: if ``data`` is used, and is of type ``bytes``,
763 is it GZIP-compressed?
764 http_status: HTTP status code
765 reply_args: any other parts to the reply
766 """
767 super().__init__(data=data, data_is_gzipped=data_is_gzipped)
768 if not data:
769 # Build a reply
770 self._data = make_nlprp_response(
771 http_status=http_status,
772 reply_args=reply_args,
773 server_info=server_info,
774 )
776 @property
777 def status(self) -> int:
778 """
779 Returns the status of the NLPRP response, or -1 if it's missing.
780 """
781 return json_get_int(self._data, NlprpKeys.STATUS, -1)
783 @property
784 def server_info(self) -> JsonObjectType:
785 """
786 Returns the ``server_info`` part of the NLPRP response.
787 """
788 return json_get_object(
789 self._data, NlprpKeys.SERVER_INFO, required=False
790 )
793class NlprpServerProcessor:
794 """
795 Class for containing information about am NLP processor known to an NLPRP
796 server.
797 """
799 def __init__(
800 self,
801 name: str,
802 title: str,
803 version: str,
804 is_default_version: bool,
805 description: str,
806 schema_type: str = NlprpValues.UNKNOWN,
807 sql_dialect: Optional[str] = None,
808 tabular_schema: Optional[Dict[str, Any]] = None,
809 ) -> None:
810 assert schema_type in (NlprpValues.UNKNOWN, NlprpValues.TABULAR), (
811 "'schema_type' must be one of '{NlprpValues.UNKNOWN}', "
812 "'{NlprpValues.TABULAR}' for each processor."
813 )
814 self.name = name
815 self.title = title
816 self.version = version
817 self.is_default_version = is_default_version
818 self.description = description
819 self.schema_type = schema_type
820 self.sql_dialect = sql_dialect
821 self.tabular_schema = tabular_schema
823 @property
824 def infodict(self) -> Dict[str, Any]:
825 d = {
826 NlprpKeys.NAME: self.name,
827 NlprpKeys.TITLE: self.title,
828 NlprpKeys.VERSION: self.version,
829 NlprpKeys.IS_DEFAULT_VERSION: self.is_default_version,
830 NlprpKeys.DESCRIPTION: self.description,
831 NlprpKeys.SCHEMA_TYPE: self.schema_type,
832 }
833 if self.schema_type == NlprpValues.TABULAR:
834 d[NlprpKeys.SQL_DIALECT] = self.sql_dialect
835 d[NlprpKeys.TABULAR_SCHEMA] = self.tabular_schema
836 return d
838 def __str__(self) -> str:
839 return str(self.infodict)
841 def __repr__(self) -> str:
842 return auto_repr(self)
844 def is_tabular(self) -> bool:
845 """
846 Is the format of the schema information given by the remote processor
847 tabular?
848 """
849 return self.schema_type == NlprpValues.TABULAR