Coverage for nlp_webserver/models.py: 94%
48 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1r"""
2crate_anon/nlp_webserver/models.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26SQLAlchemy models for CRATE's implementation of an NLPRP server.
28"""
30import datetime
31from typing import List, Optional
32import uuid
34from cardinal_pythonlib.datetimefunc import coerce_to_pendulum
35from pendulum import DateTime as Pendulum
36from sqlalchemy import Column, Text, VARCHAR, Boolean, DateTime
37from sqlalchemy.orm import (
38 declarative_base,
39 deferred,
40 relationship,
41 scoped_session,
42 sessionmaker,
43)
44from sqlalchemy.sql.schema import ForeignKey
46# noinspection PyPackageRequirements
47from zope.sqlalchemy import register
50# =============================================================================
51# SQLAlchemy setup
52# =============================================================================
# Declarative base class that every ORM model in this module inherits from.
Base = declarative_base()

# Session factory using SQLAlchemy's 2.0-style ("future") behaviour.
Session = sessionmaker(future=True)

# Hand the factory to zope.sqlalchemy so that sessions it creates take part
# in the transaction machinery managed by the ``transaction`` package.
register(Session)

# Scoped session registry built on the factory above; this is the object the
# rest of the web server is expected to use for database access.
dbsession = scoped_session(Session)
61# =============================================================================
62# Constants
63# =============================================================================
# Length of the string form of a UUID, as produced by make_unique_id():
# 32 hex digits plus 4 hyphens.
UUID64_LEN = 36

# Identifiers generated by the server are all UUID strings.
MAX_DOC_ID_LEN = UUID64_LEN
MAX_DOCPROC_ID_LEN = UUID64_LEN
MAX_QUEUE_ID_LEN = UUID64_LEN

# Identifiers that originate elsewhere.
MAX_JOB_ID_LEN = 255  # specified by client
MAX_USERNAME_LEN = 255  # arbitrary
# e.g. Python fully-qualified name, underscore, version
MAX_PROCESSOR_ID_LEN = 255
78# =============================================================================
79# Helper functions
80# =============================================================================
def make_unique_id() -> str:
    """
    Creates a fresh random identifier for labelling database objects.

    The identifier is the canonical string form of a version-4 (random) UUID
    from :func:`uuid.uuid4`, for example
    '79cc4bac-6e8b-4ac6-bbd9-a65b5e1d1e29': 32 hexadecimal digits grouped
    8-4-4-4-12 and joined by hyphens, so 36 characters in total. Of the 128
    bits, 122 are random, so collisions are not a practical concern. See
    https://docs.python.org/3/library/uuid.html.

    Returns:
        the new identifier, as a 36-character string
    """
    random_uuid = uuid.uuid4()
    return str(random_uuid)
95# =============================================================================
96# Model classes
97# =============================================================================
def _utc_now_naive() -> datetime.datetime:
    """
    Returns the current UTC time as a timezone-naive ``datetime``.

    Replacement for :meth:`datetime.datetime.utcnow`, which is deprecated
    from Python 3.12. The timezone information is stripped again so that the
    value handed to the (timezone-naive) ``DateTime`` column is exactly what
    ``utcnow()`` used to produce.
    """
    return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)


class Document(Base):
    """
    Represents a user-submitted document for processing. (A single document
    may be processed by multiple processors.)

    Each document owns zero or more :class:`DocProcRequest` rows, one per
    processor that the client asked to run on it.
    """

    __tablename__ = "documents"

    document_id = Column(
        "document_id",
        VARCHAR(MAX_DOC_ID_LEN),
        primary_key=True,
        comment="Primary key (UUID) for the document",
    )  # type: str
    # Deferred: the (potentially large) text is only loaded when accessed.
    doctext = deferred(
        Column("doctext", Text, comment="Text contents of the document")
    )  # type: Optional[str]
    client_job_id = Column(
        "client_job_id",
        VARCHAR(MAX_JOB_ID_LEN),
        comment="Client job ID (supplied by the client)",
        index=True,
    )  # type: Optional[str]
    queue_id = Column(
        "queue_id",
        VARCHAR(MAX_QUEUE_ID_LEN),
        comment="The UUID of the client request, if in queued mode",
        index=True,
    )  # type: Optional[str]
    username = Column(
        "username",
        VARCHAR(MAX_USERNAME_LEN),
        comment="Username that submitted this document",
        nullable=False,
        index=True,
    )  # type: Optional[str]
    # Deferred for the same reason as doctext.
    client_metadata = deferred(
        Column(
            "client_metadata", Text, comment="Metadata submitted by the client"
        )
    )  # type: Optional[str]
    include_text = Column(
        "include_text",
        Boolean,
        nullable=False,
        default=False,
        comment="Include the source text in the reply?",
    )  # type: Optional[bool]
    datetime_submitted_utc = Column(
        "datetime_submitted_utc",
        DateTime,
        nullable=False,
        # Is the following OK, given that it's not exactly when it was
        # submitted?
        # The default is a callable, evaluated when the row is inserted.
        # It yields naive UTC, as datetime.datetime.utcnow did, without
        # relying on that deprecated method.
        default=_utc_now_naive,
        comment="Date/time when the request was submitted (in UTC)",
    )  # type: Optional[datetime.datetime]

    # Deleting a Document removes its DocProcRequests. passive_deletes=True
    # leaves that to the database's ON DELETE CASCADE (declared on
    # DocProcRequest.document_id) rather than loading the children first.
    docprocrequests = relationship(
        "DocProcRequest",
        cascade="all, delete-orphan",
        passive_deletes=True,
        back_populates="document",
        lazy="select",
        # https://docs.sqlalchemy.org/en/13/orm/collections.html#using-passive-deletes  # noqa: E501
    )  # type: List[DocProcRequest]

    @property
    def datetime_submitted_pendulum(self) -> Optional[Pendulum]:
        """
        The submission time as a Pendulum object.

        The stored value is passed to ``coerce_to_pendulum`` with
        ``assume_local=False``, i.e. it is not treated as being in the
        local timezone.
        """
        return coerce_to_pendulum(
            self.datetime_submitted_utc, assume_local=False
        )
class DocProcRequest(Base):
    """
    One row per (document, processor) pair: records that a given processor
    was requested for a given document, whether that work has finished, and
    what it produced.
    """

    __tablename__ = "docprocrequests"

    # ------------------------------------------------------------------
    # Columns
    # ------------------------------------------------------------------
    docprocrequest_id = Column(
        "docprocrequest_id",
        VARCHAR(MAX_DOCPROC_ID_LEN),
        comment=(
            "Primary key (UUID) for the document/processor pair; also "
            "used as the Celery task ID"
        ),
        primary_key=True,
    )  # type: str
    document_id = Column(
        "document_id",
        VARCHAR(MAX_DOC_ID_LEN),
        # ON DELETE CASCADE: the database removes these rows when the parent
        # Document row is deleted. See:
        # https://stackoverflow.com/questions/5033547/sqlalchemy-cascade-delete  # noqa: E501
        # https://docs.sqlalchemy.org/en/13/orm/collections.html#using-passive-deletes  # noqa: E501
        ForeignKey("documents.document_id", ondelete="CASCADE"),
        comment="Document ID (FK to documents.document_id)",
        nullable=False,
    )  # type: str
    processor_id = Column(
        "processor_id",
        VARCHAR(MAX_PROCESSOR_ID_LEN),
        comment="Processor ID, in '<name>_<version>' format",
        nullable=False,
    )  # type: str
    done = Column(
        "done",
        Boolean,
        comment="Has the task associated with this request been completed?",
        default=False,
        nullable=False,
    )  # type: bool
    when_done_utc = Column(
        "when_done_utc",
        DateTime,
        comment="Date/time when the request was completed (in UTC)",
        default=None,
    )  # type: Optional[datetime.datetime]
    # Deferred: the results text is only fetched when actually accessed.
    results = deferred(
        Column("results", Text, comment="Results (as JSON)")
    )  # type: Optional[str]

    # ------------------------------------------------------------------
    # Relationships
    # ------------------------------------------------------------------
    # The parent document; mirror image of Document.docprocrequests.
    document = relationship(
        "Document",
        lazy="select",
        back_populates="docprocrequests",
    )  # type: Document

    # ------------------------------------------------------------------
    # Convenience accessors
    # ------------------------------------------------------------------
    @property
    def doctext(self) -> Optional[str]:
        """
        The text of the parent document, read via the ``document``
        relationship.
        """
        parent_document = self.document
        return parent_document.doctext