Coverage for cc_modules/cc_dump.py : 22%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/cc_dump.py
6===============================================================================
8 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com).
10 This file is part of CamCOPS.
12 CamCOPS is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
17 CamCOPS is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
25===============================================================================
27**Methods for providing a dump of data from the server to the web user.**
29"""
31import logging
32from typing import (
33 Any, Dict, Generator, Iterable, List, Optional, Set, Tuple, Type,
34 TYPE_CHECKING, Union,
35)
37from cardinal_pythonlib.logs import BraceStyleAdapter
38from cardinal_pythonlib.sqlalchemy.orm_inspect import (
39 gen_columns,
40 gen_orm_classes_from_base,
41 walk_orm_tree,
42)
43from sqlalchemy.exc import CompileError
44from sqlalchemy.engine.base import Engine
45from sqlalchemy.orm import Session as SqlASession
46from sqlalchemy.sql.schema import Column, MetaData, Table
48from camcops_server.cc_modules.cc_blob import Blob
49from camcops_server.cc_modules.cc_db import (
50 GenericTabletRecordMixin,
51 TaskDescendant,
52)
53from camcops_server.cc_modules.cc_device import Device
54from camcops_server.cc_modules.cc_email import Email
55from camcops_server.cc_modules.cc_exportmodels import (
56 ExportedTask,
57 ExportedTaskEmail,
58 ExportedTaskFileGroup,
59 ExportedTaskHL7Message,
60)
61from camcops_server.cc_modules.cc_exportrecipient import ExportRecipient
62from camcops_server.cc_modules.cc_group import Group, group_group_table
63from camcops_server.cc_modules.cc_membership import UserGroupMembership
64from camcops_server.cc_modules.cc_patient import Patient
65from camcops_server.cc_modules.cc_patientidnum import (
66 all_extra_id_columns,
67 PatientIdNum,
68)
69from camcops_server.cc_modules.cc_sqla_coltypes import CamcopsColumn
70from camcops_server.cc_modules.cc_task import Task
71from camcops_server.cc_modules.cc_user import User
73if TYPE_CHECKING:
74 from camcops_server.cc_modules.cc_request import CamcopsRequest
75 from camcops_server.cc_modules.cc_summaryelement import ExtraSummaryTable
76 from camcops_server.cc_modules.cc_simpleobjects import TaskExportOptions
78log = BraceStyleAdapter(logging.getLogger(__name__))
81# =============================================================================
82# Constants
83# =============================================================================
85# Restrict specified tables to certain columns only:
86DUMP_ONLY_COLNAMES = { # mapping of tablename : list_of_column_names
87 Device.__tablename__: [
88 "camcops_version",
89 "friendly_name",
90 "id",
91 "name",
92 ],
93 User.__tablename__: [
94 "fullname",
95 "id",
96 "username",
97 ]
98}
99# Drop specific columns from certain tables:
100DUMP_DROP_COLNAMES = { # mapping of tablename : list_of_column_names
101}
102# List of columns to be skipped regardless of table:
103DUMP_SKIP_COLNAMES = [
104 # We restrict to current records only, so many of these are irrelevant:
105 "_addition_pending",
106 "_forcibly_preserved",
107 "_manually_erased",
108 "_manually_erased_at",
109 "_manually_erasing_user_id",
110 "_move_off_tablet",
111 "_removal_pending",
112 "_removing_user_id",
113 "_successor_pk",
114 "_when_removed_batch_utc",
115 "_when_removed_exact",
116]
117DUMP_SKIP_RELNAMES = [
118 # List of *relationship* names to ignore
119 "_manually_erasing_user",
120 "_removing_user",
121]
122# List of table names to be skipped at all times:
123DUMP_SKIP_TABLES = [
124 # We don't have to list all admin tables here; we process the dump starting
125 # with tasks, so only things that have ORM relationships to a task might
126 # feature. (The Email/ExportedTask* set don't, so this is just caution in
127 # case we add a relationship later!)
128 Email.__tablename__,
129 ExportedTask.__tablename__,
130 ExportedTaskEmail.__tablename__,
131 ExportedTaskFileGroup.__tablename__,
132 ExportedTaskHL7Message.__tablename__,
133 ExportRecipient.__tablename__,
134 group_group_table.name,
135 UserGroupMembership.__tablename__,
136]
137# Tables for which no relationships will be traversed:
138DUMP_SKIP_ALL_RELS_FOR_TABLES = [
139 Group.__tablename__
140]
141FOREIGN_KEY_CONSTRAINTS_IN_DUMP = False
142# ... the keys will be present, but should we try to enforce constraints?
145# =============================================================================
146# Handy place to hold the controlling information
147# =============================================================================
149class DumpController(object):
150 """
151 A controller class that manages the copying (dumping) of information from
152 our database to another SQLAlchemy :class:`Engine`/:class:`Session`.
153 """
154 def __init__(self,
155 dst_engine: Engine,
156 dst_session: SqlASession,
157 export_options: "TaskExportOptions",
158 req: "CamcopsRequest") -> None:
159 """
160 Args:
161 dst_engine: destination SQLAlchemy Engine
162 dst_session: destination SQLAlchemy Session
163 export_options: :class:`camcops_server.cc_modules.cc_simpleobjects.TaskExportOptions`
164 req: :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`
165 """ # noqa
166 self.dst_engine = dst_engine
167 self.dst_session = dst_session
168 self.export_options = export_options
169 self.req = req
171 # We start with blank metadata.
172 self.dst_metadata = MetaData()
173 # Tables we are inserting into the destination database:
174 self.dst_tables = {} # type: Dict[str, Table]
175 # ... note that creating a Table() for a given SQLAlchemy metadata is
176 # permitted only once, so we add to self.dst_tables as soon
177 # as we create that.
178 # Tables we've created:
179 self.tablenames_created = set() # type: Set[str]
180 # Tables we've processed, though we may ignore them:
181 self.tablenames_seen = set() # type: Set[str]
182 # ORM objects we've visited:
183 self.instances_seen = set() # type: Set[object]
185 if export_options.db_make_all_tables_even_empty:
186 self._create_all_dest_tables()
188 def _create_all_dest_tables(self) -> None:
189 """
190 Creates all tables in the destination database, even ones that may
191 not be used.
192 """
193 log.debug("Creating all destination tables...")
194 for table in self.gen_all_dest_tables():
195 self._create_dest_table(table)
196 log.debug("... all destination tables created.")
198 def gen_all_dest_tables(self) -> Generator[Table, None, None]:
199 """
200 Generates all destination tables.
201 """
202 tablenames_seen = set() # type: Set[str]
203 for cls in gen_orm_classes_from_base(GenericTabletRecordMixin): # type: Type[GenericTabletRecordMixin] # noqa
204 instance = cls()
205 for table in self.gen_all_dest_tables_for_obj(instance):
206 if table.name in tablenames_seen:
207 continue
208 tablenames_seen.add(table.name)
209 yield table
211 def gen_all_dest_tables_for_obj(self, src_obj: object) \
212 -> Generator[Table, None, None]:
213 """
214 Generates all destination tables for an object.
215 """
216 # Main table
217 yield self.get_dest_table_for_src_object(src_obj)
218 # Additional tables
219 if isinstance(src_obj, Task):
220 add_extra_id_cols = (
221 self.export_options.db_patient_id_in_each_row and
222 not src_obj.is_anonymous
223 )
224 estables = src_obj.get_all_summary_tables(self.req)
225 for est in estables:
226 yield self.get_dest_table_for_est(
227 est, add_extra_id_cols=add_extra_id_cols)
229 def gen_all_dest_columns(self) -> Generator[Union[Column, CamcopsColumn],
230 None, None]:
231 """
232 Generates all destination columns.
233 """
234 for table in self.gen_all_dest_tables():
235 for col in table.columns:
236 yield col
238 def consider_object(self, src_obj: object) -> None:
239 """
240 Think about an SQLAlchemy ORM object. If it comes from a table we
241 want dumped, add this object to the dump.
242 """
243 # noinspection PyUnresolvedReferences
244 src_table = src_obj.__table__ # type: Table
245 src_tablename = src_table.name
246 if src_tablename not in self.tablenames_seen:
247 # If we encounter a table we've not seen, offer our "table decider"
248 # the opportunity to add it to the metadata and create the table.
249 self._add_dump_table_for_src_object(src_obj)
250 # If this table is going into the destination, copy the object
251 # (and maybe remove columns from it, or add columns to it).
252 if (src_tablename in self.dst_tables and
253 not self._dump_skip_table(src_tablename)):
254 self._copy_object_to_dump(src_obj)
256 @staticmethod
257 def _merits_extra_id_num_columns_if_requested(obj: object) \
258 -> Tuple[bool, Optional[Patient]]:
259 """
260 Is the source object one that would support the addition of extra
261 ID number information if the export option ``DB_PATIENT_ID_PER_ROW`` is
262 set? If so, return the relevant patient.
264 Args:
265 obj: an SQLAlchemy ORM object
267 Returns:
268 tuple: ``(merits, patient)``, where ``merits`` is a ``bool`` (does
269 it merit this?) and ``patient`` is a relevant
270 :class:`camcops_server.cc_modules.cc_patient.Patient``, if found.
271 It is also guaranteed that if a patient is returned, ``merits`` is
272 ``True`` (but not guaranteed that if ``merits`` is true, that
273 ``patient`` is not ``None``).
275 """
276 if not isinstance(obj, GenericTabletRecordMixin):
277 # Must be data that originated from the client.
278 return False, None
279 if isinstance(obj, PatientIdNum):
280 # PatientIdNum already has this info.
281 return False, None
282 if isinstance(obj, Patient):
283 return True, obj
284 if isinstance(obj, Task):
285 if obj.is_anonymous:
286 # Anonymous tasks don't.
287 return False, None
288 return True, obj.patient
289 if isinstance(obj, TaskDescendant):
290 merits = obj.task_ancestor_might_have_patient()
291 patient = obj.task_ancestor_patient()
292 return merits, patient
293 log.warning(f"_merits_extra_id_num_columns_if_requested: don't know "
294 f"how to handle {obj!r}")
295 return False, None
297 def get_dest_table_for_src_object(self, src_obj: object) -> Table:
298 """
299 Produces the destination table for the source object.
301 Args:
302 src_obj:
303 An SQLAlchemy ORM object. It will *not* be a
304 :class:`camcops_server.cc_modules.cc_summaryelement.ExtraSummaryTable`;
305 those are handled instead by
306 :meth:`_get_or_insert_summary_table`.
308 Returns:
309 an SQLAlchemy :class:`Table`
310 """
311 # noinspection PyUnresolvedReferences
312 src_table = src_obj.__table__ # type: Table
313 tablename = src_table.name
315 # Don't create it twice in the SQLAlchemy metadata.
316 if tablename in self.dst_tables:
317 return self.dst_tables[tablename]
319 # Copy columns, dropping any we don't want, and dropping FK constraints
320 dst_columns = [] # type: List[Column]
321 for src_column in src_table.columns:
322 # log.debug("trying {!r}", src_column.name)
323 if self._dump_skip_column(tablename, src_column.name):
324 # log.debug("... skipping {!r}", src_column.name)
325 continue
326 # You can't add the source column directly; you get
327 # "sqlalchemy.exc.ArgumentError: Column object 'ccc' already
328 # assigned to Table 'ttt'"
329 copied_column = src_column.copy()
330 copied_column.comment = src_column.comment
331 # ... see SQLAlchemy trivial bug:
332 # https://bitbucket.org/zzzeek/sqlalchemy/issues/4087/columncopy-doesnt-copy-comment-attribute # noqa
333 if FOREIGN_KEY_CONSTRAINTS_IN_DUMP:
334 copied_column.foreign_keys = set(
335 fk.copy() for fk in src_column.foreign_keys
336 )
337 log.warning("NOT WORKING: foreign key commands not being "
338 "emitted")
339 # but
340 # http://docs.sqlalchemy.org/en/latest/core/constraints.html
341 # works fine under SQLite, even if the other table hasn't been
342 # created yet. Does the table to which the FK refer have to be
343 # in the metadata already?
344 # That's quite possible, but I've not checked.
345 # Would need to iterate through tables in dependency order,
346 # like merge_db() does.
347 else:
348 # Probably blank already, as the copy() command only copies
349 # non-constraint-bound ForeignKey objects, but to be sure:
350 copied_column.foreign_keys = set()
351 # ... type is: Set[ForeignKey]
352 # if src_column.foreign_keys:
353 # log.debug("Column {}, FKs {!r} -> {!r}", src_column.name,
354 # src_column.foreign_keys,
355 # copied_column.foreign_keys)
356 dst_columns.append(copied_column)
358 # Add extra columns?
359 if self.export_options.db_include_summaries:
360 if isinstance(src_obj, GenericTabletRecordMixin):
361 for summary_element in src_obj.get_summaries(self.req):
362 dst_columns.append(CamcopsColumn(
363 summary_element.name,
364 summary_element.coltype,
365 exempt_from_anonymisation=True,
366 comment=summary_element.decorated_comment))
367 if self.export_options.db_patient_id_in_each_row:
368 merits, _ = self._merits_extra_id_num_columns_if_requested(src_obj)
369 if merits:
370 dst_columns.extend(all_extra_id_columns(self.req))
371 if isinstance(src_obj, TaskDescendant):
372 dst_columns += src_obj.extra_task_xref_columns()
374 dst_table = Table(tablename, self.dst_metadata, *dst_columns)
375 # ... that modifies the metadata, so:
376 self.dst_tables[tablename] = dst_table
377 return dst_table
379 def get_dest_table_for_est(self, est: "ExtraSummaryTable",
380 add_extra_id_cols: bool = False) -> Table:
381 """
382 Add an additional summary table to the dump, if it's not there already.
383 Return the table (from the destination database).
385 Args:
386 est:
387 a
388 :class:`camcops_server.cc_modules.cc_summaryelement.ExtraSummaryTable`
389 add_extra_id_cols:
390 Add extra ID columns, for the ``DB_PATIENT_ID_PER_ROW``
391 export option?
392 """ # noqa
393 tablename = est.tablename
394 if tablename in self.dst_tables:
395 return self.dst_tables[tablename]
397 columns = est.columns.copy()
398 if add_extra_id_cols:
399 columns.extend(all_extra_id_columns(self.req))
400 columns.extend(est.extra_task_xref_columns())
401 table = Table(tablename, self.dst_metadata, *columns)
402 # ... that modifies the metadata, so:
403 self.dst_tables[tablename] = table
404 return table
406 def _add_dump_table_for_src_object(self, src_obj: object) -> None:
407 """
408 - Mark the object's table as seen.
410 - If we want it, add it to the metadata and execute a CREATE TABLE
411 command.
413 - We may translate the table en route.
415 Args:
416 src_obj:
417 An SQLAlchemy ORM object. It will *not* be a
418 :class:`camcops_server.cc_modules.cc_summaryelement.ExtraSummaryTable`;
419 those are handled instead by
420 :meth:`_get_or_insert_summary_table`.
421 """ # noqa
422 # noinspection PyUnresolvedReferences
423 src_table = src_obj.__table__ # type: Table
424 tablename = src_table.name
425 self.tablenames_seen.add(tablename)
427 # Skip the table?
428 if self._dump_skip_table(tablename):
429 return
431 # Get the table definition
432 dst_table = self.get_dest_table_for_src_object(src_obj)
433 # Create it
434 self._create_dest_table(dst_table)
436 def _create_dest_table(self, dst_table: Table) -> None:
437 """
438 Creates a table in the destination database.
439 """
440 tablename = dst_table.name
441 if tablename in self.tablenames_created:
442 return # don't create it twice
443 # Create the table
444 # log.debug("Adding table {!r} to dump output", tablename)
445 # You have to use an engine, not a session, to create tables (or you
446 # get "AttributeError: 'Session' object has no attribute
447 # '_run_visitor'").
448 # However, you have to commit the session, or you get
449 # "sqlalchemy.exc.OperationalError: (sqlite3.OperationalError)
450 # database is locked", since a session is also being used.
451 self.dst_session.commit()
452 dst_table.create(self.dst_engine)
453 self.tablenames_created.add(tablename)
455 def _copy_object_to_dump(self, src_obj: object) -> None:
456 """
457 Copy the SQLAlchemy ORM object to the dump.
458 """
459 # noinspection PyUnresolvedReferences
460 src_table = src_obj.__table__ # type: Table
461 adding_extra_ids = False
462 patient = None # type: Optional[Patient]
463 if self.export_options.db_patient_id_in_each_row:
464 adding_extra_ids, patient = \
465 self._merits_extra_id_num_columns_if_requested(src_obj)
467 # 1. Insert row for this object, potentially adding and removing
468 # columns.
469 tablename = src_table.name
470 dst_table = self.dst_tables[tablename]
471 assert dst_table.name == tablename
472 row = {} # type: Dict[str, Any]
473 # Copy columns, skipping any we don't want
474 for attrname, column in gen_columns(src_obj):
475 if self._dump_skip_column(tablename, column.name):
476 continue
477 row[column.name] = getattr(src_obj, attrname)
478 # Any other columns to add for this table?
479 if isinstance(src_obj, GenericTabletRecordMixin):
480 if self.export_options.db_include_summaries:
481 for summary_element in src_obj.get_summaries(self.req):
482 row[summary_element.name] = summary_element.value
483 if adding_extra_ids:
484 if patient:
485 patient.add_extra_idnum_info_to_row(row)
486 if isinstance(src_obj, TaskDescendant):
487 src_obj.add_extra_task_xref_info_to_row(row)
488 try:
489 self.dst_session.execute(dst_table.insert(row))
490 except CompileError:
491 log.critical("\ndst_table:\n{}\nrow:\n{}", dst_table, row)
492 raise
494 # 2. If required, add extra tables/rows that this task wants to
495 # offer (usually tables whose rows don't have a 1:1 correspondence
496 # to the task or its ancillary objects).
497 if isinstance(src_obj, Task):
498 estables = src_obj.get_all_summary_tables(self.req)
499 # ... includes SNOMED
500 for est in estables:
501 dst_summary_table = self._get_or_insert_summary_table(
502 est, add_extra_id_cols=adding_extra_ids)
503 for row in est.rows:
504 if patient:
505 patient.add_extra_idnum_info_to_row(row)
506 if adding_extra_ids:
507 est.add_extra_task_xref_info_to_row(row)
508 try:
509 self.dst_session.execute(dst_summary_table.insert(row))
510 except CompileError:
511 log.critical("\ndst_summary_table:\n{}\nrow:\n{}",
512 dst_table, row)
513 raise
515 def _get_or_insert_summary_table(self, est: "ExtraSummaryTable",
516 add_extra_id_cols: bool = False) -> Table:
517 """
518 Add an additional summary table to the dump, if it's not there already.
519 Return the table (from the destination database).
521 Args:
522 est:
523 a
524 :class:`camcops_server.cc_modules.cc_summaryelement.ExtraSummaryTable`
525 add_extra_id_cols:
526 Add extra ID columns, for the ``DB_PATIENT_ID_PER_ROW``
527 export option?
528 """ # noqa
529 tablename = est.tablename
530 if tablename not in self.tablenames_created:
531 table = self.get_dest_table_for_est(
532 est, add_extra_id_cols=add_extra_id_cols)
533 self._create_dest_table(table)
534 return self.dst_tables[tablename]
536 def _dump_skip_table(self, tablename: str) -> bool:
537 """
538 Should we skip this table (omit it from the dump)?
539 """
540 if not self.export_options.include_blobs and tablename == Blob.__tablename__: # noqa
541 return True
542 if tablename in DUMP_SKIP_TABLES:
543 return True
544 return False
546 @staticmethod
547 def _dump_skip_column(tablename: str, columnname: str) -> bool:
548 """
549 Should we skip this column (omit it from the dump)?
550 """
551 if columnname in DUMP_SKIP_COLNAMES:
552 return True
553 if (tablename in DUMP_ONLY_COLNAMES and
554 columnname not in DUMP_ONLY_COLNAMES[tablename]):
555 return True
556 if (tablename in DUMP_DROP_COLNAMES and
557 columnname in DUMP_DROP_COLNAMES[tablename]):
558 return True
559 return False
562# =============================================================================
563# Copying stuff to a dump
564# =============================================================================
566def copy_tasks_and_summaries(tasks: Iterable[Task],
567 dst_engine: Engine,
568 dst_session: SqlASession,
569 export_options: "TaskExportOptions",
570 req: "CamcopsRequest") -> None:
571 """
572 Copy a set of tasks, and their associated related information (found by
573 walking the SQLAlchemy ORM tree), to the dump.
575 Args:
576 tasks: tasks to copy
577 dst_engine: destination SQLAlchemy Engine
578 dst_session: destination SQLAlchemy Session
579 export_options: :class:`camcops_server.cc_modules.cc_simpleobjects.TaskExportOptions`
580 req: :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`
581 """ # noqa
582 # How best to create the structure that's required?
583 #
584 # https://stackoverflow.com/questions/21770829/sqlalchemy-copy-schema-and-data-of-subquery-to-another-database # noqa
585 # https://stackoverflow.com/questions/40155340/sqlalchemy-reflect-and-copy-only-subset-of-existing-schema # noqa
586 #
587 # - Should we attempt to copy the MetaData object? That seems extremely
588 # laborious, since every ORM class is tied to it. Moreover,
589 # MetaData.tables is an immutabledict, so we're not going to be editing
590 # anything. Even if we cloned the MetaData, that's not going to give us
591 # ORM classes to walk.
592 # - Shall we operate at a lower level? That seems sensible.
593 # - Given that... we don't need to translate the PKs at all, unlike
594 # merge_db.
595 # - Let's not create FK constraints explicitly. Most are not achievable
596 # anyway (e.g. linking on device/era; omission of BLOBs).
598 controller = DumpController(dst_engine=dst_engine,
599 dst_session=dst_session,
600 export_options=export_options,
601 req=req)
603 # We walk through all the objects.
604 log.debug("Starting to copy tasks...")
605 for startobj in tasks:
606 log.debug("Processing task: {!r}", startobj)
607 for src_obj in walk_orm_tree(
608 startobj,
609 seen=controller.instances_seen,
610 skip_relationships_always=DUMP_SKIP_RELNAMES,
611 skip_all_relationships_for_tablenames=DUMP_SKIP_ALL_RELS_FOR_TABLES, # noqa
612 skip_all_objects_for_tablenames=DUMP_SKIP_TABLES):
613 controller.consider_object(src_obj)
614 log.debug("... finished copying tasks.")