Coverage for cc_modules/cc_sqlalchemy.py: 62%
81 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 14:23 +0100
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 14:23 +0100
1"""
2camcops_server/cc_modules/cc_sqlalchemy.py
4===============================================================================
6 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CamCOPS.
11 CamCOPS is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CamCOPS is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**SQLAlchemy helper functions and constants.**
28We define our metadata ``Base`` here, and things like our index naming
29convention and MySQL table formats.
31A few random notes:
33- SQLAlchemy will automatically warn about clashing columns:
35 .. :code-block:: python
37 from sqlalchemy import Column, Integer
38 from sqlalchemy.ext.declarative import declarative_base
40 Base = declarative_base()
42 class Thing(Base):
43 __tablename__ = "thing"
44 a = Column("a", Integer, primary_key=True)
45 b = Column("b", Integer)
46 c = Column("b", Integer) # produces a warning:
48 .. code-block:: none
50 SAWarning: On class 'Thing', Column object 'b' named directly multiple
51 times, only one will be used: b, c. Consider using orm.synonym instead
53"""
55from io import StringIO
56import logging
57import sqlite3
58from typing import Any
60from cardinal_pythonlib.logs import BraceStyleAdapter
61from cardinal_pythonlib.sqlalchemy.dialect import (
62 get_dialect_from_name,
63 SqlaDialectName,
64)
65from cardinal_pythonlib.sqlalchemy.dump import dump_ddl
66from cardinal_pythonlib.sqlalchemy.session import (
67 make_sqlite_url,
68 SQLITE_MEMORY_URL,
69)
70from pendulum import DateTime as Pendulum
72from sqlalchemy.engine import create_engine
73from sqlalchemy.engine.base import Engine
74from sqlalchemy.ext.mutable import Mutable
75from sqlalchemy.orm import DeclarativeBaseNoMeta
76from sqlalchemy.schema import CreateTable
77from sqlalchemy.sql.schema import MetaData, Table
79from camcops_server.cc_modules.cc_cache import cache_region_static, fkg
81log = BraceStyleAdapter(logging.getLogger(__name__))
84# =============================================================================
85# Naming convention; metadata; Base
86# =============================================================================
87# https://alembic.readthedocs.org/en/latest/naming.html
88# https://docs.sqlalchemy.org/en/latest/core/constraints.html#configuring-constraint-naming-conventions # noqa
90MYSQL_MAX_IDENTIFIER_LENGTH = 64
91LONG_COLUMN_NAME_WARNING_LIMIT = 30
93NAMING_CONVENTION = {
94 # - Note that constraint names must be unique in the DATABASE, not the
95 # table;
96 # https://dev.mysql.com/doc/refman/5.6/en/create-table-foreign-keys.html
97 # - Index names only have to be unique for the table;
98 # https://stackoverflow.com/questions/30653452/do-index-names-have-to-be-unique-across-entire-database-in-mysql # noqa
99 # INDEX:
100 "ix": "ix_%(column_0_label)s",
101 # UNIQUE CONSTRAINT:
102 "uq": "uq_%(table_name)s_%(column_0_name)s",
103 # "uq": "uq_%(column_0_name)s",
104 # CHECK CONSTRAINT:
105 # "ck": "ck_%(table_name)s_%(constraint_name)s", # too long for MySQL
106 # ... https://groups.google.com/forum/#!topic/sqlalchemy/SIT4D8S9dUg
107 # "ck": "ck_%(table_name)s_%(column_0_name)s",
108 # Problem 2018-09-14:
109 # - constraints must be unique across database
110 # - MySQL only accepts 64 characters for constraint name
111 # - using "%(column_0_name)" means that explicit constrant names are
112 # ignored
113 # - using "%(constraint_name)" means that all constraints have to be named
114 # explicitly (very tedious)
115 # - so truncate?
116 # https://docs.python.org/3/library/stdtypes.html#old-string-formatting
117 # https://www.python.org/dev/peps/pep-0237/
118 # - The main problem is BOOL columns, e.g.
119 # cpft_lps_discharge.management_specialling_behavioural_disturbance
120 # - Example:
121 # longthing = "abcdefghijklmnopqrstuvwxyz"
122 # d = {"thing": longthing}
123 # "hello %(thing).10s world" % d # LEFT TRUNCATE
124 # # ... gives 'hello abcdefghij world'
125 # "ck": "ck_%(table_name).30s_%(column_0_name).30s",
126 # 3 for "ck_" leaves 61; 30 for table, 1 for "_", 30 for column
127 # ... no...
128 # "obs_contamination_bodily_waste_*"
129 "ck": "ck_%(table_name)s_%(column_0_name)s", # unique but maybe too long
130 # FOREIGN KEY:
131 # "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", # too long for MySQL sometimes! # noqa
132 "fk": "fk_%(table_name)s_%(column_0_name)s",
133 # "fk": "fk_%(column_0_name)s",
134 # PRIMARY KEY:
135 "pk": "pk_%(table_name)s",
136}
139# The base of all our model classes:
140class Base(DeclarativeBaseNoMeta):
141 metadata = MetaData(naming_convention=NAMING_CONVENTION)
143 # Special options:
144 __table_args__ = {
145 # -------------------------------------------------------------------------
146 # MySQL special options
147 # -------------------------------------------------------------------------
148 # SQLAlchemy __table_args__:
149 # https://docs.sqlalchemy.org/en/latest/orm/extensions/declarative/table_config.html # noqa
150 # SQLAlchemy sends keyword arguments like 'mysql_keyword_name' to be
151 # rendered as KEYWORD_NAME in the CREATE TABLE statement:
152 # https://docs.sqlalchemy.org/en/latest/dialects/mysql.html
153 # Engine: InnoDB
154 "mysql_engine": "InnoDB",
155 # Barracuda: COMPRESSED or DYNAMIC
156 # https://dev.mysql.com/doc/refman/5.7/en/innodb-row-format-dynamic.html
157 # https://xenforo.com/community/threads/anyone-running-their-innodb-tables-with-row_format-compressed.99606/ # noqa
158 # We shouldn't compress everything by default; performance hit.
159 "mysql_row_format": "DYNAMIC",
160 # SEE server_troubleshooting.rst FOR BUG DISCUSSION
161 "mysql_charset": "utf8mb4 COLLATE utf8mb4_unicode_ci",
162 # Character set
163 # REPLACED # 'mysql_charset': 'utf8mb4',
164 # https://dev.mysql.com/doc/refman/5.5/en/charset-unicode-utf8mb4.html
165 # Collation
166 # Which collation for MySQL? See
167 # - https://stackoverflow.com/questions/766809/whats-the-difference-between-utf8-general-ci-and-utf8-unicode-ci # noqa
168 # REPLACED # 'mysql_collate': 'utf8mb4_unicode_ci'
169 # Note that COLLATION rules are, from least to greatest precedence:
170 # Server collation
171 # Connection-specific collation
172 # Database collation
173 # Table collation
174 # Column collation
175 # Query collation (using CAST or CONVERT)
176 # - https://stackoverflow.com/questions/24356090/difference-between-database-table-column-collation # noqa
177 # Therefore, we can set the table collation for all our tables, and not
178 # worry about the column collation, e.g. Text(collation=...).
179 #
180 # To check a MySQL database, and connection/server settings:
181 # SHOW VARIABLES LIKE '%character%';
182 # SHOW VARIABLES LIKE '%collation%';
183 # To check tables:
184 # SHOW TABLE STATUS WHERE NAME LIKE 'my_tablename'\G
185 # ... note use of \G to produce long-form output!
186 # To check columns:
187 # SHOW FULL COLUMNS FROM my_tablename;
188 #
189 # ONE THING IN PARTICULAR TO BEWARE: utf8mb4_unicode_ci produces
190 # CASE-INSENSITIVE COMPARISON. For example:
191 # SELECT 'a' = 'A'; -- produces 1
192 # SELECT 'a' = 'B'; -- produces 0
193 # SELECT BINARY 'a' = BINARY 'A'; -- produces 0
194 # This is a PROBLEM FOR PASSWORD FIELDS IF WE INTEND TO DO
195 # DATABASE-LEVEL COMPARISONS WITH THEM. In that case we must ensure a
196 # different collation is set; specifically, use
197 #
198 # utf8mb4_bin
199 #
200 # and see also
201 # SHOW COLLATION WHERE `Collation` LIKE 'utf8mb4%';
202 # and
203 # https://dev.mysql.com/doc/refman/5.6/en/charset-binary-collations.html
204 #
205 # To check, run
206 # SHOW FULL COLUMNS FROM _security_users;
207 }
209 def __init_subclass__(cls, **kwargs: Any) -> None:
210 cls.extend_columns(**kwargs)
211 super().__init_subclass__(**kwargs)
213 @classmethod
214 def extend_columns(cls, **kwargs: Any) -> None:
215 pass
218# MySQL things we can't set via SQLAlchemy, but would like to be set:
219# - max_allowed_packet: should be at least 32M
220# - innodb_strict_mode: should be 1, but less of a concern with SQLAlchemy
222# MySQL things we don't care about too much:
223# - innodb_file_per_table: desirable, but up to the user.
226# =============================================================================
227# Convenience functions
228# =============================================================================
231def make_memory_sqlite_engine(echo: bool = False) -> Engine:
232 """
233 Create an SQLAlchemy :class:`Engine` for an in-memory SQLite database.
234 """
235 return create_engine(SQLITE_MEMORY_URL, echo=echo)
238def make_file_sqlite_engine(filename: str, echo: bool = False) -> Engine:
239 """
240 Create an SQLAlchemy :class:`Engine` for an on-disk SQLite database.
241 """
242 return create_engine(make_sqlite_url(filename), echo=echo)
245def sql_from_sqlite_database(connection: sqlite3.Connection) -> str:
246 """
247 Returns SQL to describe an SQLite database.
249 Args:
250 connection: connection to SQLite database via ``sqlite3`` module
252 Returns:
253 the SQL
255 """
256 with StringIO() as f:
257 # noinspection PyTypeChecker
258 for line in connection.iterdump():
259 f.write(line + "\n")
260 f.flush()
261 return f.getvalue()
264@cache_region_static.cache_on_arguments(function_key_generator=fkg)
265def get_all_ddl(dialect_name: str = SqlaDialectName.MYSQL) -> str:
266 """
267 Returns the DDL (data definition language; SQL ``CREATE TABLE`` commands)
268 for our SQLAlchemy metadata.
270 Args:
271 dialect_name: SQLAlchemy dialect name
272 """
273 metadata = Base.metadata # type: MetaData
274 with StringIO() as f:
275 dump_ddl(metadata, dialect_name=dialect_name, fileobj=f)
276 f.flush()
277 text = f.getvalue()
278 return text
281def log_all_ddl(dialect_name: str = SqlaDialectName.MYSQL) -> None:
282 """
283 Send the DDL for our SQLAlchemy metadata to the Python log.
285 Args:
286 dialect_name: SQLAlchemy dialect name
287 """
288 text = get_all_ddl(dialect_name)
289 log.info(text)
290 log.info("DDL length: {} characters", len(text))
293@cache_region_static.cache_on_arguments(function_key_generator=fkg)
294def get_table_ddl(
295 table: Table, dialect_name: str = SqlaDialectName.MYSQL
296) -> str:
297 """
298 Returns the DDL (data definition language; SQL ``CREATE TABLE`` commands)
299 for a specific table.
301 Args:
302 table:
303 Table to dump.
304 dialect_name:
305 SQLAlchemy dialect name.
307 https://stackoverflow.com/questions/2128717/sqlalchemy-printing-raw-sql-from-create
308 """
309 dialect = get_dialect_from_name(dialect_name)
310 return str(CreateTable(table).compile(dialect=dialect))
313def assert_constraint_name_ok(table_name: str, column_name: str) -> None:
314 """
315 Checks that the automatically generated name of a constraint isn't too long
316 for specific databases.
318 Args:
319 table_name: table name
320 column_name: column name
322 Raises:
323 AssertionError, if something will break
324 """
325 d = {"table_name": table_name, "column_0_name": column_name}
326 anticipated_name = NAMING_CONVENTION["ck"] % d
327 if len(anticipated_name) > MYSQL_MAX_IDENTIFIER_LENGTH:
328 raise AssertionError(
329 f"Constraint name too long for table {table_name!r}, column "
330 f"{column_name!r}; will be {anticipated_name!r} "
331 f"of length {len(anticipated_name)}"
332 )
335# =============================================================================
336# Database engine hacks
337# =============================================================================
340def hack_pendulum_into_pymysql() -> None:
341 """
342 Hack in support for :class:`pendulum.DateTime` into the ``pymysql``
343 database interface.
345 See https://pendulum.eustace.io/docs/#limitations.
346 """
347 try:
348 # noinspection PyUnresolvedReferences
349 from pymysql.converters import encoders, escape_datetime
351 encoders[Pendulum] = escape_datetime
352 except ImportError:
353 pass
356hack_pendulum_into_pymysql()
359class MutableDict(Mutable, dict):
360 """
361 Source:
362 https://docs.sqlalchemy.org/en/14/orm/extensions/mutable.html
363 """
365 @classmethod
366 def coerce(cls, key: str, value: Any) -> Any:
367 """
368 Convert plain dictionaries to MutableDict.
369 """
371 if not isinstance(value, MutableDict):
372 if isinstance(value, dict):
373 return MutableDict(value)
375 # this call will raise ValueError
376 return Mutable.coerce(key, value)
377 else:
378 return value
380 def __setitem__(self, key: str, value: Any) -> None:
381 """
382 Detect dictionary set events and emit change events.
383 """
385 dict.__setitem__(self, key, value)
386 self.changed()
388 def __delitem__(self, key: str) -> None:
389 """
390 Detect dictionary del events and emit change events.
391 """
393 dict.__delitem__(self, key)
394 self.changed()