Coverage for cc_modules/cc_proquint.py: 97%
75 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 15:51 +0100
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 15:51 +0100
1"""
2camcops_server/cc_modules/cc_proquint.py
4===============================================================================
6 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CamCOPS.
11 CamCOPS is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CamCOPS is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26Convert integers into Pronounceable Quintuplets (proquints)
27https://arxiv.org/html/0901.4016
29Based on https://github.com/dsw/proquint, which has the following licence:
31--8<---------------------------------------------------------------------------
33Copyright (c) 2009 Daniel S. Wilkerson
34All rights reserved.
36Redistribution and use in source and binary forms, with or without
37modification, are permitted provided that the following conditions are
38met:
40 Redistributions of source code must retain the above copyright
41 notice, this list of conditions and the following disclaimer.
42 Redistributions in binary form must reproduce the above copyright
43 notice, this list of conditions and the following disclaimer in
44 the documentation and/or other materials provided with the
45 distribution.
47 Neither the name of Daniel S. Wilkerson nor the names of its
48 contributors may be used to endorse or promote products derived
49 from this software without specific prior written permission.
51THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
52"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
53LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
54A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
55OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
56SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
57LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
58DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
59THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
60(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
61OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
63--8<---------------------------------------------------------------------------
66"""
68from typing import List
69import uuid
# Proquint alphabet: 16 consonants and 4 vowels. A letter's index within its
# string is the numeric value that the letter encodes.
CONSONANTS = "bdfghjklmnprstvz"
VOWELS = "aiou"

# Number of bits carried by each kind of letter (2**4 == 16 consonants,
# 2**2 == 4 vowels).
SIZE_OF_CONSONANT = 4
SIZE_OF_VOWEL = 2

# Reverse mappings (letter -> encoded value), derived from the alphabets above
# so that the two representations cannot drift apart.
LOOKUP_CONSONANTS = {letter: value for value, letter in enumerate(CONSONANTS)}
LOOKUP_VOWELS = {letter: value for value, letter in enumerate(VOWELS)}

# Every valid proquint letter mapped to its value; used by the checksum code,
# which treats consonants and vowels alike.
LOOKUP_TABLE = {**LOOKUP_CONSONANTS, **LOOKUP_VOWELS}
class InvalidProquintException(Exception):
    """
    Raised when a string cannot be decoded as a proquint, e.g. because its
    check character does not match or it contains unexpected characters.
    """
def proquint_from_uuid(uuid_obj: uuid.UUID) -> str:
    """
    Encode a UUID as a proquint string.

    The UUID is taken as its 128-bit integer value and passed to
    :func:`proquint_from_int`.

    Args:
        uuid_obj:
            UUID to encode

    Returns:
        proquint string identifier
    """
    uuid_as_int = uuid_obj.int
    return proquint_from_int(uuid_as_int, 128)
def proquint_from_int(int_value: int, size_in_bits: int) -> str:
    """
    Encode an integer as a hyphen-separated proquint with a trailing check
    character.

    .. code-block:: none

        >>> proquint_from_int(0x493b05ee, 32)
        hohur-bilov-j

        0x493b05ee in binary is:
        0100 1001 0011 1011 - 0000 0101 1110 1110

        grouped into alternating 4 and 2 bit values:

        cons vo cons vo cons - cons vo cons vo cons
        0100 10 0100 11 1011 - 0000 01 0111 10 1110

        h    o  h    u  r    - b    i  l    o  v

        The final "j" is the check character; see
        _generate_check_character().

    Args:
        int_value:
            integer value to encode
        size_in_bits:
            size of integer in bits (must be a multiple of 16)

    Returns:
        proquint string identifier

    Raises:
        ValueError: if ``size_in_bits`` is not a multiple of 16
    """
    if size_in_bits % 16 != 0:
        raise ValueError(
            f"size_in_bits ({size_in_bits}) must be a multiple of 16"
        )

    # Peel off 16 bits at a time starting with the least significant word,
    # then reverse so that the most significant word comes first.
    words: List[str] = []
    remaining = int_value
    for _ in range(size_in_bits // 16):
        words.append(_proquint_from_int16(remaining & 0xFFFF))
        remaining >>= 16
    words.reverse()

    # The check character is computed over the letters only (no hyphens) and
    # becomes the final hyphen-separated element.
    words.append(_generate_check_character("".join(words)))

    return "-".join(words)
def _generate_check_character(proquint: str) -> str:
    """
    Return the Luhn mod 16 check character for a proquint.

    https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm

    Each letter is mapped to its code point:

    .. code-block:: none

        consonants: b=0x0 d=0x1 f=0x2 g=0x3 h=0x4 j=0x5 k=0x6 l=0x7
                    m=0x8 n=0x9 p=0xa r=0xb s=0xc t=0xd v=0xe z=0xf
        vowels:     a=0x0 i=0x1 o=0x2 u=0x3

    Working from the last character leftwards, every other code point is
    doubled (starting with the last one). The hexadecimal digits of the
    results are then added together.

    .. code-block:: none

        Example (all in hex): hohur-bilov

        Character   h   o   h   u   r   b   i   l   o   v
        Code point  4   2   4   3   b   0   1   7   2   e
        Double          4       6       0       e       1c
        Reduce      4   4   4   6   b   0   1   e   2   1+c
        Sum         4   4   4   6   b   0   1   e   2   d

        Total sum = 4 + 4 + 4 + 6 + b + 0 + 1 + e + 2 + d = 0x3b
        Next multiple of 0x10 is 0x40

        Check character code = 0x40 - 0x3b = 0x5
        So check character is 'j'

    Args:
        proquint:
            proquint letters without hyphens and without a check character

    Returns:
        the single consonant to append as the check character
    """
    # Factor 2 first: once the check character is appended, the current last
    # letter becomes second-from-last, which is a doubled position.
    luhn_remainder = _generate_luhn_mod_16_remainder(proquint, 2)

    # The value that brings the total up to the next multiple of 16
    # (0 when it is already a multiple).
    return CONSONANTS[(-luhn_remainder) % 16]
def _proquint_from_int16(int16_value: int) -> str:
    """
    Encode one 16-bit value as a five-letter word.

    The word has the pattern consonant-vowel-consonant-vowel-consonant,
    using 4 + 2 + 4 + 2 + 4 = 16 bits.

    Args:
        int16_value:
            value to encode; only the lowest 16 bits are consumed

    Returns:
        five-letter proquint word
    """
    remaining = int16_value
    letters_reversed = []

    # Work from the least significant bits, so letters come out last-first.
    # Even positions (counted from the right) are consonants, odd are vowels.
    for position in range(5):
        if position % 2 == 0:
            alphabet, width = CONSONANTS, SIZE_OF_CONSONANT
        else:
            alphabet, width = VOWELS, SIZE_OF_VOWEL

        low_bits = remaining & ((1 << width) - 1)
        letters_reversed.append(alphabet[low_bits])
        remaining >>= width

    return "".join(reversed(letters_reversed))
def uuid_from_proquint(proquint: str) -> uuid.UUID:
    """
    Decode a proquint string into a UUID.

    The proquint is decoded with :func:`int_from_proquint` and the resulting
    integer is used as the UUID's integer value.

    Args:
        proquint:
            string to decode

    Returns:
        the corresponding UUID
    """
    return uuid.UUID(int=int_from_proquint(proquint))
def int_from_proquint(proquint: str) -> int:
    """
    Convert proquint string into integer.

    .. code-block:: none

        >>> hex(int_from_proquint('hohur-bilov-j'))
        0x493b05ee

        h    o    h    u    r    - b    i    l    o    v
        0x4  0x2  0x4  0x3  0xb  - 0x0  0x1  0x7  0x2  0xe

        0100 10   0100 11   1011 - 0000 01   0111 10   1110
        0100 1001 0011 1011      - 0000 0101 1110 1110
        0x4  0x9  0x3  0xb       - 0x0  0x5  0xe  0xe

    Args:
        proquint:
            string to decode (hyphen-separated words followed by a check
            character)

    Returns:
        converted integer value

    Raises:
        InvalidProquintException: if the string contains characters outside
            the proquint alphabet, if the check character does not match, or
            if a consonant/vowel appears in the wrong position
    """
    words = proquint.split("-")
    letters = "".join(words)

    # Reject unknown characters before computing the checksum: the checksum
    # helper indexes LOOKUP_TABLE directly, so a stray character would
    # otherwise escape as a bare KeyError instead of the documented exception.
    if any(c not in LOOKUP_TABLE for c in letters):
        raise InvalidProquintException(
            f"'{proquint}' contains invalid or transposed characters"
        )

    if not _is_valid_proquint(letters):
        raise InvalidProquintException(
            f"'{proquint}' is not valid (check character mismatch)"
        )

    # Remove check character
    words.pop()

    int_value = 0

    for word in words:
        for i, c in enumerate(word):
            # Within each word, even positions hold consonants (4 bits) and
            # odd positions hold vowels (2 bits).
            if i & 1:
                lookup_table = LOOKUP_VOWELS
                shift = SIZE_OF_VOWEL
            else:
                lookup_table = LOOKUP_CONSONANTS
                shift = SIZE_OF_CONSONANT

            value = lookup_table.get(c)

            # A valid letter of the wrong kind for this position (e.g. a
            # vowel where a consonant is expected).
            if value is None:
                raise InvalidProquintException(
                    f"'{proquint}' contains invalid or transposed characters"
                )

            int_value <<= shift
            int_value += value

    return int_value
def _is_valid_proquint(proquint: str) -> bool:
    """
    Check a proquint (letters only, check character included) against its
    Luhn mod 16 checksum.

    Args:
        proquint:
            proquint letters without hyphens, ending in the check character

    Returns:
        ``True`` if the checksum remainder is zero
    """
    # Factor 1 first: the last letter is the check character itself, which
    # is not doubled.
    remainder = _generate_luhn_mod_16_remainder(proquint, 1)
    return remainder == 0
def _generate_luhn_mod_16_remainder(proquint: str, start_factor: int) -> int:
    """
    Compute the Luhn mod 16 remainder of a string of proquint letters.

    Used both to generate a check character and to validate one; for a valid
    sequence including its check character, the remainder is 0.
    See https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm.

    Args:
        proquint:
            proquint letters without hyphens
        start_factor:
            multiplier applied to the last letter; subsequent letters
            (moving leftwards) alternate between 1 and 2

    Returns:
        sum of the base-16 digits of the weighted code points, modulo 16
    """
    total = 0
    multiplier = start_factor

    for letter in reversed(proquint):
        # Add the two hexadecimal digits of the weighted code point.
        high_digit, low_digit = divmod(LOOKUP_TABLE[letter] * multiplier, 16)
        total += high_digit + low_digit

        multiplier = 1 if multiplier == 2 else 2

    return total % 16