Coverage for cc_modules/cc_proquint.py : 24%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2camcops_server/cc_modules/cc_proquint.py
4===============================================================================
6 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com).
8 This file is part of CamCOPS.
10 CamCOPS is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
15 CamCOPS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
23===============================================================================
25Convert integers into Pronounceable Quintuplets (proquints)
26https://arxiv.org/html/0901.4016
28Based on https://github.com/dsw/proquint, which has the following licence:
30--8<---------------------------------------------------------------------------
32Copyright (c) 2009 Daniel S. Wilkerson
33All rights reserved.
35Redistribution and use in source and binary forms, with or without
36modification, are permitted provided that the following conditions are
37met:
39 Redistributions of source code must retain the above copyright
40 notice, this list of conditions and the following disclaimer.
41 Redistributions in binary form must reproduce the above copyright
42 notice, this list of conditions and the following disclaimer in
43 the documentation and/or other materials provided with the
44 distribution.
46 Neither the name of Daniel S. Wilkerson nor the names of its
47 contributors may be used to endorse or promote products derived
48 from this software without specific prior written permission.
50THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
51"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
52LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
53A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
54OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
57DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
58THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
59(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
60OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62--8<---------------------------------------------------------------------------
65"""
66import uuid
# Alphabets: a letter's position in its string is the value it encodes.
CONSONANTS = "bdfghjklmnprstvz"
VOWELS = "aiou"

# Number of bits carried by each kind of letter.
SIZE_OF_CONSONANT = 4
SIZE_OF_VOWEL = 2

# Reverse maps (letter -> encoded value), derived from the alphabets above.
LOOKUP_CONSONANTS = {letter: code for code, letter in enumerate(CONSONANTS)}
LOOKUP_VOWELS = {letter: code for code, letter in enumerate(VOWELS)}

# Combined map used where the consonant/vowel position is not relevant
# (consonants first, then vowels).
LOOKUP_TABLE = dict(LOOKUP_CONSONANTS)
LOOKUP_TABLE.update(LOOKUP_VOWELS)
class InvalidProquintException(Exception):
    """Raised when a proquint string fails validation while being decoded."""
def proquint_from_uuid(uuid_obj: uuid.UUID) -> str:
    """
    Encode a UUID as a proquint.

    The UUID is encoded through its 128-bit integer value.
    """
    uuid_as_int = uuid_obj.int
    return proquint_from_int(uuid_as_int, 128)
def proquint_from_int(int_value: int,
                      size_in_bits: int) -> str:
    """
    Encode an integer as a proquint followed by a check character.

    .. code-block:: none

        >>> proquint_from_int(0x493b05ee, 32)
        hohur-bilov-j

        0x493b05ee in binary is:
        0100 1001 0011 1011 - 0000 0101 1110 1110

        grouped into alternating 4 and 2 bit values:

        cons vo cons vo cons - cons vo cons vo cons
        0100 10 0100 11 1011 - 0000 01 0111 10 1110

        h    o  h    u  r    - b    i  l    o  v

    The trailing "j" is the check character, appended as a final
    dash-separated element.

    Args:
        int_value:
            integer value to encode; only its lowest ``size_in_bits`` bits
            are used
        size_in_bits:
            size of integer in bits (must be a multiple of 16)

    Returns:
        proquint string identifier

    Raises:
        ValueError: if ``size_in_bits`` is not a multiple of 16
    """
    if size_in_bits % 16 != 0:
        raise ValueError(
            f"size_in_bits ({size_in_bits}) must be a multiple of 16"
        )

    word_count = size_in_bits // 16

    # One five-letter word per 16-bit slice, most significant slice first.
    words = [
        _proquint_from_int16((int_value >> (16 * position)) & 0xffff)
        for position in reversed(range(word_count))
    ]

    # The check character is computed over the letters only (no dashes).
    words.append(_generate_check_character("".join(words)))

    return "-".join(words)
def _generate_check_character(proquint: str) -> str:
    """
    Return the Luhn mod 16 check character for a string of proquint letters.

    https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm

    Each letter maps to a code point:

    .. code-block:: none

        consonant_values = {
            'b': 0x0, 'd': 0x1, 'f': 0x2, 'g': 0x3,
            'h': 0x4, 'j': 0x5, 'k': 0x6, 'l': 0x7,
            'm': 0x8, 'n': 0x9, 'p': 0xa, 'r': 0xb,
            's': 0xc, 't': 0xd, 'v': 0xe, 'z': 0xf,
        }

        vowel_values = {
            'a': 0x0, 'i': 0x1, 'o': 0x2, 'u': 0x3,
        }

    Working leftwards from the last character, every other code point is
    doubled (beginning with the last one). Each result is written in hex and
    its hex digits are added together; those per-character values are then
    summed.

    .. code-block:: none

        Example (all in hex):

        hohur-bilov

        Character   h   o   h   u   r   b   i   l   o   v
        Code point  4   2   4   3   b   0   1   7   2   e
        Double          4       6       0       e       1c
        Reduce      4   4   4   6   b   0   1   e   2   1+c
        Sum         4   4   4   6   b   0   1   e   2   d

        Total sum = 4 + 4 + 4 + 6 + b + 0 + 1 + e + 2 + d = 0x3b
        Next multiple of 0x10 is 0x40

        Check character code = 0x40 - 0x3b = 0x5
        So check character is 'j'

    Args:
        proquint:
            the proquint letters, without dashes

    Returns:
        the single consonant that serves as check character
    """
    # Start factor 2: the rightmost data letter is the first one doubled.
    luhn_remainder = _generate_luhn_mod_16_remainder(proquint, 2)

    # Distance up to the next multiple of 16 (0 when already a multiple).
    return CONSONANTS[(-luhn_remainder) % 16]
def _proquint_from_int16(int16_value: int) -> str:
    """
    Encode a 16-bit integer as one five-letter proquint word.

    The word is consonant-vowel-consonant-vowel-consonant, covering
    4 + 2 + 4 + 2 + 4 = 16 bits, most significant bits first.
    """
    letters_low_to_high = []
    remaining = int16_value

    # Peel letters off the least significant end: consonants sit at even
    # positions (counted from the right), vowels at odd ones.
    for position in range(5):
        if position % 2:
            alphabet, width = VOWELS, SIZE_OF_VOWEL
        else:
            alphabet, width = CONSONANTS, SIZE_OF_CONSONANT

        letters_low_to_high.append(alphabet[remaining & ((1 << width) - 1)])
        remaining >>= width

    # Letters were collected right-to-left, so flip them for reading order.
    return ''.join(reversed(letters_low_to_high))
def uuid_from_proquint(proquint: str) -> uuid.UUID:
    """
    Decode a proquint string into a UUID.

    The proquint is first decoded to an integer, which is then used as the
    UUID's integer value.
    """
    return uuid.UUID(int=int_from_proquint(proquint))
def int_from_proquint(proquint: str) -> int:
    """
    Convert proquint string into integer.

    .. code-block:: none

        >>> hex(int_from_proquint('hohur-bilov-j'))
        '0x493b05ee'

        h    o    h    u    r    -  b    i    l    o    v
        0x4  0x2  0x4  0x3  0xb  -  0x0  0x1  0x7  0x2  0xe

        0100 10 0100 11 1011 - 0000 01 0111 10 1110
        0100 1001 0011 1011 - 0000 0101 1110 1110
        0x4  0x9  0x3  0xb  - 0x0  0x5  0xe  0xe

    Args:
        proquint:
            string to decode: dash-separated words, the last element being
            the check character

    Returns:
        converted integer value

    Raises:
        InvalidProquintException:
            if the string contains a character outside the proquint
            alphabet, if the check character does not match, or if a
            consonant/vowel appears in the wrong position within a word
    """
    words = proquint.split("-")

    try:
        checksum_ok = _is_valid_proquint("".join(words))
    except KeyError as err:
        # The checksum helper indexes the lookup table directly, so a
        # character outside the alphabet would otherwise escape as a bare
        # KeyError instead of the documented exception.
        raise InvalidProquintException(
            f"'{proquint}' contains invalid or transposed characters"
        ) from err

    if not checksum_ok:
        raise InvalidProquintException(
            f"'{proquint}' is not valid (check character mismatch)"
        )

    # Remove check character
    words.pop()

    int_value = 0

    for word in words:
        for i, c in enumerate(word):
            # Within a word, even positions hold consonants (4 bits) and
            # odd positions hold vowels (2 bits).
            if i & 1:
                lookup_table = LOOKUP_VOWELS
                shift = SIZE_OF_VOWEL
            else:
                lookup_table = LOOKUP_CONSONANTS
                shift = SIZE_OF_CONSONANT

            value = lookup_table.get(c)

            if value is None:
                # A valid letter of the wrong kind for this position.
                raise InvalidProquintException(
                    f"'{proquint}' contains invalid or transposed characters"
                )

            int_value <<= shift
            int_value += value

    return int_value
def _is_valid_proquint(proquint: str) -> bool:
    """
    Report whether the proquint letters (data plus trailing check character,
    without dashes) pass the Luhn mod 16 check.
    """
    # Start factor 1: the last letter is the check character and is not
    # doubled.
    leftover = _generate_luhn_mod_16_remainder(proquint, 1)
    return not leftover
def _generate_luhn_mod_16_remainder(proquint: str, start_factor: int) -> int:
    """
    Compute the Luhn mod 16 remainder over a string of proquint letters.

    Letters are processed from right to left. Each letter's code point is
    multiplied by a factor that begins at ``start_factor`` and then
    alternates between 1 and 2; the two base-16 digits of each product are
    added to a running total, and the total modulo 16 is returned.

    Used by :func:`_generate_check_character`. For a valid sequence
    (including its check character) the remainder should be 0.
    See https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm.
    """
    total = 0
    multiplier = start_factor

    for letter in reversed(proquint):
        high_digit, low_digit = divmod(LOOKUP_TABLE[letter] * multiplier, 16)
        total += high_digit + low_digit

        # Alternate the factor for the next letter to the left.
        multiplier = 1 if multiplier == 2 else 2

    return total % 16