Coverage for cc_modules/cc_proquint.py: 97%

75 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-15 15:51 +0100

1""" 

2camcops_server/cc_modules/cc_proquint.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2012, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CamCOPS. 

10 

11 CamCOPS is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CamCOPS is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26Convert integers into Pronounceable Quintuplets (proquints) 

27https://arxiv.org/html/0901.4016 

28 

29Based on https://github.com/dsw/proquint, which has the following licence: 

30 

31--8<--------------------------------------------------------------------------- 

32 

33Copyright (c) 2009 Daniel S. Wilkerson 

34All rights reserved. 

35 

36Redistribution and use in source and binary forms, with or without 

37modification, are permitted provided that the following conditions are 

38met: 

39 

40 Redistributions of source code must retain the above copyright 

41 notice, this list of conditions and the following disclaimer. 

42 Redistributions in binary form must reproduce the above copyright 

43 notice, this list of conditions and the following disclaimer in 

44 the documentation and/or other materials provided with the 

45 distribution. 

46 

47 Neither the name of Daniel S. Wilkerson nor the names of its 

48 contributors may be used to endorse or promote products derived 

49 from this software without specific prior written permission. 

50 

51THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

52"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

53LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

54A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

55OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

56SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

57LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

58DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

59THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

60(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

61OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

62 

63--8<--------------------------------------------------------------------------- 

64 

65 

66""" 

67 

68from typing import List 

69import uuid 

70 

# Proquint alphabets. A letter's position in its alphabet is the value it
# encodes, so the order of these strings must never change.
CONSONANTS = "bdfghjklmnprstvz"  # 16 letters: one 4-bit value each
VOWELS = "aiou"  # 4 letters: one 2-bit value each

# Number of bits carried by each kind of letter.
SIZE_OF_CONSONANT = 4
SIZE_OF_VOWEL = 2

# Reverse mappings (letter -> encoded value), derived from the alphabets so
# that the encoding and decoding tables cannot drift apart.
LOOKUP_CONSONANTS = {letter: value for value, letter in enumerate(CONSONANTS)}
LOOKUP_VOWELS = {letter: value for value, letter in enumerate(VOWELS)}

# Combined table for code that must accept either kind of letter; the two
# alphabets have no letters in common, so nothing is overwritten here.
LOOKUP_TABLE = {**LOOKUP_CONSONANTS, **LOOKUP_VOWELS}

97 

98 

class InvalidProquintException(Exception):
    """
    Raised when a string cannot be decoded as a proquint.
    """

101 

102 

def proquint_from_uuid(uuid_obj: uuid.UUID) -> str:
    """
    Encode a UUID as a proquint string.

    The UUID is treated as a single 128-bit integer (``uuid_obj.int``) and
    handed to :func:`proquint_from_int`.
    """
    uuid_size_in_bits = 128
    return proquint_from_int(uuid_obj.int, uuid_size_in_bits)

108 

109 

def proquint_from_int(int_value: int, size_in_bits: int) -> str:
    """Convert integer value into proquint, with a trailing check character

    .. code-block:: none

        >>> proquint_from_int(0x493b05ee, 32)
        hohur-bilov-j

        0x493b05ee in binary is:
        0100 1001 0011 1011 - 0000 0101 1110 1110

        grouped into alternating 4 and 2 bit values:

        cons vo cons vo cons - cons vo cons vo cons
        0100 10 0100 11 1011 - 0000 01 0111 10 1110

        h    o  h    u  r    - b    i  l    o  v

    The final ``-j`` in the example is the check character, calculated over
    the letters ``hohurbilov``.

    Args:
        int_value:
            integer value to encode; only its lowest ``size_in_bits`` bits
            are encoded, any higher bits are ignored
        size_in_bits:
            size of integer in bits (must be a multiple of 16)

    Returns:
        proquint string identifier: one five-letter word per 16 bits, most
        significant word first, then a single check character, all joined
        with "-"

    Raises:
        ValueError: if ``size_in_bits`` is not a multiple of 16
    """
    if size_in_bits % 16 != 0:
        raise ValueError(
            f"size_in_bits ({size_in_bits}) must be a multiple of 16"
        )

    # Peel off 16-bit chunks starting from the least significant end...
    words: List[str] = []
    for _ in range(size_in_bits // 16):
        words.append(_proquint_from_int16(int_value & 0xFFFF))
        int_value >>= 16

    # ...then flip them once so the most significant word comes first.
    words.reverse()

    # The check character is calculated over the letters only (no dashes).
    words.append(_generate_check_character("".join(words)))

    return "-".join(words)

154 

155 

def _generate_check_character(proquint: str) -> str:
    """
    Return the Luhn mod 16 check character for a proquint.

    https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm

    ``proquint`` is the sequence of letters to protect, without dashes and
    without a check character. Each letter has a code point:

    .. code-block:: none

        consonant_values = {
            'b': 0x0, 'd': 0x1, 'f': 0x2, 'g': 0x3,
            'h': 0x4, 'j': 0x5, 'k': 0x6, 'l': 0x7,
            'm': 0x8, 'n': 0x9, 'p': 0xa, 'r': 0xb,
            's': 0xc, 't': 0xd, 'v': 0xe, 'z': 0xf,
        }

        vowel_values = {
            'a': 0x0, 'i': 0x1, 'o': 0x2, 'u': 0x3,
        }

    Working leftwards from the last letter, every other code point is
    doubled (the last letter itself is doubled). Each result is written in
    hex and its digits are added together; those per-letter values are then
    summed.

    Example (all in hex), for hohur-bilov:

    .. code-block:: none

        Character    h  o  h  u  r  b  i  l  o  v
        Code point   4  2  4  3  b  0  1  7  2  e
        Double          4     6     0     e     1c
        Reduce       4  4  4  6  b  0  1  e  2  1+c
        Sum          4  4  4  6  b  0  1  e  2  d

        Total sum = 4 + 4 + 4 + 6 + b + 0 + 1 + e + 2 + d = 0x3b
        Next multiple of 0x10 is 0x40

        Check character code = 0x40 - 0x3b = 0x5
        So check character is 'j'

    The check character is always taken from the consonants.
    """
    # A start factor of 2 means the last letter of the input is doubled.
    luhn_remainder = _generate_luhn_mod_16_remainder(proquint, 2)

    # Distance up to the next multiple of 16 (0 when already on a multiple).
    return CONSONANTS[(-luhn_remainder) % 16]

202 

203 

def _proquint_from_int16(int16_value: int) -> str:
    """
    Convert 16-bit integer into a single five-letter proquint word.

    The word is consonant-vowel-consonant-vowel-consonant, carrying
    4 + 2 + 4 + 2 + 4 = 16 bits with the most significant bits first. Only
    the lowest 16 bits of ``int16_value`` are used.
    """
    remaining = int16_value
    letters_lsb_first = []

    # Positions are counted from the least significant end of the value:
    # even positions are consonants, odd positions are vowels.
    for position in range(5):
        if position % 2 == 1:
            alphabet, width = VOWELS, SIZE_OF_VOWEL
        else:
            alphabet, width = CONSONANTS, SIZE_OF_CONSONANT

        # Mask off the lowest ``width`` bits and use them as the index.
        letters_lsb_first.append(alphabet[remaining & ((1 << width) - 1)])
        remaining >>= width

    return "".join(reversed(letters_lsb_first))

224 

225 

def uuid_from_proquint(proquint: str) -> uuid.UUID:
    """
    Decode a proquint string into a UUID.

    The proquint is decoded with :func:`int_from_proquint` and the resulting
    integer is used as the UUID's integer value.
    """
    return uuid.UUID(int=int_from_proquint(proquint))

233 

234 

def int_from_proquint(proquint: str) -> int:
    """
    Convert proquint string into integer.

    .. code-block:: none

        >>> hex(int_from_proquint('hohur-bilov-j'))
        0x493b05ee

        h   o   h   u   r   - b   i   l   o   v
        0x4 0x2 0x4 0x3 0xb - 0x0 0x1 0x7 0x2 0xe

        0100 10 0100 11 1011 - 0000 01 0111 10 1110
        0100 1001 0011 1011  - 0000 0101 1110 1110
        0x4  0x9  0x3  0xb   - 0x0  0x5  0xe  0xe

    Args:
        proquint:
            string to decode: dash-separated words, the last of which is the
            check character
    Returns:
        converted integer value
    Raises:
        InvalidProquintException:
            if the string contains a character that is not a proquint
            letter, if the check character does not match, or if a consonant
            and a vowel are in each other's position within a word
    """
    words = proquint.split("-")

    try:
        checksum_ok = _is_valid_proquint("".join(words))
    except KeyError:
        # The checksum code indexes its lookup table directly, so a
        # character outside the proquint alphabet surfaces as a KeyError.
        # Report it like any other malformed input so that callers only
        # have to catch InvalidProquintException.
        raise InvalidProquintException(
            f"'{proquint}' contains invalid or transposed characters"
        ) from None

    if not checksum_ok:
        raise InvalidProquintException(
            f"'{proquint}' is not valid (check character mismatch)"
        )

    # Remove check character
    words.pop()

    int_value = 0

    for word in words:
        for i, c in enumerate(word):
            # Within a word, even positions hold consonants (4 bits each)
            # and odd positions hold vowels (2 bits each).
            if i & 1:
                lookup_table = LOOKUP_VOWELS
                shift = SIZE_OF_VOWEL
            else:
                lookup_table = LOOKUP_CONSONANTS
                shift = SIZE_OF_CONSONANT

            value = lookup_table.get(c)

            if value is None:
                # A valid letter of the wrong kind for this position.
                raise InvalidProquintException(
                    f"'{proquint}' contains invalid or transposed characters"
                )

            # Make room for the new bits, then add them at the low end.
            int_value <<= shift
            int_value += value

    return int_value

290 

291 

def _is_valid_proquint(proquint: str) -> bool:
    """
    Check a proquint against its trailing check character.

    ``proquint`` is the letters only (no dashes), with the check character
    last. A start factor of 1 is used, so the check character itself is not
    doubled. The proquint validates when the Luhn mod 16 remainder is zero.
    """
    remainder = _generate_luhn_mod_16_remainder(proquint, 1)
    return remainder == 0

297 

298 

def _generate_luhn_mod_16_remainder(proquint: str, start_factor: int) -> int:
    """
    Compute the Luhn mod 16 remainder for a string of proquint letters.

    Part of the checksum calculations; see :func:`_generate_check_character`.
    For a valid sequence, the overall remainder should be 0.
    See https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm.

    Args:
        proquint:
            proquint letters, without dashes
        start_factor:
            multiplier applied to the last letter; the multiplier then
            alternates between 1 and 2 for each letter further left

    Returns:
        the sum of the per-letter values, modulo 16
    """
    total = 0
    multiplier = start_factor

    for letter in reversed(proquint):
        # Write the scaled code point in base 16 and add its two digits.
        high_digit, low_digit = divmod(LOOKUP_TABLE[letter] * multiplier, 16)
        total += high_digit + low_digit

        multiplier = 1 if multiplier == 2 else 2

    return total % 16