Coverage for src/sequin/__init__.py: 100.000%

97 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-14 11:39 +0200

1# SPDX-FileCopyrightText: 2024 Marco Ricci <m@the13thletter.info> 

2# 

3# SPDX-License-Identifier: MIT 

4 

5"""A Python reimplementation of James Coglan's "sequin" Node.js module. 

6 

7James Coglan's "sequin" Node.js module provides a pseudorandom number 

8generator (using rejection sampling on a stream of input numbers) that 

9attempts to minimize the amount of information it throws away: 

10(non-degenerate) rejected samples are fed into a stream of higher-order 

11numbers from which the next random number generation request will be 

12served. The sequin module is used in Coglan's "vault" module (a 

13deterministic, stateless password manager that recomputes passwords 

14instead of storing them), and this reimplementation is used for 

15a similar purpose. 

16 

17The main API is the [`Sequin`] [sequin.Sequin] class, which is 

18thoroughly documented. 

19 

20""" 

21 

22from __future__ import annotations 

23 

24import collections 

25 

26from collections.abc import Iterator, Sequence 

27from typing_extensions import assert_type 

28 

29__all__ = ('Sequin', 'SequinExhaustedError') 

30__author__ = 'Marco Ricci <m@the13thletter.info>' 

31__version__ = "0.1.0" 

32 

33class Sequin: 

34 """Generate pseudorandom non-negative numbers in different ranges. 

35 

36 Given a (presumed high-quality) uniformly random sequence of input 

37 bits, generate pseudorandom non-negative integers in a certain range 

38 on each call of the `generate` method. (It is permissible to 

39 specify a different range per call to `generate`; this is the main 

40 use case.) We use a modified version of rejection sampling, where 

41 rejected values are stored in "rejection queues" if possible, and 

42 these rejection queues re-seed the next round of rejection sampling. 

43 

44 This is a Python reimplementation of James Coglan's [Node.js sequin 

45 module][JS_SEQUIN], as introduced in [his blog post][BLOG_POST]. It 

46 uses a [technique by Christian Lawson-Perfect][SEQUIN_TECHNIQUE]. 

47 I do not know why the original module is called "sequin"; I presume 

48 it to be a pun on "sequence". 

49 

50 [JS_SEQUIN]: https://www.npmjs.com/package/sequin 

51 [BLOG_POST]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/ 

52 [SEQUIN_TECHNIQUE]: https://checkmyworking.com/2012/06/converting-a-stream-of-binary-digits-to-a-stream-of-base-n-digits/ 

53 

54 """ 

55 def __init__( 

56 self, 

57 sequence: str | bytes | bytearray | Sequence[int], 

58 /, *, is_bitstring: bool = False 

59 ): 

60 """Initialize the Sequin. 

61 

62 Args: 

63 sequence: 

64 A sequence of bits, or things convertible to bits, to 

65 seed the pseudorandom number generator. Byte and text 

66 strings are converted to 8-bit integer sequences. 

67 (Conversion will fail if the text string contains 

68 non-ISO-8859-1 characters.) The numbers are then 

69 converted to bits. 

70 is_bitstring: 

71 If true, treat the input as a bitstring. By default, 

72 the input is treated as a string of 8-bit integers, from 

73 which the individual bits must still be extracted. 

74 

75 Raises: 

76 ValueError: 

77 The sequence contains values outside the permissible 

78 range. 

79 

80 """ 

81 def uint8_to_bits(value): 

82 """Yield individual bits of an 8-bit number, MSB first.""" 

83 for i in (0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01): 

84 yield 1 if value | i == value else 0 

85 if isinstance(sequence, str): 

86 try: 

87 sequence = tuple(sequence.encode('iso-8859-1')) 

88 except UnicodeError as e: 

89 raise ValueError('sequence item out of range') from e 

90 else: 

91 sequence = tuple(sequence) 

92 assert_type(sequence, tuple[int, ...]) 

93 self.bases: dict[int, collections.deque[int]] = {} 

94 def gen() -> Iterator[int]: 

95 for num in sequence: 

96 if num not in range(2 if is_bitstring else 256): 

97 raise ValueError('sequence item out of range') 

98 if is_bitstring: 

99 yield num 

100 else: 

101 yield from uint8_to_bits(num) 

102 self.bases[2] = collections.deque(gen()) 

103 

104 def _all_or_nothing_shift( 

105 self, count: int, /, *, base: int = 2 

106 ) -> Sequence[int]: 

107 """Shift and return items if and only if there are enough. 

108 

109 Args: 

110 count: Number of items to shift/consume. 

111 base: Use the base `base` sequence. 

112 

113 Returns: 

114 If there are sufficient items in the sequence left, then 

115 consume them from the sequence and return them. Otherwise, 

116 consume nothing, and return nothing. 

117 

118 Notes: 

119 We currently remove now-empty sequences from the registry of 

120 sequences. 

121 

122 Examples: 

123 >>> seq = Sequin([1, 0, 1, 0, 0, 1, 0, 0, 0, 1], 

124 ... is_bitstring=True) 

125 >>> seq.bases 

126 {2: deque([1, 0, 1, 0, 0, 1, 0, 0, 0, 1])} 

127 >>> seq._all_or_nothing_shift(3) 

128 (1, 0, 1) 

129 >>> seq._all_or_nothing_shift(3) 

130 (0, 0, 1) 

131 >>> seq.bases[2] 

132 deque([0, 0, 0, 1]) 

133 >>> seq._all_or_nothing_shift(5) 

134 () 

135 >>> seq.bases[2] 

136 deque([0, 0, 0, 1]) 

137 >>> seq._all_or_nothing_shift(4) 

138 (0, 0, 0, 1) 

139 >>> 2 in seq.bases # now-empty sequences are removed 

140 False 

141 

142 """ 

143 try: 

144 seq = self.bases[base] 

145 except KeyError: 

146 return () 

147 stash: collections.deque[int] = collections.deque() 

148 try: 

149 for i in range(count): 

150 stash.append(seq.popleft()) 

151 except IndexError: 

152 seq.extendleft(reversed(stash)) 

153 return () 

154 # Clean up queues. 

155 if not seq: 

156 del self.bases[base] 

157 return tuple(stash) 

158 

159 @staticmethod 

160 def _big_endian_number( 

161 digits: Sequence[int], /, *, base: int = 2 

162 ) -> int: 

163 """Evaluate the given integer sequence as a big endian number. 

164 

165 Args: 

166 digits: A sequence of integers to evaluate. 

167 base: The number base to evaluate those integers in. 

168 

169 Raises: 

170 ValueError: `base` is an invalid base. 

171 ValueError: Not all integers are valid base `base` digits. 

172 

173 Examples: 

174 >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=10) 

175 12345678 

176 >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=100) 

177 102030405060708 

178 >>> Sequin._big_endian_number([0, 0, 0, 0, 1, 4, 9, 7], base=10) 

179 1497 

180 >>> Sequin._big_endian_number([1, 0, 0, 1, 0, 0, 0, 0], base=2) 

181 144 

182 >>> Sequin._big_endian_number([1, 7, 5, 5], base=8) == 0o1755 

183 True 

184 

185 """ 

186 if base < 2: 

187 raise ValueError(f'invalid base: {base!r}') 

188 ret = 0 

189 allowed_range = range(base) 

190 n = len(digits) 

191 for i in range(n): 

192 i2 = (n - 1) - i 

193 x = digits[i] 

194 if not isinstance(x, int): 

195 raise TypeError(f'not an integer: {x!r}') 

196 if x not in allowed_range: 

197 raise ValueError(f'invalid base {base!r} digit: {x!r}') 

198 ret += (base ** i2) * x 

199 return ret 

200 

201 def generate(self, n: int, /) -> int: 

202 """Generate a base `n` non-negative integer. 

203 

204 We attempt to generate a value using rejection sampling. If the 

205 generated sample is outside the desired range (i.e., is 

206 rejected), then attempt to reuse the sample by seeding 

207 a "higher-order" input sequence of uniformly random numbers (for 

208 a different base). 

209 

210 Args: 

211 n: 

212 Generate numbers in the range 0, ..., `n` - 1. 

213 (Inclusive.) Must be larger than 0. 

214 

215 Returns: 

216 A pseudorandom number in the range 0, ..., `n` - 1. 

217 

218 Raises: 

219 ValueError: 

220 The range is empty. 

221 SequinExhaustedError: 

222 The sequin is exhausted. 

223 

224 Examples: 

225 >>> seq = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1], 

226 ... is_bitstring=True) 

227 >>> seq2 = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1], 

228 ... is_bitstring=True) 

229 >>> seq.generate(5) 

230 3 

231 >>> seq.generate(5) 

232 3 

233 >>> seq.generate(5) 

234 1 

235 >>> seq.generate(5) # doctest: +IGNORE_EXCEPTION_DETAIL 

236 Traceback (most recent call last): 

237 ... 

238 SequinExhaustedError: Sequin is exhausted 

239 

240 Using `n = 1` does not actually consume input bits: 

241 

242 >>> seq2.generate(1) 

243 0 

244 

245 But it still won't work on exhausted sequins: 

246 

247 >>> seq.generate(1) # doctest: +IGNORE_EXCEPTION_DETAIL 

248 Traceback (most recent call last): 

249 ... 

250 SequinExhaustedError: Sequin is exhausted 

251 

252 """ 

253 if 2 not in self.bases: 

254 raise SequinExhaustedError('Sequin is exhausted') 

255 value = self._generate_inner(n, base=2) 

256 if value == n: 

257 raise SequinExhaustedError('Sequin is exhausted') 

258 return value 

259 

260 def _generate_inner( 

261 self, n: int, /, *, base: int = 2 

262 ) -> int: 

263 """Recursive call to generate a base `n` non-negative integer. 

264 

265 We first determine the correct exponent `k` to generate base `n` 

266 numbers from a stream of base `base` numbers, then attempt to 

267 take `k` numbers from the base `base` sequence (or bail if not 

268 possible). If the resulting number `v` is out of range for 

269 base `n`, then push `v - n` onto the rejection queue for 

270 base `r` = `base` ** `k` - `n`, and attempt to generate the 

271 requested base `n` integer from the sequence of base `r` numbers 

272 next. (This recursion is not attempted if `r` = 1.) Otherwise, 

273 return the number. 

274 

275 Args: 

276 n: 

277 Generate numbers in the range 0, ..., `n` - 1. 

278 (Inclusive.) Must be larger than 0. 

279 base: 

280 Use the base `base` sequence as a source for 

281 pseudorandom numbers. 

282 

283 Returns: 

284 A pseudorandom number in the range 0, ..., `n` - 1 if 

285 possible, or `n` if the stream is exhausted. 

286 

287 Raises: 

288 ValueError: 

289 The range is empty. 

290 

291 Examples: 

292 >>> seq = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1], 

293 ... is_bitstring=True) 

294 >>> seq2 = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1], 

295 ... is_bitstring=True) 

296 >>> seq._generate_inner(5) 

297 3 

298 >>> seq._generate_inner(5) 

299 3 

300 >>> seq._generate_inner(5) 

301 1 

302 >>> seq._generate_inner(5) # error condition: sequin exhausted 

303 5 

304 

305 Using `n = 1` does not actually consume input bits, and 

306 always works, regardless of sequin exhaustion: 

307 

308 >>> seq2._generate_inner(1) 

309 0 

310 >>> seq._generate_inner(1) 

311 0 

312 

313 Using an unsuitable range will raise: 

314 

315 >>> seq2._generate_inner(0) # doctest: +IGNORE_EXCEPTION_DETAIL 

316 Traceback (most recent call last): 

317 ... 

318 ValueError: invalid target range 

319 

320 """ 

321 if n < 1: 

322 raise ValueError('invalid target range') 

323 if base < 2: 

324 raise ValueError(f'invalid base: {base!r}') 

325 # p = base ** k, where k is the smallest integer such that 

326 # p >= n. We determine p and k inductively. 

327 p = 1 

328 k = 0 

329 while p < n: 

330 p *= base 

331 k += 1 

332 # The remainder r of p and n is used as the base for rejection 

333 # queue. 

334 r = p - n 

335 # The generated number v is initialized to n because of the 

336 # while loop below. 

337 v = n 

338 while v > n - 1: 

339 list_slice = self._all_or_nothing_shift(k, base=base) 

340 if not list_slice: 

341 if n != 1: 

342 return n 

343 else: 

344 v = 0 

345 v = self._big_endian_number(list_slice, base=base) 

346 if v > n - 1: 

347 # If r is 0, then p == n, so v < n, or rather 

348 # v <= n - 1. 

349 assert r > 0 

350 if r == 1: 

351 continue 

352 self._stash(v - n, base=r) 

353 v = self._generate_inner(n, base=r) 

354 return v 

355 

356 def _stash(self, value: int, /, *, base: int = 2) -> None: 

357 """Stash `value` on the base `base` sequence.""" 

358 if base not in self.bases: 

359 self.bases[base] = collections.deque() 

360 self.bases[base].append(value) 

361 

362class SequinExhaustedError(Exception): 

363 """The sequin is exhausted. 

364 

365 No more values can be generated from this sequin. 

366 

367 """