Coverage for src/gentrie/trie/traversal.py: 100%

41 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-08-17 12:20 -0700

1# -*- coding: utf-8 -*- 

2"""Traversal operations for GeneralizedTrie.""" 

3 

4from collections import deque 

5from typing import Generator 

6 

7from ..exceptions import InvalidGeneralizedKeyError, ErrorTag, TrieTypeError, TrieValueError 

8from ..protocols import GeneralizedKey 

9from ..types import TrieEntry 

10from ..validation import is_generalizedkey 

11 

12from .trie_mixins import TrieMixinsInterface 

13 

14# Disabled because pyright does not understand mixins 

15# use of private attributes from the mixing class as declared 

16# in the TrieMixinsInterface protocol. 

17# pyright: reportPrivateUsage=false 

18 

19 

20class TrieTraversalMixin: 

21 """Mixin providing traversal operations (prefixes, prefixed_by).""" 

22 

23 def prefixes(self: TrieMixinsInterface, key: GeneralizedKey) -> Generator[TrieEntry, None, None]: 

24 """Yields TrieEntry instances for all keys in the trie that are a prefix of the passed key. 

25 

26 Searches the trie for all keys that are prefix matches 

27 for the key and yields their TrieEntry instances. 

28 

29 .. note:: 

30 

31 The `prefixes` method finds all keys that are prefixes of the passed 

32 key. For example, `trie.prefixes('apple')` will find entries for 

33 keys like 'a', 'apple' and 'app'. 

34 

35 .. warning:: **GOTCHA: Generators** 

36 

37 Because generators are not executed until the first iteration, 

38 they may not behave as expected if not consumed properly. For example, 

39 exceptions will not be raised until the generator is iterated over. 

40 

41 Args: 

42 key (GeneralizedKey): Key for matching. 

43 

44 Yields: 

45 :class:`TrieEntry`: The next matching :class:`TrieEntry` instance. 

46 

47 Raises: 

48 InvalidGeneralizedKeyError: 

49 If key is not a valid :class:`GeneralizedKey` 

50 (is not a :class:`Sequence` of :class:`TrieKeyToken` objects). 

51 

52 Usage:: 

53 

54 from gentrie import GeneralizedTrie, TrieEntry 

55 

56 trie: GeneralizedTrie = GeneralizedTrie() 

57 keys: list[str] = ['abcdef', 'abc', 'a', 'abcd', 'qrs'] 

58 for entry in keys: 

59 trie.add(entry) 

60 matches_generator: Generator[TrieEntry, None, None] = trie.prefixes('abcd') 

61 for trie_entry in sorted(list(matches_generator)): 

62 print(f'{trie_entry.ident}: {trie_entry.key}') 

63 

64 # 2: abc 

65 # 3: a 

66 # 4: abcd 

67 """ 

68 if self.runtime_validation and not is_generalizedkey(key): 

69 raise InvalidGeneralizedKeyError( 

70 msg="key is not a valid `GeneralizedKey`", 

71 tag=ErrorTag.TRIE_PREFIXES_INVALID_GENERALIZED_KEY 

72 ) 

73 

74 current_node = self 

75 

76 for token in key: 

77 if current_node.ident: 

78 yield self._trie_entries[current_node.ident] 

79 if token not in current_node.children: 

80 return # no match in children, so the generator is done 

81 current_node = current_node.children[token] 

82 

83 # If we reached here, we have a match for the full key 

84 if current_node.ident: 

85 yield self._trie_entries[current_node.ident] 

86 

87 def prefixed_by(self: TrieMixinsInterface, key: GeneralizedKey, depth: int = -1 

88 ) -> Generator[TrieEntry, None, None]: 

89 """Yields all entries in the trie that are prefixed by the given key, up to a specified depth. 

90 

91 Searches the trie for all keys that start with the provided key and yields their 

92 :class:`TrieEntry` instances. 

93 

94 .. note:: 

95 The `prefixed_by` method finds all keys that start with the given 

96 prefix. For example, `trie.prefixed_by('app')` will find entries for 

97 keys like 'apple' and 'application'. 

98 

99 .. warning:: **GOTCHA: Generators** 

100 

101 Because generators are not executed until the first iteration, 

102 they may not behave as expected if not consumed properly. For example, 

103 exceptions will not be raised until the generator is iterated over. 

104 

105 Args: 

106 key (GeneralizedKey): Key for matching. 

107 depth (`int`, default=-1): Depth starting from the matched key to include. 

108 The depth determines how many 'layers' deeper into the trie to look for prefixed_by.: 

109 * A depth of -1 (the default) includes ALL entries for the exact match and all children nodes. 

110 * A depth of 0 only includes the entries for the *exact* match for the key. 

111 * A depth of 1 includes entries for the exact match and the next layer down. 

112 * A depth of 2 includes entries for the exact match and the next two layers down. 

113 

114 Yields: 

115 :class:`TrieEntry`: The next matching :class:`TrieEntry` instance. 

116 

117 Raises: 

118 InvalidGeneralizedKeyError: 

119 If key arg is not a valid GeneralizedKey. 

120 TrieTypeError: 

121 If depth arg is not an int. 

122 TrieValueError: 

123 If depth arg is less than -1. 

124 

125 

126 Usage:: 

127 

128 from gentrie import GeneralizedTrie, TrieEntry 

129 

130 trie = GeneralizedTrie() 

131 keys: list[str] = ['abcdef', 'abc', 'a', 'abcd', 'qrs'] 

132 for entry in keys: 

133 trie.add(entry) 

134 matches_generator = trie.prefixed_by('abcd') 

135 

136 for trie_entry in sorted(list(matches_generator)): 

137 print(f'{trie_entry.ident}: {trie_entry.key}') 

138 

139 # 1: abcdef 

140 # 4: abcd 

141 """ 

142 if self.runtime_validation and not is_generalizedkey(key): 

143 raise InvalidGeneralizedKeyError( 

144 msg="key arg is not a valid GeneralizedKey", 

145 tag=ErrorTag.TRIE_PREFIXED_BY_INVALID_GENERALIZED_KEY 

146 ) 

147 

148 if not isinstance(depth, int): # type: ignore[reportUnnecessaryIsInstance] 

149 raise TrieTypeError( 

150 msg="depth must be an int", 

151 tag=ErrorTag.TRIE_PREFIXED_BY_BAD_DEPTH_TYPE 

152 ) 

153 if depth < -1: 

154 raise TrieValueError( 

155 msg="depth cannot be less than -1", 

156 tag=ErrorTag.TRIE_PREFIXED_BY_BAD_DEPTH_VALUE 

157 ) 

158 

159 current_node = self 

160 try: 

161 for token in key: 

162 current_node = current_node.children[token] 

163 except KeyError: 

164 return # No match, so the generator is empty 

165 

166 # Perform a breadth-first search to collect prefixed keys up to the specified depth 

167 queue = deque([(current_node, depth)]) 

168 

169 while queue: 

170 node, current_depth = queue.popleft() 

171 if node.ident: 

172 yield self._trie_entries[node.ident] 

173 if current_depth != 0: 

174 for child in node.children.values(): 

175 queue.append((child, current_depth - 1))