Coverage for src/prosemark/domain/compile/service.py: 100%

40 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-09-30 23:09 +0000

1"""Core domain service for compiling node subtrees. 

2 

3This module implements the business logic for traversing 

4and compiling prosemark node hierarchies. 

5""" 

6 

7import re 

8from collections.abc import Generator 

9from pathlib import Path 

10 

11from prosemark.domain.compile.models import CompileRequest, CompileResult, NodeContent 

12from prosemark.domain.models import NodeId 

13from prosemark.ports.node_repo import NodeRepo 

14 

15 

class CompileService:
    """Domain service for compiling node subtrees into concatenated text.

    This service implements the core business logic for:
    - Depth-first traversal of node hierarchies
    - Content concatenation with proper formatting
    - Statistics tracking (node counts, empty handling)
    - Memory-efficient streaming processing
    """

    def __init__(self, node_repo: NodeRepo) -> None:
        """Initialize the compile service.

        Args:
            node_repo: Repository for accessing node data and relationships

        """
        self._node_repo = node_repo

    def compile_subtree(self, request: CompileRequest) -> CompileResult:
        """Compile a node and all its descendants into plain text.

        This method traverses the node subtree in depth-first pre-order,
        concatenates content with double newlines, and tracks statistics.

        Args:
            request: The compile request with target node and options

        Returns:
            CompileResult containing the concatenated content and statistics

        Raises:
            NodeNotFoundError: If the specified node_id doesn't exist

        """
        # Fail fast on a missing root before any traversal work starts.
        self._ensure_node_exists(request.node_id)

        content_parts: list[str] = []
        total_nodes = 0
        skipped_empty = 0

        for node_content in self._traverse_depth_first(request.node_id):
            total_nodes += 1

            # Whitespace-only content counts as empty; it is dropped unless
            # the request explicitly asks for empty nodes to be included.
            if not node_content.content.strip() and not request.include_empty:
                skipped_empty += 1
                continue

            content_parts.append(node_content.content)

        return CompileResult(
            content='\n\n'.join(content_parts),
            # Every included node contributed exactly one part.
            node_count=len(content_parts),
            total_nodes=total_nodes,
            skipped_empty=skipped_empty,
        )

    def _ensure_node_exists(self, node_id: NodeId) -> None:
        """Verify a node exists by reading its frontmatter.

        Args:
            node_id: The node whose existence is checked

        Raises:
            NodeNotFoundError: If reading the node's frontmatter fails for any reason

        """
        try:
            self._node_repo.read_frontmatter(node_id)
        except Exception as e:
            # Any repository failure is reported as "node not found"; the
            # original exception stays attached as the cause.
            # NOTE(review): imported lazily, presumably to avoid an import
            # cycle with the ports package - confirm before hoisting.
            from prosemark.ports.compile.service import NodeNotFoundError

            raise NodeNotFoundError(node_id) from e

    def _traverse_depth_first(self, node_id: NodeId) -> Generator[NodeContent, None, None]:
        """Traverse nodes in depth-first pre-order.

        This is a generator, so the existence check and file reads for a node
        only run once iteration reaches that node.

        Args:
            node_id: The root node to start traversal from

        Yields:
            NodeContent objects in depth-first pre-order

        Raises:
            NodeNotFoundError: If any required node doesn't exist

        """
        self._ensure_node_exists(node_id)

        content = self._read_node_content(node_id)
        children_ids = self._get_children_from_binder(node_id)

        # Yield current node first (pre-order)
        yield NodeContent(id=node_id, content=content, children=children_ids)

        # NOTE(review): there is no guard against a binder that lists one of
        # the node's own ancestors; such a cycle would recurse without bound.
        for child_id in children_ids:
            try:
                yield from self._traverse_depth_first(child_id)
            except OSError:  # pragma: no cover
                # Skip children whose traversal fails with an OS-level error
                # rather than failing the entire compilation. OSError already
                # covers FileNotFoundError and PermissionError.
                continue

    @staticmethod
    def _read_node_content(node_id: NodeId) -> str:  # pragma: no cover
        """Read the content of a node from its draft file.

        The draft is read from ``nodes/<node_id>/draft.md`` relative to the
        current working directory.

        Args:
            node_id: The node to read content from

        Returns:
            The content with frontmatter stripped and surrounding whitespace
            removed; empty string if the file doesn't exist or can't be read

        """
        file_path = Path(f'nodes/{node_id}/draft.md')

        try:
            content = file_path.read_text(encoding='utf-8')
        except OSError:
            # File doesn't exist or can't be read - return empty content.
            return ''

        if content.startswith('---\n'):
            end_marker = content.find('\n---\n')
            if end_marker != -1:
                # Skip past the closing "\n---\n" (5 characters).
                content = content[end_marker + 5 :]
            elif content.endswith('\n---'):
                # The closing marker is the last line with no trailing
                # newline: the file is frontmatter only, so there is no body.
                content = ''
            # Otherwise the frontmatter is unterminated; return as-is.

        return content.strip()

    @staticmethod
    def _get_children_from_binder(node_id: NodeId) -> list[NodeId]:  # pragma: no cover
        """Get the list of child node IDs from the binder file.

        The binder is read from ``nodes/<node_id>/binder.yaml`` relative to
        the current working directory.

        Args:
            node_id: The parent node to get children for

        Returns:
            List of child node IDs in binder order, empty list if the binder
            file doesn't exist or can't be read

        """
        binder_path = Path(f'nodes/{node_id}/binder.yaml')

        try:
            binder_content = binder_path.read_text(encoding='utf-8')
        except OSError:
            # Binder file doesn't exist or can't be read - return empty list.
            return []

        # Extract node IDs using a regex rather than a YAML parser.
        # Look for entries like "- 01923456-789a-7123-8abc-def012345678"
        uuid_pattern = r'- ([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
        matches = re.findall(uuid_pattern, binder_content)

        return [NodeId(match) for match in matches]