Coverage for lmcat\lmcat.py: 55%

145 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2024-12-31 20:34 -0700

1from __future__ import annotations 

2 

3import argparse 

4import io 

5import json 

6import os 

7from dataclasses import dataclass 

8from pathlib import Path 

9import sys 

10from typing import Any, Optional 

11 

12# Handle Python 3.11+ vs older Python for TOML parsing 

13try: 

14 import tomllib 

15except ImportError: 

16 try: 

17 import tomli as tomllib # type: ignore 

18 except ImportError: 

19 tomllib = None # type: ignore[assignment] 

20 

21import igittigitt 

22 

23 

@dataclass
class LMCatConfig:
    """Settings controlling how lmcat renders its tree and file contents

    # Parameters:
     - `tree_divider: str`
        prefix segment added beneath a directory that is not the last entry
     - `indent: str`
        prefix segment added beneath a directory that is the last entry
     - `file_divider: str`
        connector placed in front of every entry name in the tree
     - `content_divider: str`
        marker placed in front of the path markers that wrap each file's contents
     - `include_gitignore: bool`
        whether `.gitignore` files are parsed
        (defaults to `True`)
     - `tree_only: bool`
        whether to skip emitting file contents
        (defaults to `False`)
    """

    # NOTE(review): the string defaults below are copied exactly as they appear
    # in this listing; confirm against the checked-in file that no whitespace
    # inside the literals was collapsed.
    tree_divider: str = "│ "
    indent: str = " "
    file_divider: str = "├── "
    content_divider: str = "``````"
    include_gitignore: bool = True
    tree_only: bool = False

    @classmethod
    def load(cls, cfg_data: dict[str, Any]) -> LMCatConfig:
        """Build a config from a plain mapping, starting from the defaults

        Keys that are not dataclass fields are skipped. String values given for
        boolean fields are translated when they spell a recognised
        true/false word; any other value is stored unchanged.
        """
        truthy_words = ("true", "1", "yes")
        falsy_words = ("false", "0", "no")

        instance = cls()
        for name, raw in cfg_data.items():
            if name not in instance.__dataclass_fields__:
                continue

            value = raw
            # Only string inputs aimed at a boolean field get translated
            if isinstance(raw, str) and isinstance(getattr(instance, name), bool):
                normalized = raw.strip().lower()
                if normalized in truthy_words:
                    value = True
                elif normalized in falsy_words:
                    value = False
            setattr(instance, name, value)
        return instance

    @classmethod
    def read(cls, root_dir: Path) -> LMCatConfig:
        """Locate a config under `root_dir` and load it

        Lookup order: the `[tool.lmcat]` table of `pyproject.toml`, then
        `lmcat.toml`, then `lmcat.json`. Both TOML sources are skipped when no
        TOML parser could be imported. Defaults are returned if nothing matches.
        """
        if tomllib is not None:
            pyproject_file = root_dir / "pyproject.toml"
            if pyproject_file.is_file():
                with pyproject_file.open("rb") as handle:
                    pyproject = tomllib.load(handle)
                # A pyproject.toml without [tool.lmcat] falls through to the next source
                if "tool" in pyproject and "lmcat" in pyproject["tool"]:
                    return cls.load(pyproject["tool"]["lmcat"])

            standalone_toml = root_dir / "lmcat.toml"
            if standalone_toml.is_file():
                with standalone_toml.open("rb") as handle:
                    return cls.load(tomllib.load(handle))

        json_file = root_dir / "lmcat.json"
        if json_file.is_file():
            with json_file.open("r", encoding="utf-8") as handle:
                return cls.load(json.load(handle))

        # Nothing found: plain defaults
        return cls()

88 

89 

class IgnoreHandler:
    """Decides which paths are excluded, delegating pattern matching to igittigitt"""

    def __init__(self, root_dir: Path, config: LMCatConfig):
        self.root_dir: Path = root_dir
        self.config: LMCatConfig = config
        self.parser: igittigitt.IgnoreParser = igittigitt.IgnoreParser()
        self._init_parser()

    def _init_parser(self) -> None:
        """Feed the ignore-rule files found under `root_dir` to the parser"""
        # .gitignore rules are optional and handed to igittigitt from the root
        if self.config.include_gitignore:
            self.parser.parse_rule_files(self.root_dir, filename=".gitignore")

        # .lmignore rules are always loaded, once per directory that holds one
        for dirpath, _dirnames, _filenames in os.walk(self.root_dir):
            folder: Path = Path(dirpath)
            if (folder / ".lmignore").is_file():
                self.parser.parse_rule_files(folder, filename=".lmignore")

    def is_ignored(self, path: Path) -> bool:
        """Return `True` when `path` should be left out"""
        # The rule files themselves are always reported as ignored.
        # NOTE(review): the previous comment here read "Never ignore the
        # gitignore/lmignore files themselves", which contradicts the
        # `return True`; confirm which behaviour is intended.
        if path.name == ".gitignore" or path.name == ".lmignore":
            return True

        # Everything else is decided by the parsed rules
        return self.parser.match(path)

120 

121 

def sorted_entries(directory: Path) -> list[Path]:
    """Return the contents of `directory`: directories first, then files

    Each group is ordered by entry name. Entries that are neither a directory
    nor a regular file (as reported by `is_dir()` / `is_file()`) are left out.

    The directory is listed a single time so both groups come from the same
    snapshot of its contents.
    """
    subdirs: list[Path] = []
    files: list[Path] = []
    for entry in directory.iterdir():
        if entry.is_dir():
            subdirs.append(entry)
        elif entry.is_file():
            files.append(entry)

    subdirs.sort(key=lambda p: p.name)
    files.sort(key=lambda p: p.name)
    return subdirs + files

131 

132 

def walk_dir(
    directory: Path,
    ignore_handler: IgnoreHandler,
    config: LMCatConfig,
    prefix: str = "",
) -> tuple[list[str], list[Path]]:
    """Recursively walk a directory, building tree lines and collecting file paths

    # Parameters:
     - `directory: Path`
        directory whose entries are listed
     - `ignore_handler: IgnoreHandler`
        entries for which `is_ignored` returns `True` are skipped entirely
     - `config: LMCatConfig`
        supplies `file_divider`, `tree_divider` and `indent`
     - `prefix: str`
        text placed in front of every line produced for this level
        (defaults to `""`)

    # Returns:
     - `tuple[list[str], list[Path]]`
        the tree lines, and the non-directory entries encountered, both in
        traversal order
    """
    tree_output: list[str] = []
    collected_files: list[Path] = []

    # Drop ignored entries *before* working out which one is last; otherwise an
    # ignored trailing entry leaves the final visible entry with the "├"
    # connector and the wrong continuation prefix for its children.
    visible: list[Path] = [
        entry
        for entry in sorted_entries(directory)
        if not ignore_handler.is_ignored(entry)
    ]
    last_index: int = len(visible) - 1

    for index, entry in enumerate(visible):
        is_last: bool = index == last_index
        connector: str = (
            config.file_divider.replace("├", "└") if is_last else config.file_divider
        )
        tree_output.append(f"{prefix}{connector}{entry.name}")

        if entry.is_dir():
            # Below the last entry nothing continues, so plain indent is used
            extension: str = config.indent if is_last else config.tree_divider
            sub_output, sub_files = walk_dir(
                entry, ignore_handler, config, prefix + extension
            )
            tree_output.extend(sub_output)
            collected_files.extend(sub_files)
        else:
            collected_files.append(entry)

    return tree_output, collected_files

170 

171 

def walk_and_collect(
    root_dir: Path, config: Optional[LMCatConfig] = None
) -> tuple[list[str], list[Path]]:
    """Produce the full tree listing and the list of files under `root_dir`

    # Parameters:
     - `root_dir: Path`
        directory to start from
     - `config: Optional[LMCatConfig]`
        settings to use; a default `LMCatConfig` is created when omitted
        (defaults to `None`)

    # Returns:
     - `tuple[list[str], list[Path]]`
        tree lines (the first one is the resolved name of `root_dir`) and the
        collected file paths
    """
    effective_config: LMCatConfig = config if config is not None else LMCatConfig()
    handler: IgnoreHandler = IgnoreHandler(root_dir, effective_config)

    # The listing is headed by the root directory's own name
    header: str = root_dir.resolve().name

    listing, found_files = walk_dir(root_dir, handler, effective_config)
    return [header, *listing], found_files

192 

193 

def main() -> None:
    """Main entry point for the script

    Parses the command line, reads the config from the current directory,
    applies the CLI overrides, then writes the file tree (and, unless
    `--tree-only` is given, every collected file's contents) either to the
    `--output` file or to stdout.
    """
    parser = argparse.ArgumentParser(
        description="lmcat - list tree and content, combining .gitignore + .lmignore",
        add_help=False,
    )
    parser.add_argument(
        "-g",
        "--no-include-gitignore",
        action="store_false",
        dest="include_gitignore",
        default=True,
        help="Do not parse .gitignore files, only .lmignore (default: parse them).",
    )
    parser.add_argument(
        "-t",
        "--tree-only",
        action="store_true",
        default=False,
        help="Only print the tree, not the file contents.",
    )
    parser.add_argument(
        "-o",
        "--output",
        action="store",
        default=None,
        help="Output file to write the tree and contents to.",
    )
    parser.add_argument(
        "-h", "--help", action="help", help="Show this help message and exit."
    )

    # Unrecognised arguments are tolerated and not used
    args, _unknown = parser.parse_known_args()

    root_dir = Path(".").resolve()
    config = LMCatConfig.read(root_dir)

    # CLI overrides
    config.include_gitignore = args.include_gitignore
    config.tree_only = args.tree_only

    tree_output, collected_files = walk_and_collect(root_dir, config)

    output: list[str] = ["# File Tree", "\n```", *tree_output, "```\n"]

    # Add file contents if not suppressed
    if not config.tree_only:
        output.append("# File Contents")

        for fpath in collected_files:
            # Every collected file lives below root_dir, so it is the safe
            # anchor for the relative path.
            relpath_posix = fpath.relative_to(root_dir).as_posix()
            # Doubled braces emit literal "{" / "}"; single braces would make
            # the f-string evaluate `path` / `end_of_file` as expressions.
            pathspec_start = f'{{ path: "{relpath_posix}" }} '
            pathspec_end = f'{{ end_of_file: "{relpath_posix}" }} '
            output.append("")
            output.append(config.content_divider + pathspec_start)
            # Undecodable bytes are dropped instead of aborting the whole run
            with fpath.open("r", encoding="utf-8", errors="ignore") as fobj:
                output.append(fobj.read())
            output.append(config.content_divider + pathspec_end)

    rendered = "\n".join(output)

    # Write output
    if args.output:
        Path(args.output).parent.mkdir(parents=True, exist_ok=True)
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(rendered)
    else:
        if sys.platform == "win32":
            # Re-wrap the standard streams as UTF-8 so the tree-drawing
            # characters can be written on Windows
            sys.stdout = io.TextIOWrapper(
                sys.stdout.buffer, encoding="utf-8", errors="replace"
            )
            sys.stderr = io.TextIOWrapper(
                sys.stderr.buffer, encoding="utf-8", errors="replace"
            )

        print(rendered)

274 

275 

# Run the command-line interface only when executed as a script, not on import
if __name__ == "__main__":
    main()