Coverage for lmcat\lmcat.py: 55%
145 statements
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-31 20:34 -0700
« prev ^ index » next coverage.py v7.6.10, created at 2024-12-31 20:34 -0700
1from __future__ import annotations
3import argparse
4import io
5import json
6import os
7from dataclasses import dataclass
8from pathlib import Path
9import sys
10from typing import Any, Optional
12# Handle Python 3.11+ vs older Python for TOML parsing
13try:
14 import tomllib
15except ImportError:
16 try:
17 import tomli as tomllib # type: ignore
18 except ImportError:
19 tomllib = None # type: ignore[assignment]
21import igittigitt
@dataclass
class LMCatConfig:
    """Settings controlling how lmcat renders the tree and the file dump

    # Parameters:
     - `tree_divider: str`
        prefix segment added for the children of a directory that is not
        the last entry at its level
     - `indent: str`
        prefix segment added for the children of the last entry at a level
     - `file_divider: str`
        connector written in front of every entry name
     - `content_divider: str`
        fence written before and after each file's contents
     - `include_gitignore: bool`
        whether `.gitignore` files are parsed
        (defaults to `True`)
     - `tree_only: bool`
        whether file contents are omitted from the output
        (defaults to `False`)
    """

    tree_divider: str = "│ "
    indent: str = " "
    file_divider: str = "├── "
    content_divider: str = "``````"
    include_gitignore: bool = True
    tree_only: bool = False

    @classmethod
    def load(cls, cfg_data: dict[str, Any]) -> LMCatConfig:
        """Build a config from a mapping, starting from the defaults

        Keys that are not fields of this dataclass are skipped. When a field
        currently holds a bool and the incoming value is a string, the
        spellings true/1/yes and false/0/no (case-insensitive, surrounding
        whitespace stripped) are converted; any other string is stored as-is.
        """
        truthy = ("true", "1", "yes")
        falsy = ("false", "0", "no")

        config = cls()
        for key, val in cfg_data.items():
            if key not in config.__dataclass_fields__:
                continue

            if isinstance(val, str) and isinstance(getattr(config, key), bool):
                normalized = val.strip().lower()
                if normalized in truthy:
                    val = True
                elif normalized in falsy:
                    val = False

            setattr(config, key, val)
        return config

    @classmethod
    def read(cls, root_dir: Path) -> LMCatConfig:
        """Load config from the first usable source found in `root_dir`

        Lookup order: the `[tool.lmcat]` table of `pyproject.toml`, then
        `lmcat.toml`, then `lmcat.json`. Both TOML sources are skipped when
        no TOML parser could be imported. Defaults are returned when nothing
        matches.
        """
        toml_available = tomllib is not None

        pyproject = root_dir / "pyproject.toml"
        if toml_available and pyproject.is_file():
            with pyproject.open("rb") as fh:
                parsed = tomllib.load(fh)
            # A pyproject.toml without [tool.lmcat] falls through to the
            # next candidate instead of yielding defaults.
            if "tool" in parsed and "lmcat" in parsed["tool"]:
                return cls.load(parsed["tool"]["lmcat"])

        standalone_toml = root_dir / "lmcat.toml"
        if toml_available and standalone_toml.is_file():
            with standalone_toml.open("rb") as fh:
                return cls.load(tomllib.load(fh))

        json_cfg = root_dir / "lmcat.json"
        if json_cfg.is_file():
            with json_cfg.open("r", encoding="utf-8") as fh:
                return cls.load(json.load(fh))

        return cls()
class IgnoreHandler:
    """Decides which paths are excluded, delegating pattern matching to igittigitt"""

    def __init__(self, root_dir: Path, config: LMCatConfig):
        self.root_dir: Path = root_dir
        self.config: LMCatConfig = config
        self.parser: igittigitt.IgnoreParser = igittigitt.IgnoreParser()
        self._init_parser()

    def _init_parser(self) -> None:
        """Feed the parser every ignore file that applies under `root_dir`"""
        # .gitignore rules are optional and handed to igittigitt in one call
        if self.config.include_gitignore:
            self.parser.parse_rule_files(self.root_dir, filename=".gitignore")

        # .lmignore rules are always loaded, once per directory that has one
        for dirpath, _dirnames, _filenames in os.walk(self.root_dir):
            folder: Path = Path(dirpath)
            if (folder / ".lmignore").is_file():
                self.parser.parse_rule_files(folder, filename=".lmignore")

    def is_ignored(self, path: Path) -> bool:
        """Return whether `path` should be left out of the output"""
        # The ignore files themselves are always reported as ignored,
        # without consulting the parser.
        if path.name == ".gitignore" or path.name == ".lmignore":
            return True

        # Everything else is decided by the parsed rules
        return self.parser.match(path)
def sorted_entries(directory: Path) -> list[Path]:
    """Return directory contents sorted: directories first, then files

    Each group is ordered by entry name. Entries that are neither a
    directory nor a regular file are left out.

    The directory is listed once, so both groups come from the same
    snapshot of its contents.
    """
    subdirs: list[Path] = []
    files: list[Path] = []
    for entry in directory.iterdir():
        if entry.is_dir():
            subdirs.append(entry)
        elif entry.is_file():
            files.append(entry)

    subdirs.sort(key=lambda p: p.name)
    files.sort(key=lambda p: p.name)
    return subdirs + files
def walk_dir(
    directory: Path,
    ignore_handler: IgnoreHandler,
    config: LMCatConfig,
    prefix: str = "",
) -> tuple[list[str], list[Path]]:
    """Recursively walk a directory, building tree lines and collecting file paths

    # Parameters:
     - `directory: Path`
        directory whose contents are listed
     - `ignore_handler: IgnoreHandler`
        entries for which `is_ignored` is true are skipped entirely
     - `config: LMCatConfig`
        supplies `file_divider`, `tree_divider` and `indent`
     - `prefix: str`
        text placed in front of every line produced at this level
        (defaults to `""`)

    # Returns:
     - `tuple[list[str], list[Path]]`
        the tree lines, and the non-ignored files in the order they were listed
    """
    tree_output: list[str] = []
    collected_files: list[Path] = []

    # Drop ignored entries before enumerating, so that "last" means the last
    # entry actually shown. Otherwise an ignored trailing entry would leave
    # the final visible one drawn with the mid-list connector.
    entries: list[Path] = [
        entry
        for entry in sorted_entries(directory)
        if not ignore_handler.is_ignored(entry)
    ]
    last_index: int = len(entries) - 1
    last_connector: str = config.file_divider.replace("├", "└")

    for i, entry in enumerate(entries):
        is_last: bool = i == last_index
        connector: str = last_connector if is_last else config.file_divider
        tree_output.append(f"{prefix}{connector}{entry.name}")

        if entry.is_dir():
            # Children of the last entry get plain indentation; otherwise the
            # vertical divider continues down to the following siblings.
            extension: str = config.indent if is_last else config.tree_divider
            sub_output, sub_files = walk_dir(
                entry, ignore_handler, config, prefix + extension
            )
            tree_output.extend(sub_output)
            collected_files.extend(sub_files)
        else:
            collected_files.append(entry)

    return tree_output, collected_files
def walk_and_collect(
    root_dir: Path, config: Optional[LMCatConfig] = None
) -> tuple[list[str], list[Path]]:
    """Produce the full tree listing and the file paths beneath `root_dir`

    The first tree line is the name of the resolved root directory; the
    remaining lines and the file list come from `walk_dir`. A default
    `LMCatConfig` is used when `config` is `None`.
    """
    effective_config: LMCatConfig = LMCatConfig() if config is None else config
    handler: IgnoreHandler = IgnoreHandler(root_dir, effective_config)

    # The root's own name heads the listing
    root_name: str = root_dir.resolve().name

    listing, files = walk_dir(root_dir, handler, effective_config)
    return [root_name, *listing], files
def main() -> None:
    """Main entry point for the script

    Parses the command line, loads the config for the current working
    directory, walks it, and emits a "# File Tree" section followed
    (unless tree-only is in effect) by a "# File Contents" section with
    every collected file wrapped in `content_divider` fences.

    The result is written to the `--output` file when given (parent
    directories are created), otherwise printed to stdout. Unrecognized
    command-line arguments are accepted and not used.
    """
    parser = argparse.ArgumentParser(
        description="lmcat - list tree and content, combining .gitignore + .lmignore",
        add_help=False,
    )
    # Both flags default to None so "not given" can be told apart from an
    # explicit choice; otherwise the flag defaults would always overwrite
    # whatever the config file says.
    parser.add_argument(
        "-g",
        "--no-include-gitignore",
        action="store_false",
        dest="include_gitignore",
        default=None,
        help="Do not parse .gitignore files, only .lmignore (default: parse them).",
    )
    parser.add_argument(
        "-t",
        "--tree-only",
        action="store_true",
        default=None,
        help="Only print the tree, not the file contents.",
    )
    parser.add_argument(
        "-o",
        "--output",
        action="store",
        default=None,
        help="Output file to write the tree and contents to.",
    )
    parser.add_argument(
        "-h", "--help", action="help", help="Show this help message and exit."
    )

    args, _unknown = parser.parse_known_args()

    root_dir = Path(".").resolve()
    config = LMCatConfig.read(root_dir)

    # CLI overrides: only flags that were actually passed replace config values
    if args.include_gitignore is not None:
        config.include_gitignore = args.include_gitignore
    if args.tree_only is not None:
        config.tree_only = args.tree_only

    tree_output, collected_files = walk_and_collect(root_dir, config)

    output: list[str] = []
    output.append("# File Tree")
    output.append("\n```")
    output.extend(tree_output)
    output.append("```\n")

    # Add file contents if not suppressed
    if not config.tree_only:
        output.append("# File Contents")

        for fpath in collected_files:
            # Collected paths descend from root_dir, so make them relative to
            # that same resolved path; Path.cwd() can differ from it.
            relpath_posix = fpath.relative_to(root_dir).as_posix()
            # Doubled braces emit literal "{" / "}"; single braces would make
            # the f-string evaluate `path` / `end_of_file` as expressions.
            pathspec_start = f'{{ path: "{relpath_posix}" }} '
            pathspec_end = f'{{ end_of_file: "{relpath_posix}" }} '
            output.append("")
            output.append(config.content_divider + pathspec_start)
            # Undecodable bytes are dropped rather than aborting the dump
            with fpath.open("r", encoding="utf-8", errors="ignore") as fobj:
                output.append(fobj.read())
            output.append(config.content_divider + pathspec_end)

    # Write output
    if args.output:
        Path(args.output).parent.mkdir(parents=True, exist_ok=True)
        with open(args.output, "w", encoding="utf-8") as f:
            f.write("\n".join(output))
    else:
        if sys.platform == "win32":
            # Re-wrap the standard streams as UTF-8 on Windows so characters
            # the console encoding cannot represent are replaced, not raised on
            sys.stdout = io.TextIOWrapper(
                sys.stdout.buffer, encoding="utf-8", errors="replace"
            )
            sys.stderr = io.TextIOWrapper(
                sys.stderr.buffer, encoding="utf-8", errors="replace"
            )

        print("\n".join(output))
# Run the CLI only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()