Source code for jeevesagent.loader.excel
"""Excel (.xlsx / .xlsm) loader → markdown.
Uses ``openpyxl`` (lazy import). Each sheet becomes a markdown
section with the sheet name as an ``#`` heading and the cell grid
as a markdown table. Empty rows / columns at the edges are
trimmed.
For very wide / tall sheets, the markdown output can balloon —
prefer pandas + tailored summarization for spreadsheets that
exceed an LLM's comfortable context. This loader is intended for
the common case (config sheets, lookup tables, structured records).
"""
from __future__ import annotations
from pathlib import Path
from typing import Any
from .base import Document
def _stringify(cell: Any) -> str:
"""Render a cell value as a markdown-table-safe string."""
if cell is None:
return ""
s = str(cell)
return s.replace("|", "\\|").replace("\n", " ")
def _trim_empty_edges(
rows: list[list[Any]],
) -> list[list[str]]:
"""Drop trailing all-empty rows and trailing empty columns."""
string_rows = [
[_stringify(c) for c in row] for row in rows
]
# Trim trailing empty rows
while string_rows and not any(
c.strip() for c in string_rows[-1]
):
string_rows.pop()
if not string_rows:
return []
# Trim trailing empty columns
max_meaningful = 0
for row in string_rows:
for i in range(len(row) - 1, -1, -1):
if row[i].strip():
max_meaningful = max(max_meaningful, i + 1)
break
if max_meaningful:
string_rows = [row[:max_meaningful] for row in string_rows]
else:
string_rows = []
return string_rows
def _rows_to_markdown_table(rows: list[list[str]]) -> str:
if not rows:
return "(empty)"
header = rows[0]
body = rows[1:]
out = [
"| " + " | ".join(header) + " |",
"| " + " | ".join("---" for _ in header) + " |",
]
for row in body:
padded = row + [""] * (len(header) - len(row))
out.append("| " + " | ".join(padded) + " |")
return "\n".join(out)
[docs]
def load_excel(path: str | Path) -> Document:
"""Load an Excel workbook → markdown.
Each sheet becomes ``## {sheet_name}`` with the cell grid as a
markdown table. Formula cells return their cached values
(data_only=True).
Requires ``openpyxl``:
``pip install 'jeevesagent[loader-excel]'``.
"""
try:
import openpyxl # type: ignore[import-not-found, import-untyped]
except ImportError as exc: # pragma: no cover
raise ImportError(
"openpyxl is not installed. "
"Install with: pip install 'jeevesagent[loader-excel]'."
) from exc
p = Path(path)
wb = openpyxl.load_workbook(str(p), data_only=True, read_only=True)
parts: list[str] = [f"# {p.stem}\n"]
sheet_names: list[str] = []
total_rows = 0
for sheet in wb.worksheets:
sheet_names.append(sheet.title)
rows = list(sheet.iter_rows(values_only=True))
trimmed = _trim_empty_edges([list(r) for r in rows])
total_rows += max(len(trimmed) - 1, 0)
parts.append(f"## {sheet.title}\n")
if not trimmed:
parts.append("(empty)\n")
continue
parts.append(_rows_to_markdown_table(trimmed))
parts.append("")
wb.close()
return Document(
content="\n".join(parts).rstrip() + "\n",
metadata={
"source": str(p),
"format": "xlsx",
"sheet_count": len(sheet_names),
"sheet_names": sheet_names,
"row_count": total_rows,
},
)