Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\groups\reports.py: 28%
57 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
1# -*- coding: utf-8 -*-
2from __future__ import annotations
5"""Reports group utilities for storing report and image artifacts in MTH5."""
7from pathlib import Path
8from typing import Any
10import h5py
12# =============================================================================
13# Imports
14# =============================================================================
15import numpy as np
16from PIL import Image
18from mth5.groups.base import BaseGroup
21# =============================================================================
22# Reports Group
23# =============================================================================
class ReportsGroup(BaseGroup):
    """Store report files (PDF/text) and images under ``/Survey/Reports``.

    Files are embedded into HDF5 datasets with basic metadata preserved.
    Report files (``pdf``, ``txt``, ``md``) are stored as raw bytes, images
    are decoded with Pillow and stored as pixel arrays.  Every dataset
    carries ``filename`` and ``file_type`` attributes, which
    :meth:`get_report` reads to write the file back to disk.

    Examples
    --------
    >>> reports = survey.reports_group
    >>> _ = reports.add_report("site_report", filename="/tmp/report.pdf")
    >>> _ = reports.get_report("site_report")  # doctest: +SKIP
    """

    def __init__(self, group: h5py.Group, **kwargs: Any) -> None:
        """Wrap ``group`` and register the supported file extensions.

        Parameters
        ----------
        group : h5py.Group
            HDF5 group handed to ``BaseGroup.__init__``.
        **kwargs : Any
            Forwarded unchanged to ``BaseGroup.__init__``.
        """
        super().__init__(group, **kwargs)

        # Extensions are compared lower-case and without the leading dot.
        self._accepted_reports: list[str] = ["pdf", "txt", "md"]
        self._accepted_images: list[str] = [
            "png",
            "jpg",
            "jpeg",
            "tif",
            "tiff",
            "bmp",
        ]

        # summary of reports
        # NOTE(review): the "name" field is only 5 bytes wide ("S5"), which
        # is shorter than typical report names -- confirm this is intended.
        self._defaults_summary_attrs = {
            "name": "summary",
            "max_shape": (1000,),
            "dtype": np.dtype(
                [
                    ("name", "S5"),
                    ("type", "S32"),
                    ("summary", "S200"),
                    ("hdf5_reference", h5py.ref_dtype),
                ]
            ),
        }

    @staticmethod
    def _attach_file_metadata(
        dataset: Any,
        filename: Path,
        extension: str,
        kind: str,
        report_metadata: dict[str, Any] | None,
    ) -> None:
        """Write user metadata plus the bookkeeping attributes to ``dataset``.

        ``filename`` and ``file_type`` are written last so they always hold
        the values :meth:`get_report` needs, even when ``report_metadata``
        contains keys with the same names.
        """
        if report_metadata is not None:
            for key, value in report_metadata.items():
                dataset.attrs[key] = value
        else:
            dataset.attrs["description"] = f"{extension.upper()} {kind} file"
        dataset.attrs["filename"] = filename.name
        dataset.attrs["file_type"] = extension

    def add_report(
        self,
        report_name: str,
        report_metadata: dict[str, Any] | None = None,
        filename: str | Path | None = None,
    ) -> None:
        """Add a report or image file to the group.

        Nothing is stored when ``filename`` is ``None``.  A file whose
        extension is not supported is not stored either; an error is logged
        instead of raising.

        Parameters
        ----------
        report_name : str
            Dataset name to store the file under.
        report_metadata : dict, optional
            Additional attributes to attach to the dataset.  When omitted, a
            default ``description`` attribute is written instead.
        filename : str or Path, optional
            Path to the file to embed; supported types: PDF/TXT/MD and common
            images.

        Raises
        ------
        FileNotFoundError
            If ``filename`` does not exist.

        Examples
        --------
        >>> reports.add_report("manual", filename="docs/manual.pdf")  # doctest: +SKIP
        """
        if filename is None:
            return

        filename = Path(filename)
        if not filename.exists():
            raise FileNotFoundError(f"{filename} does not exist")

        extension = filename.suffix.lower()[1:]

        if extension in self._accepted_reports:
            # Store the file as a uint8 array rather than a Python ``bytes``
            # object: h5py treats ``bytes`` as a string, and its string
            # machinery cannot hold the embedded NUL bytes found in binary
            # files such as PDFs.
            raw = np.frombuffer(filename.read_bytes(), dtype=np.uint8)
            dataset = self.hdf5_group.create_dataset(report_name, data=raw)
            self._attach_file_metadata(
                dataset, filename, extension, "report", report_metadata
            )
        elif extension in self._accepted_images:
            # ``np.array`` forces the pixel data to be read, so the image
            # file can be closed as soon as the array exists.
            with Image.open(filename) as img:
                img_data = np.array(img)
            dataset = self.hdf5_group.create_dataset(report_name, data=img_data)
            self._attach_file_metadata(
                dataset, filename, extension, "image", report_metadata
            )
        else:
            self.logger.error(
                f"Adding files of type {extension} is not implemented yet"
            )

    def get_report(self, report_name: str) -> Path:
        """Extract a stored report or image to the current working directory.

        The output file is named after the dataset's ``filename`` attribute;
        only the final path component of that attribute is used, so the file
        is always written directly inside the working directory.

        Parameters
        ----------
        report_name : str
            Name of the stored dataset.

        Returns
        -------
        pathlib.Path
            Path to the materialized file on disk.

        Raises
        ------
        ValueError
            If the stored file type is unsupported.

        Examples
        --------
        >>> path = reports.get_report("site_report")  # doctest: +SKIP
        >>> path.exists()  # doctest: +SKIP
        True
        """
        dataset = self.hdf5_group[report_name]
        file_type = dataset.attrs["file_type"]

        is_report = file_type in self._accepted_reports
        if not is_report and file_type not in self._accepted_images:
            raise ValueError(f"Unsupported file type '{file_type}' for {report_name}")

        # Keep only the base name so a stored absolute or nested path cannot
        # redirect the write outside the working directory.
        fn_path = Path.cwd() / Path(dataset.attrs["filename"]).name

        if is_report:
            # ``bytes`` handles both a uint8 array and a stored byte string.
            fn_path.write_bytes(bytes(dataset[()]))
            self.logger.info(f"Report written to {fn_path}")
            return fn_path

        img = Image.fromarray(np.array(dataset[()]))
        img.save(fn_path)
        self.logger.info(f"Image report written to {fn_path}")
        return fn_path