Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\groups\reports.py: 28%

57 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-27 20:09 -0800

1# -*- coding: utf-8 -*- 

2from __future__ import annotations 

3 

4 

5"""Reports group utilities for storing report and image artifacts in MTH5.""" 

6 

7from pathlib import Path 

8from typing import Any 

9 

10import h5py 

11 

12# ============================================================================= 

13# Imports 

14# ============================================================================= 

15import numpy as np 

16from PIL import Image 

17 

18from mth5.groups.base import BaseGroup 

19 

20 

21# ============================================================================= 

22# Reports Group 

23# ============================================================================= 

class ReportsGroup(BaseGroup):
    """Store report files (PDF/text) and images under ``/Survey/Reports``.

    Files are embedded into HDF5 datasets.  Every stored dataset carries a
    ``filename`` and a ``file_type`` attribute, which :meth:`get_report` reads
    to write the file back to disk.

    Examples
    --------
    >>> reports = survey.reports_group  # doctest: +SKIP
    >>> reports.add_report("site_report", filename="/tmp/report.pdf")  # doctest: +SKIP
    >>> path = reports.get_report("site_report")  # doctest: +SKIP
    """

    def __init__(self, group: h5py.Group, **kwargs: Any) -> None:
        """Wrap ``group`` and set up the accepted file types and summary layout.

        Parameters
        ----------
        group : h5py.Group
            HDF5 group that holds the report datasets.
        **kwargs : Any
            Forwarded unchanged to ``BaseGroup.__init__``.
        """
        super().__init__(group, **kwargs)

        # File extensions (lower case, no leading dot) that are stored as raw
        # bytes and as decoded pixel arrays, respectively.
        self._accepted_reports: list[str] = ["pdf", "txt", "md"]
        self._accepted_images: list[str] = ["png", "jpg", "jpeg", "tif", "tiff", "bmp"]

        # summary of reports
        # NOTE(review): the "name" field is only 5 bytes wide ("S5"), which is
        # shorter than most report names -- confirm this is intended.
        self._defaults_summary_attrs = {
            "name": "summary",
            "max_shape": (1000,),
            "dtype": np.dtype(
                [
                    ("name", "S5"),
                    ("type", "S32"),
                    ("summary", "S200"),
                    ("hdf5_reference", h5py.ref_dtype),
                ]
            ),
        }

    def add_report(
        self,
        report_name: str,
        report_metadata: dict[str, Any] | None = None,
        filename: str | Path | None = None,
    ) -> None:
        """Add a report or image file to the group.

        Report files (PDF/TXT/MD) are stored as a 1-D ``uint8`` array of the
        raw file bytes; images are decoded with Pillow and stored as a pixel
        array.  Nothing is stored when ``filename`` is ``None`` or when its
        extension is not an accepted type (the latter is logged as an error).

        Parameters
        ----------
        report_name : str
            Dataset name to store the file under.
        report_metadata : dict, optional
            Additional attributes to attach to the dataset.  When omitted a
            default ``description`` attribute is written instead.
        filename : str or Path, optional
            Path to the file to embed; supported types: PDF/TXT/MD and common images.

        Raises
        ------
        FileNotFoundError
            If ``filename`` does not exist.

        Examples
        --------
        >>> reports.add_report("manual", filename="docs/manual.pdf")  # doctest: +SKIP
        """
        if filename is None:
            return

        filename = Path(filename)
        if not filename.exists():
            raise FileNotFoundError(f"{filename} does not exist")

        extension = filename.suffix.lower()[1:]
        if extension in self._accepted_reports:
            # Store raw bytes as uint8 rather than as a Python ``bytes``
            # object: h5py routes ``bytes`` through its string machinery,
            # which cannot hold the embedded NULs found in binary files such
            # as PDFs.
            payload = np.frombuffer(filename.read_bytes(), dtype=np.uint8)
            kind = "report"
        elif extension in self._accepted_images:
            # The context manager releases the file handle once the pixel
            # data has been copied into the array.
            with Image.open(filename) as img:
                payload = np.array(img)
            kind = "image"
        else:
            self.logger.error(
                f"Adding files of type {extension} is not implemented yet"
            )
            return

        dataset = self.hdf5_group.create_dataset(report_name, data=payload)
        self._write_report_attrs(dataset, report_metadata, filename, extension, kind)

    @staticmethod
    def _write_report_attrs(
        dataset: h5py.Dataset,
        report_metadata: dict[str, Any] | None,
        source: Path,
        extension: str,
        kind: str,
    ) -> None:
        """Attach user metadata plus the bookkeeping attributes to ``dataset``."""
        if report_metadata is not None:
            for key, value in report_metadata.items():
                dataset.attrs[key] = value
        else:
            dataset.attrs["description"] = f"{extension.upper()} {kind} file"

        # Written last and unconditionally: ``get_report`` cannot work without
        # them, so user metadata must not be able to omit or override them.
        dataset.attrs["filename"] = source.name
        dataset.attrs["file_type"] = extension

    @staticmethod
    def _safe_filename(stored_name: Any) -> str:
        """Reduce a stored ``filename`` attribute to a bare file name."""
        if isinstance(stored_name, bytes):
            stored_name = stored_name.decode("utf-8")
        # Keep only the final path component so a crafted attribute cannot
        # direct the write outside the current working directory.
        name = Path(str(stored_name)).name
        if name in ("", ".."):
            raise ValueError(f"Invalid stored filename '{stored_name}'")
        return name

    def get_report(self, report_name: str) -> Path:
        """Extract a stored report or image to the current working directory.

        The output file is named after the dataset's ``filename`` attribute
        (directory components, if any, are dropped).  An existing file with
        the same name is overwritten.

        Parameters
        ----------
        report_name : str
            Name of the stored dataset.

        Returns
        -------
        pathlib.Path
            Path to the materialized file on disk.

        Raises
        ------
        ValueError
            If the stored file type is unsupported, or the stored file name
            is not usable as a file name.

        Examples
        --------
        >>> path = reports.get_report("site_report")  # doctest: +SKIP
        >>> path.exists()  # doctest: +SKIP
        True
        """
        dataset = self.hdf5_group[report_name]
        file_type = dataset.attrs["file_type"]

        is_report = file_type in self._accepted_reports
        is_image = file_type in self._accepted_images
        if not (is_report or is_image):
            raise ValueError(f"Unsupported file type '{file_type}' for {report_name}")

        fn_path = Path.cwd() / self._safe_filename(dataset.attrs["filename"])

        if is_report:
            # ``bytes()`` handles both the uint8 array layout and datasets
            # that were written directly from a ``bytes`` object.
            fn_path.write_bytes(bytes(dataset[()]))
            self.logger.info(f"Report written to {fn_path}")
        else:
            Image.fromarray(np.array(dataset[()])).save(fn_path)
            self.logger.info(f"Image report written to {fn_path}")

        return fn_path