Coverage for trnbl\loggers\local\build_dist.py: 0%

106 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-17 02:46 -0700

import base64
import json
import mimetypes
from pathlib import Path
from typing import Literal, overload

import requests  # type: ignore[import-untyped]
from bs4 import BeautifulSoup, Tag  # type: ignore[import-untyped]

8 

9 

10@overload 

11def get_remote( 

12 path_or_url: str, 

13 download_remote: bool = False, 

14 get_bytes: Literal[False] = False, 

15 allow_remote_fail: Literal[False] = False, 

16) -> str: ... 

17@overload 

18def get_remote( 

19 path_or_url: str, 

20 download_remote: bool = False, 

21 get_bytes: Literal[True] = True, 

22 allow_remote_fail: Literal[False] = False, 

23) -> bytes: ... 

24@overload 

25def get_remote( 

26 path_or_url: str, 

27 download_remote: bool = False, 

28 get_bytes: Literal[False] = False, 

29 allow_remote_fail: bool = False, 

30) -> str | None: ... 

31@overload 

32def get_remote( 

33 path_or_url: str, 

34 download_remote: bool = False, 

35 get_bytes: Literal[True] = True, 

36 allow_remote_fail: bool = False, 

37) -> bytes | None: ... 

38def get_remote( 

39 path_or_url: str, 

40 download_remote: bool = False, 

41 get_bytes: bool = False, 

42 allow_remote_fail: bool = True, 

43) -> str | bytes | None: 

44 """gets a resource from a path or url 

45 

46 - returns a string by default, or bytes if `get_bytes` is `True` 

47 - returns `None` if its from the web and `download_remote` is `False` 

48 

49 # Parameters: 

50 - `path_or_url : str` 

51 location of the resource. if it starts with `http`, it is considered a url 

52 - `download_remote : bool` 

53 whether to download the resource if it is a url 

54 (defaults to `False`) 

55 - `get_bytes : bool` 

56 whether to return the resource as bytes 

57 (defaults to `False`) 

58 - `allow_remote_fail : bool` 

59 if a remote resource fails to download, return `None`. if this is `False`, raise an exception 

60 (defaults to `True`) 

61 

62 # Raises: 

63 - `requests.HTTPError` 

64 if the remote resource returns an error, and `allow_remote_fail` is `False` 

65 

66 # Returns: 

67 - `str|bytes|None` 

68 """ 

69 if path_or_url.startswith("http"): 

70 if download_remote: 

71 try: 

72 response: requests.Response = requests.get(path_or_url) 

73 response.raise_for_status() 

74 except Exception as e: 

75 if allow_remote_fail: 

76 return None 

77 else: 

78 raise e 

79 if get_bytes: 

80 return response.content 

81 else: 

82 return response.text 

83 else: 

84 return None 

85 else: 

86 path: Path = Path(path_or_url) 

87 if get_bytes: 

88 return path.read_bytes() 

89 else: 

90 return path.read_text(encoding="utf-8") 

91 

92 

def build_dist(
	path: Path,
	minify: bool = True,
	download_remote: bool = True,
) -> str:
	"""Build a single file html from a folder

	inlines stylesheets, favicons, scripts, and images referenced by the page
	at `path`. local resources are always inlined; remote (`http...`) resources
	are inlined only when `download_remote` is `True`, otherwise their tags are
	left untouched.

	partially from https://stackoverflow.com/questions/44646481/merging-js-css-html-into-single-html

	# Parameters:
	- `path : Path`
		path to the entry html file
	- `minify : bool`
		minify the final html with `minify_html`
		(defaults to `True`)
	- `download_remote : bool`
		whether to download remote resources
		(defaults to `True`)

	# Returns:
	- `str`
		the self-contained html document
	"""
	original_html_text: str = Path(path).read_text(encoding="utf-8")
	soup: BeautifulSoup = BeautifulSoup(original_html_text, features="html.parser")

	# Find link tags. example: <link rel="stylesheet" href="css/somestyle.css">
	# also handles favicon
	for tag in soup.find_all("link", href=True):
		file_content: str | bytes | None = get_remote(
			tag["href"],
			download_remote=download_remote,
			get_bytes=tag.get("rel") == ["icon"],  # assume text if not icon
		)
		if file_content is None:
			# remote resource was not downloaded -- leave the tag as-is
			continue

		# remove the tag from soup; a replacement is inserted below
		tag.extract()

		if tag.get("rel") == ["stylesheet"]:
			# insert style element for CSS
			new_style: Tag = soup.new_tag("style")
			new_style.string = file_content
			soup.html.head.append(new_style)
		elif tag.get("rel") == ["icon"]:
			# inline favicon as a base64 data uri
			# (default mime type image/x-icon covers `.ico` and unknown extensions)
			mime_type: str = (
				"image/png" if tag["href"].lower().endswith(".png") else "image/x-icon"
			)
			base64_content: str = base64.b64encode(file_content).decode("ascii")  # type: ignore[arg-type]
			new_link: Tag = soup.new_tag(
				"link",
				rel="icon",
				href=f"data:{mime_type};base64,{base64_content}",
			)
			soup.html.head.append(new_link)

	# Find script tags. example: <script src="js/somescript.js"></script>
	for tag in soup.find_all("script", src=True):
		file_text: str | bytes | None = get_remote(
			tag["src"],
			download_remote=download_remote,
		)
		if file_text is not None:
			# remove the tag from soup
			tag.extract()
			# insert inline script element
			new_script: Tag = soup.new_tag("script")
			new_script.string = file_text
			soup.html.head.append(new_script)

	# Find image tags. example: <img src="images/img1.png">
	for tag in soup.find_all("img", src=True):
		img_content: bytes | None = get_remote(
			tag["src"], download_remote=download_remote, get_bytes=True
		)
		if img_content is not None:
			# replace filename with base64 of the content of the file
			# BUG FIX: previously every image was labeled `image/png` regardless of
			# its actual type; guess from the filename, falling back to png
			img_mime: str = mimetypes.guess_type(tag["src"])[0] or "image/png"
			base64_img_content: str = base64.b64encode(img_content).decode("ascii")  # type: ignore[arg-type]
			tag["src"] = f"data:{img_mime};base64, {base64_img_content}"

	out_html: str = str(soup)

	if minify:
		import minify_html  # type: ignore[import-untyped]

		out_html = minify_html.minify(out_html, minify_css=True, minify_js=True)

	return out_html

179 

180 

def main() -> None:
	"""CLI entry point: build a single-file html page and print or save it

	flags:
	- `path`: input html file
	- `--output/-o`: write the result to this file instead of stdout
	- `--no-minify`: disable minification
	- `--download/-d`: enable downloading of remote resources
	- `--json/-j`: emit the result as a JSON string
	- `--pkg-info/-p`: prepend a comment with name/version/homepage from a `pyproject.toml`

	# Raises:
	- `FileNotFoundError`
		if the input path does not exist
	"""
	# parse args
	import argparse

	parser: argparse.ArgumentParser = argparse.ArgumentParser(
		description="Build a single file HTML from a folder"
	)
	parser.add_argument("path", type=str, help="Path to the HTML file or folder")
	parser.add_argument(
		"--output", "-o", type=str, help="Output file path (default: print to console)"
	)
	parser.add_argument("--no-minify", action="store_true", help="Disable minification")
	parser.add_argument(
		"--download",
		"-d",
		action="store_true",
		# BUG FIX: old help text said "Disable downloading remote resources", but
		# this `store_true` flag *enables* downloading (`download_remote=args.download`)
		help="Download remote resources (default: leave remote references as-is)",
	)
	parser.add_argument("--json", "-j", action="store_true", help="Output as JSON")
	parser.add_argument(
		"--pkg-info",
		"-p",
		type=str,
		help="Add a comment with info from the given `pyproject.toml` file",
	)

	args: argparse.Namespace = parser.parse_args()

	input_path: Path = Path(args.path)
	if not input_path.exists():
		raise FileNotFoundError(f"Path {input_path} does not exist")

	# build page
	result: str = build_dist(
		path=input_path,
		minify=not args.no_minify,
		download_remote=args.download,
	)

	# add package info
	if args.pkg_info:
		# tomllib is stdlib from 3.11; fall back to the tomli backport on older versions
		try:
			import tomllib  # type: ignore
		except ImportError:
			import tomli as tomllib  # type: ignore

		# read pyproject.toml
		with open(args.pkg_info, "rb") as f:
			pkg_info = tomllib.load(f)
		# get package name and version
		pkg_name: str = pkg_info["project"].get("name", "")
		pkg_version: str = pkg_info["project"].get("version", "")
		pkg_homepage: str = pkg_info["project"].get("urls", {}).get("Homepage", "")
		# add comment
		result = f"<!-- {pkg_name} v{pkg_version} {pkg_homepage} -->\n" + result

	# output as JSON
	if args.json:
		result = json.dumps(result)

	# print or save
	output_path = args.output or None
	if output_path is None:
		print(result)
	else:
		with open(output_path, "w", encoding="utf-8") as f:
			f.write(result)

248 

249 

# run the CLI when executed as a script
if __name__ == "__main__":
	main()