trnbl.loggers.local.build_dist
Module source:

```python
from pathlib import Path
import base64
from typing import Literal, overload
import requests  # type: ignore[import-untyped]
import json

from bs4 import BeautifulSoup, Tag  # type: ignore[import-untyped]


@overload
def get_remote(
    path_or_url: str,
    download_remote: bool = False,
    get_bytes: Literal[False] = False,
    allow_remote_fail: Literal[False] = False,
) -> str: ...
@overload
def get_remote(
    path_or_url: str,
    download_remote: bool = False,
    get_bytes: Literal[True] = True,
    allow_remote_fail: Literal[False] = False,
) -> bytes: ...
@overload
def get_remote(
    path_or_url: str,
    download_remote: bool = False,
    get_bytes: Literal[False] = False,
    allow_remote_fail: bool = False,
) -> str | None: ...
@overload
def get_remote(
    path_or_url: str,
    download_remote: bool = False,
    get_bytes: Literal[True] = True,
    allow_remote_fail: bool = False,
) -> bytes | None: ...
def get_remote(
    path_or_url: str,
    download_remote: bool = False,
    get_bytes: bool = False,
    allow_remote_fail: bool = True,
) -> str | bytes | None:
    """gets a resource from a path or url

    - returns a string by default, or bytes if `get_bytes` is `True`
    - returns `None` if it's from the web and `download_remote` is `False`

    # Parameters:
    - `path_or_url : str`
        location of the resource. if it starts with `http`, it is considered a url
    - `download_remote : bool`
        whether to download the resource if it is a url
        (defaults to `False`)
    - `get_bytes : bool`
        whether to return the resource as bytes
        (defaults to `False`)
    - `allow_remote_fail : bool`
        if a remote resource fails to download, return `None`. if this is `False`, raise an exception
        (defaults to `True`)

    # Raises:
    - `requests.HTTPError`
        if the remote resource returns an error, and `allow_remote_fail` is `False`

    # Returns:
    - `str|bytes|None`
    """
    if path_or_url.startswith("http"):
        if download_remote:
            try:
                response: requests.Response = requests.get(path_or_url)
                response.raise_for_status()
            except Exception as e:
                if allow_remote_fail:
                    return None
                else:
                    raise e
            if get_bytes:
                return response.content
            else:
                return response.text
        else:
            return None
    else:
        path: Path = Path(path_or_url)
        if get_bytes:
            return path.read_bytes()
        else:
            return path.read_text(encoding="utf-8")


def build_dist(
    path: Path,
    minify: bool = True,
    download_remote: bool = True,
) -> str:
    """Build a single-file HTML page from a folder

    partially from https://stackoverflow.com/questions/44646481/merging-js-css-html-into-single-html
    """
    original_html_text: str = Path(path).read_text(encoding="utf-8")
    soup: BeautifulSoup = BeautifulSoup(original_html_text, features="html.parser")

    # Find link tags. example: <link rel="stylesheet" href="css/somestyle.css">
    # also handles favicon
    for tag in soup.find_all("link", href=True):
        if tag.has_attr("href"):
            file_content: str | bytes | None = get_remote(
                tag["href"],
                download_remote=download_remote,
                get_bytes=tag.get("rel") == ["icon"],  # assume text if not icon
            )

            if file_content is not None:
                # remove the tag from soup
                tag.extract()

                if tag.get("rel") == ["stylesheet"]:
                    # insert style element for CSS
                    new_style: Tag = soup.new_tag("style")
                    new_style.string = file_content
                    soup.html.head.append(new_style)
                elif tag.get("rel") == ["icon"]:
                    # handle favicon
                    mime_type = "image/x-icon"  # default mime type for favicon
                    if tag["href"].lower().endswith(".png"):
                        mime_type = "image/png"
                    elif tag["href"].lower().endswith(".ico"):
                        mime_type = "image/x-icon"

                    base64_content = base64.b64encode(file_content).decode("ascii")  # type: ignore[arg-type]
                    new_link: Tag = soup.new_tag(
                        "link",
                        rel="icon",
                        href=f"data:{mime_type};base64,{base64_content}",
                    )
                    soup.html.head.append(new_link)

    # Find script tags. example: <script src="js/somescript.js"></script>
    for tag in soup.find_all("script", src=True):
        if tag.has_attr("src"):
            file_text: str | bytes | None = get_remote(
                tag["src"],
                download_remote=download_remote,
            )

            if file_text is not None:
                # remove the tag from soup
                tag.extract()

                # insert script element
                new_script: Tag = soup.new_tag("script")
                new_script.string = file_text
                soup.html.head.append(new_script)

    # Find image tags. example: <img src="images/img1.png">
    for tag in soup.find_all("img", src=True):
        if tag.has_attr("src"):
            img_content: bytes | None = get_remote(
                tag["src"], download_remote=download_remote, get_bytes=True
            )

            if img_content is not None:
                # replace filename with base64 of the content of the file
                base64_img_content: bytes = base64.b64encode(img_content)  # type: ignore[arg-type]
                tag["src"] = "data:image/png;base64, {}".format(
                    base64_img_content.decode("ascii")
                )

    out_html: str = str(soup)

    if minify:
        import minify_html  # type: ignore[import-untyped]

        out_html = minify_html.minify(out_html, minify_css=True, minify_js=True)

    return out_html


def main() -> None:
    # parse args
    import argparse

    parser: argparse.ArgumentParser = argparse.ArgumentParser(
        description="Build a single file HTML from a folder"
    )
    parser.add_argument("path", type=str, help="Path to the HTML file or folder")
    parser.add_argument(
        "--output", "-o", type=str, help="Output file path (default: print to console)"
    )
    parser.add_argument("--no-minify", action="store_true", help="Disable minification")
    parser.add_argument(
        "--download",
        "-d",
        action="store_true",
        help="Download remote resources",
    )
    parser.add_argument("--json", "-j", action="store_true", help="Output as JSON")
    parser.add_argument(
        "--pkg-info",
        "-p",
        type=str,
        help="Add a comment with info from the given `pyproject.toml` file",
    )

    args: argparse.Namespace = parser.parse_args()

    input_path: Path = Path(args.path)
    if not input_path.exists():
        raise FileNotFoundError(f"Path {input_path} does not exist")

    # build page
    result: str = build_dist(
        path=input_path,
        minify=not args.no_minify,
        download_remote=args.download,
    )

    # add package info
    if args.pkg_info:
        try:
            import tomllib  # type: ignore
        except ImportError:
            import tomli as tomllib  # type: ignore

        # read pyproject.toml
        with open(args.pkg_info, "rb") as f:
            pkg_info = tomllib.load(f)
        # get package name and version
        pkg_name: str = pkg_info["project"].get("name", "")
        pkg_version: str = pkg_info["project"].get("version", "")
        pkg_homepage: str = pkg_info["project"].get("urls", {}).get("Homepage", "")
        # add comment
        result = f"<!-- {pkg_name} v{pkg_version} {pkg_homepage} -->\n" + result

    # output as JSON
    if args.json:
        result = json.dumps(result)

    # print or save
    output_path = args.output or None
    if output_path is None:
        print(result)
    else:
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(result)


if __name__ == "__main__":
    main()
```
```python
def get_remote(
    path_or_url: str,
    download_remote: bool = False,
    get_bytes: bool = False,
    allow_remote_fail: bool = True,
) -> str | bytes | None:
```
gets a resource from a path or url

- returns a string by default, or bytes if `get_bytes` is `True`
- returns `None` if it's from the web and `download_remote` is `False`

Parameters:

- `path_or_url : str`
  location of the resource. if it starts with `http`, it is considered a url
- `download_remote : bool`
  whether to download the resource if it is a url (defaults to `False`)
- `get_bytes : bool`
  whether to return the resource as bytes (defaults to `False`)
- `allow_remote_fail : bool`
  if a remote resource fails to download, return `None`. if this is `False`, raise an exception (defaults to `True`)

Raises:

- `requests.HTTPError`
  if the remote resource returns an error, and `allow_remote_fail` is `False`

Returns:

- `str | bytes | None`
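For illustration, a minimal sketch of calling `get_remote` directly; the local path and URL below are hypothetical:

```python
from trnbl.loggers.local.build_dist import get_remote

# local path: read and return the file contents as text (or bytes with get_bytes=True)
css_text = get_remote("css/somestyle.css")  # hypothetical local file

# remote url: returns None unless download_remote=True
js_text = get_remote(
    "https://example.com/somescript.js",  # hypothetical URL
    download_remote=True,
    allow_remote_fail=True,  # on a download error, return None instead of raising
)
if js_text is None:
    print("remote resource was skipped or failed to download")
```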
```python
def build_dist(
    path: Path,
    minify: bool = True,
    download_remote: bool = True,
) -> str:
```
Build a single-file HTML page from a folder
partially from https://stackoverflow.com/questions/44646481/merging-js-css-html-into-single-html
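As a rough usage sketch (the input and output file names are hypothetical):

```python
from pathlib import Path

from trnbl.loggers.local.build_dist import build_dist

# inline the stylesheets, scripts, favicon, and images referenced by index.html
# into a single self-contained page
html: str = build_dist(
    path=Path("index.html"),  # hypothetical input page
    minify=True,              # uses the minify_html package
    download_remote=True,     # also fetch and embed http(s) resources
)
Path("dist.html").write_text(html, encoding="utf-8")  # hypothetical output path
```

Note that `minify=True` imports `minify_html` at call time, so that package only needs to be installed when minification is requested.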
```python
def main() -> None:
```
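`main` is the command-line entry point: it parses arguments, calls `build_dist`, optionally prepends a comment with the name, version, and homepage read from a `pyproject.toml`, and prints the result or writes it to `--output` (see the module source above). A sketch of an equivalent programmatic invocation, with hypothetical file names:

```python
# roughly equivalent to running:
#   python -m trnbl.loggers.local.build_dist index.html --output dist.html --download
import sys

from trnbl.loggers.local.build_dist import main

sys.argv = [
    "build_dist",
    "index.html",             # hypothetical input HTML file
    "--output", "dist.html",  # hypothetical output path
    "--download",             # also embed remote (http/https) resources
]
main()
```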