docs for trnbl v0.1.1

trnbl.loggers.local.build_dist


from pathlib import Path
import base64
from typing import Literal, overload
import requests  # type: ignore[import-untyped]
import json

from bs4 import BeautifulSoup, Tag  # type: ignore[import-untyped]


@overload
def get_remote(
	path_or_url: str,
	download_remote: bool = False,
	get_bytes: Literal[False] = False,
	allow_remote_fail: Literal[False] = False,
) -> str: ...
@overload
def get_remote(
	path_or_url: str,
	download_remote: bool = False,
	get_bytes: Literal[True] = True,
	allow_remote_fail: Literal[False] = False,
) -> bytes: ...
@overload
def get_remote(
	path_or_url: str,
	download_remote: bool = False,
	get_bytes: Literal[False] = False,
	allow_remote_fail: bool = False,
) -> str | None: ...
@overload
def get_remote(
	path_or_url: str,
	download_remote: bool = False,
	get_bytes: Literal[True] = True,
	allow_remote_fail: bool = False,
) -> bytes | None: ...
def get_remote(
	path_or_url: str,
	download_remote: bool = False,
	get_bytes: bool = False,
	allow_remote_fail: bool = True,
) -> str | bytes | None:
	"""gets a resource from a path or url

	- returns a string by default, or bytes if `get_bytes` is `True`
	- returns `None` if it is from the web and `download_remote` is `False`

	# Parameters:
	 - `path_or_url : str`
	   location of the resource. if it starts with `http`, it is considered a url
	 - `download_remote : bool`
	   whether to download the resource if it is a url
	   (defaults to `False`)
	 - `get_bytes : bool`
	   whether to return the resource as bytes
	   (defaults to `False`)
	 - `allow_remote_fail : bool`
	   if a remote resource fails to download, return `None`. if this is `False`, raise an exception
	   (defaults to `True`)

	# Raises:
	 - `requests.HTTPError`
	   if the remote resource returns an error, and `allow_remote_fail` is `False`

	# Returns:
	 - `str|bytes|None`
	"""
	if path_or_url.startswith("http"):
		if download_remote:
			try:
				response: requests.Response = requests.get(path_or_url)
				response.raise_for_status()
			except Exception as e:
				if allow_remote_fail:
					return None
				else:
					raise e
			if get_bytes:
				return response.content
			else:
				return response.text
		else:
			return None
	else:
		path: Path = Path(path_or_url)
		if get_bytes:
			return path.read_bytes()
		else:
			return path.read_text(encoding="utf-8")


def build_dist(
	path: Path,
	minify: bool = True,
	download_remote: bool = True,
) -> str:
	"""Build a single file html from a folder

	partially from https://stackoverflow.com/questions/44646481/merging-js-css-html-into-single-html
	"""
	original_html_text: str = Path(path).read_text(encoding="utf-8")
	soup: BeautifulSoup = BeautifulSoup(original_html_text, features="html.parser")

	# Find link tags. example: <link rel="stylesheet" href="css/somestyle.css">
	# also handles favicon
	for tag in soup.find_all("link", href=True):
		if tag.has_attr("href"):
			file_content: str | bytes | None = get_remote(
				tag["href"],
				download_remote=download_remote,
				get_bytes=tag.get("rel") == ["icon"],  # assume text if not icon
			)

			if file_content is not None:
				# remove the tag from soup
				tag.extract()

				if tag.get("rel") == ["stylesheet"]:
					# insert style element for CSS
					new_style: Tag = soup.new_tag("style")
					new_style.string = file_content
					soup.html.head.append(new_style)
				elif tag.get("rel") == ["icon"]:
					# handle favicon
					mime_type = "image/x-icon"  # default mime type for favicon
					if tag["href"].lower().endswith(".png"):
						mime_type = "image/png"
					elif tag["href"].lower().endswith(".ico"):
						mime_type = "image/x-icon"

					base64_content = base64.b64encode(file_content).decode("ascii")  # type: ignore[arg-type]
					new_link: Tag = soup.new_tag(
						"link",
						rel="icon",
						href=f"data:{mime_type};base64,{base64_content}",
					)
					soup.html.head.append(new_link)

	# Find script tags. example: <script src="js/somescript.js"></script>
	for tag in soup.find_all("script", src=True):
		if tag.has_attr("src"):
			file_text: str | bytes | None = get_remote(
				tag["src"],
				download_remote=download_remote,
			)

			if file_text is not None:
				# remove the tag from soup
				tag.extract()

				# insert script element
				new_script: Tag = soup.new_tag("script")
				new_script.string = file_text
				soup.html.head.append(new_script)

	# Find image tags. example: <img src="images/img1.png">
	for tag in soup.find_all("img", src=True):
		if tag.has_attr("src"):
			img_content: bytes | None = get_remote(
				tag["src"], download_remote=download_remote, get_bytes=True
			)

			if img_content is not None:
				# replace filename with base64 of the content of the file
				base64_img_content: bytes = base64.b64encode(img_content)  # type: ignore[arg-type]
				tag["src"] = "data:image/png;base64, {}".format(
					base64_img_content.decode("ascii")
				)

	out_html: str = str(soup)

	if minify:
		import minify_html  # type: ignore[import-untyped]

		out_html = minify_html.minify(out_html, minify_css=True, minify_js=True)

	return out_html


def main() -> None:
	# parse args
	import argparse

	parser: argparse.ArgumentParser = argparse.ArgumentParser(
		description="Build a single file HTML from a folder"
	)
	parser.add_argument("path", type=str, help="Path to the HTML file")
	parser.add_argument(
		"--output", "-o", type=str, help="Output file path (default: print to console)"
	)
	parser.add_argument("--no-minify", action="store_true", help="Disable minification")
	parser.add_argument(
		"--download",
		"-d",
		action="store_true",
		help="Download remote resources and inline them (skipped by default)",
	)
	parser.add_argument("--json", "-j", action="store_true", help="Output as JSON")
	parser.add_argument(
		"--pkg-info",
		"-p",
		type=str,
		help="Add a comment with info from the given `pyproject.toml` file",
	)

	args: argparse.Namespace = parser.parse_args()

	input_path: Path = Path(args.path)
	if not input_path.exists():
		raise FileNotFoundError(f"Path {input_path} does not exist")

	# build page
	result: str = build_dist(
		path=input_path,
		minify=not args.no_minify,
		download_remote=args.download,
	)

	# add package info
	if args.pkg_info:
		try:
			import tomllib  # type: ignore
		except ImportError:
			import tomli as tomllib  # type: ignore

		# read pyproject.toml
		with open(args.pkg_info, "rb") as f:
			pkg_info = tomllib.load(f)
		# get package name and version
		pkg_name: str = pkg_info["project"].get("name", "")
		pkg_version: str = pkg_info["project"].get("version", "")
		pkg_homepage: str = pkg_info["project"].get("urls", {}).get("Homepage", "")
		# add comment
		result = f"<!-- {pkg_name} v{pkg_version} {pkg_homepage} -->\n" + result

	# output as JSON
	if args.json:
		result = json.dumps(result)

	# print or save
	output_path = args.output or None
	if output_path is None:
		print(result)
	else:
		with open(output_path, "w", encoding="utf-8") as f:
			f.write(result)


if __name__ == "__main__":
	main()

def get_remote(path_or_url: str, download_remote: bool = False, get_bytes: bool = False, allow_remote_fail: bool = True) -> str | bytes | None:

gets a resource from a path or url

  • returns a string by default, or bytes if get_bytes is True
  • returns None if it is from the web and download_remote is False

Parameters:

  • path_or_url : str
    location of the resource. if it starts with http, it is considered a url
  • download_remote : bool
    whether to download the resource if it is a url (defaults to False)
  • get_bytes : bool
    whether to return the resource as bytes (defaults to False)
  • allow_remote_fail : bool
    if a remote resource fails to download, return None. if this is False, raise an exception (defaults to True)

Raises:

  • requests.HTTPError if the remote resource returns an error, and allow_remote_fail is False

Returns:

  • str|bytes|None
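
A minimal usage sketch for get_remote (the paths and URLs below are placeholders, not resources shipped with trnbl):

from trnbl.loggers.local.build_dist import get_remote

# local paths are read from disk
css_text = get_remote("static/style.css")

# remote urls return None unless download_remote=True
js_text = get_remote("https://example.com/app.js")  # -> None
js_text = get_remote("https://example.com/app.js", download_remote=True)

# binary resources; with allow_remote_fail=True a failed download yields None instead of raising
icon_bytes = get_remote(
	"https://example.com/favicon.ico",
	download_remote=True,
	get_bytes=True,
	allow_remote_fail=True,
)
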
def build_dist(path: pathlib.Path, minify: bool = True, download_remote: bool = True) -> str:

Build a single file html from a folder

partially from https://stackoverflow.com/questions/44646481/merging-js-css-html-into-single-html
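
A short sketch of calling build_dist directly (file names are placeholders; minification assumes the optional minify_html package is installed):

from pathlib import Path
from trnbl.loggers.local.build_dist import build_dist

# inline local and remote css/js/images/favicons into a single html string
html: str = build_dist(
	path=Path("docs/index.html"),  # placeholder input page
	minify=True,
	download_remote=True,  # fetch http(s) resources instead of skipping them
)
Path("docs/index.dist.html").write_text(html, encoding="utf-8")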

def main() -> None:
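
main is the argparse entry point wired up by the if __name__ == "__main__" guard above. Assuming trnbl is installed, it can be driven programmatically as sketched below (argument values are placeholders); the same flags work from a shell via python -m trnbl.loggers.local.build_dist.

import sys
from trnbl.loggers.local.build_dist import main

# equivalent to: python -m trnbl.loggers.local.build_dist docs/index.html --download --output docs/index.dist.html
sys.argv = [
	"build_dist",
	"docs/index.html",  # placeholder input page
	"--download",  # fetch and inline remote resources
	"--output",
	"docs/index.dist.html",  # placeholder output path
]
main()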