Coverage for amazonorders/entity/parsable.py: 89.58%
48 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-01-30 14:24 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-01-30 14:24 +0000
1import logging
2from typing import Callable, Any, Optional, Type, Union
4from bs4 import Tag
6from amazonorders.constants import BASE_URL
7from amazonorders.exception import AmazonOrdersError, AmazonOrderEntityError
9__author__ = "Alex Laird"
10__copyright__ = "Copyright 2024, Alex Laird"
11__version__ = "1.0.7"
13logger = logging.getLogger(__name__)
class Parsable:
    """
    A base class that contains a parsed representation of the entity, and can be extended to
    be made up of the entity's fields utilizing the helper methods.
    """

    def __init__(self,
                 parsed: Tag) -> None:
        #: Parsed HTML data that can be used to populate the fields of the entity.
        self.parsed: Tag = parsed

    def safe_parse(self,
                   parse_function: Callable[..., Any],
                   **kwargs: Any) -> Any:
        """
        Execute the given parse function on a field, handling any common parse exceptions and passing
        them as warnings to the logger, suppressing them as exceptions.

        :param parse_function: The parse function to attempt safe execution. Its name must start
            with ``_parse_``, or be ``simple_parse``.
        :param kwargs: The ``kwargs`` will be passed to ``parse_function``.
        :return: The return value from ``parse_function``, or ``None`` if a handled parse
            exception (``AttributeError``, ``IndexError``, ``ValueError``) was raised.
        :raises AmazonOrdersError: If ``parse_function`` has a name that is not allowed.
        """
        prefix = "_parse_"
        function_name = parse_function.__name__

        if not function_name.startswith(prefix) and function_name != "simple_parse":
            raise AmazonOrdersError("The name of the `parse_function` passed to this method must start with `_parse_`")

        try:
            return parse_function(**kwargs)
        except (AttributeError, IndexError, ValueError):
            # Only strip the prefix when it is actually present: ``simple_parse`` is an allowed
            # name that does not contain ``_parse_``, so blindly indexing the result of
            # ``split("_parse_")`` would raise an ``IndexError`` from inside this handler.
            if function_name.startswith(prefix):
                field_name = function_name[len(prefix):]
            else:
                field_name = function_name

            logger.warning("When building {}, `{}` could not be parsed.".format(self.__class__.__name__,
                                                                                 field_name),
                           exc_info=True)
            return None

    def simple_parse(self,
                     selector: Union[str, list],
                     link: bool = False,
                     return_type: Optional[Type] = None,
                     text_contains: Optional[str] = None,
                     required: bool = False, ) -> Any:
        """
        Will attempt to extract the text value of the given CSS selector(s) for a field, and
        is suitable for most basic functionality on a well-formed page.

        The ``selector`` can be either a ``str`` or a ``list``. If a ``list`` is given, each
        selector in the list will be tried in order, and the first usable match wins.

        :param selector: The CSS selector(s) for the field.
        :param link: If a link, the value of ``src`` or ``href`` will be returned.
        :param return_type: Specify ``int`` or ``float`` to return a value other than ``str``.
        :param text_contains: Only select the field if this value is found in its text content.
        :param required: If required, an exception will be thrown instead of returning ``None``.
        :return: The cleaned up return value from the parsed ``selector``.
        :raises AmazonOrderEntityError: If ``required`` and no value could be found.
        """
        if isinstance(selector, str):
            selector = [selector]

        value = None

        for s in selector:
            tag = self.parsed.select_one(s)
            if not tag:
                continue

            if link:
                # Prefer ``src`` when present, otherwise fall back to ``href``. A tag with
                # neither attribute raises ``KeyError`` here, which ``safe_parse`` does not catch.
                key = "src" if "src" in tag.attrs else "href"
                value = self.with_base_url(tag.attrs[key])
            else:
                if text_contains and text_contains not in tag.text:
                    # This tag matched the selector but not the text filter, try the next selector
                    continue

                value = tag.text.strip()
                # TODO: is there a dynamic way to accomplish this?
                if return_type == float:
                    value = float(value)
                elif return_type == int:
                    value = int(value)
            break

        # None of the selectors produced a value. A parsed numeric zero is a legitimate value,
        # so only ``None`` and an empty string are treated as missing.
        if required and (value is None or value == ""):
            raise AmazonOrderEntityError(
                "When building {}, field for selector `{}` was None, but this is not allowed.".format(
                    self.__class__.__name__, selector))

        return value

    def safe_simple_parse(self,
                          selector: Union[str, list],
                          **kwargs) -> Any:
        """
        A helper function that uses :func:`simple_parse` as the ``parse_function()`` passed to :func:`safe_parse`.

        :param selector: The selector to pass to :func:`simple_parse`.
        :param kwargs: The ``kwargs`` will be passed to ``parse_function``.
        :return: The return value from :func:`simple_parse`.
        """
        return self.safe_parse(self.simple_parse, selector=selector, **kwargs)

    def with_base_url(self, url: str) -> str:
        """
        If the given URL is relative, the ``BASE_URL`` will be prepended.

        :param url: The URL to check.
        :return: The fully qualified URL.
        """
        if not url.startswith("http"):
            url = "{}{}".format(BASE_URL, url)
        return url