Coverage for amazonorders/entity/parsable.py: 89.58%

48 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-07 21:56 +0000

1import logging 

2from typing import Callable, Any, Optional, Type, Union 

3 

4from bs4 import Tag 

5 

6from amazonorders.constants import BASE_URL 

7from amazonorders.exception import AmazonOrdersError, AmazonOrderEntityError 

8 

# Module metadata.
__author__ = "Alex Laird"
__copyright__ = "Copyright 2024, Alex Laird"
__version__ = "1.0.7"

# Module-level logger; ``Parsable.safe_parse`` reports suppressed parse failures through it as warnings.
logger = logging.getLogger(__name__)

14 

15 

class Parsable:
    """
    A base class that contains a parsed representation of the entity, and can be extended to
    be made up of the entity's fields utilizing the helper methods.
    """

    def __init__(self,
                 parsed: Tag) -> None:
        #: Parsed HTML data that can be used to populate the fields of the entity.
        self.parsed: Tag = parsed

    def safe_parse(self,
                   parse_function: Callable[..., Any],
                   **kwargs: Any) -> Any:
        """
        Execute the given parse function on a field, handling any common parse exceptions and passing
        them as warnings to the logger, suppressing them as exceptions.

        :param parse_function: The parse function to attempt safe execution.
        :param kwargs: The ``kwargs`` will be passed to ``parse_function``.
        :return: The return value from ``parse_function``, or ``None`` if a common parse exception
            (``AttributeError``, ``IndexError``, ``ValueError``) was suppressed.
        :raises AmazonOrdersError: If the name of ``parse_function`` neither starts with ``_parse_``
            nor is ``simple_parse``.
        """
        function_name = parse_function.__name__
        if not function_name.startswith("_parse_") and function_name != "simple_parse":
            raise AmazonOrdersError("The name of the `parse_function` passed to this method must start with `_parse_`")

        try:
            return parse_function(**kwargs)
        except (AttributeError, IndexError, ValueError):
            if function_name.startswith("_parse_"):
                # The field name is whatever follows the ``_parse_`` prefix.
                field = function_name.split("_parse_")[1]
            else:
                # ``simple_parse`` has no ``_parse_`` prefix, so splitting on it would raise an
                # IndexError from inside this handler; identify the field by its selector instead.
                field = kwargs.get("selector", function_name)

            logger.warning("When building {}, `{}` could not be parsed.".format(self.__class__.__name__,
                                                                               field),
                           exc_info=True)
            return None

    def simple_parse(self,
                     selector: Union[str, list],
                     link: bool = False,
                     return_type: Optional[Type] = None,
                     text_contains: Optional[str] = None,
                     required: bool = False, ) -> Any:
        """
        Will attempt to extract the text value of the given CSS selector(s) for a field, and
        is suitable for most basic functionality on a well-formed page.

        The ``selector`` can be either a ``str`` or a ``list``. If a ``list`` is given, each
        selector in the list will be tried in order, and the first one that yields a value wins.

        :param selector: The CSS selector(s) for the field.
        :param link: If a link, the value of ``src`` or ``href`` will be returned.
        :param return_type: Specify ``int`` or ``float`` to return a value other than ``str``.
        :param text_contains: Only select the field if this value is found in its text content.
        :param required: If required, an exception will be thrown instead of returning ``None``.
        :return: The cleaned up return value from the parsed ``selector``.
        :raises AmazonOrderEntityError: If ``required`` is set and no value (or only empty text)
            was found for any of the selectors.
        :raises ValueError: If the text can not be converted to the requested ``return_type``.
        """
        selectors = [selector] if isinstance(selector, str) else selector

        value = None

        for css_selector in selectors:
            tag = self.parsed.select_one(css_selector)
            if not tag:
                continue

            if link:
                # Prefer ``src`` (e.g. images) and fall back to ``href`` (e.g. anchors).
                # NOTE(review): a matched tag carrying neither attribute raises KeyError here.
                key = "src" if "src" in tag.attrs else "href"
                value = self.with_base_url(tag.attrs[key])
            else:
                if text_contains and text_contains not in tag.text:
                    # Matched the selector but not the expected text; try the next selector.
                    continue

                value = tag.text.strip()
                # TODO: is there a dynamic way to accomplish this?
                if return_type == float:
                    value = float(value)
                elif return_type == int:
                    value = int(value)
            break

        # None of the selectors were found (or the match had no text). A parsed numeric zero is a
        # legitimate value, so only ``None`` and the empty string count as missing.
        if required and (value is None or value == ""):
            raise AmazonOrderEntityError(
                "When building {}, field for selector `{}` was None, but this is not allowed.".format(
                    self.__class__.__name__, selectors))

        return value

    def safe_simple_parse(self,
                          selector: Union[str, list],
                          **kwargs: Any) -> Any:
        """
        A helper function that uses :func:`simple_parse` as the ``parse_function()`` passed to :func:`safe_parse`.

        :param selector: The selector to pass to :func:`simple_parse`.
        :param kwargs: The ``kwargs`` will be passed to ``parse_function``.
        :return: The return value from :func:`simple_parse`.
        """
        return self.safe_parse(self.simple_parse, selector=selector, **kwargs)

    def with_base_url(self, url: str) -> str:
        """
        If the given URL is relative, the ``BASE_URL`` will be prepended.

        :param url: The URL to check.
        :return: The fully qualified URL.
        """
        if not url.startswith("http"):
            url = "{}{}".format(BASE_URL, url)
        return url