Coverage for src/artemis_sg/items.py: 94%
76 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-12 17:31 -0700
1import json
2import logging
3import string
5from artemis_sg.item import Item
class Items:
    """
    Collection object for artemis_sg.item.Item objects.
    """

    # Constants
    # Spreadsheet-style column letters used to label item keys (A, B, C, ...).
    ALPHA_LIST = tuple(string.ascii_uppercase)

    # methods
    def __init__(self, keys, value_list, isbn_key):
        """
        Instantiate Items object.

        Arguments:
        keys -- list of strings to use as item keys
        value_list -- list of value lists, nested list positions correspond to keys
        isbn_key -- the key in keys that corresponds with ISBN (primary key)

        Returns:
        Items object

        Raises:
        IndexError -- if the number of keys does not match the number of
            values in the first row (also raised implicitly when
            value_list is empty, since the first row is inspected).
        """
        namespace = f"{type(self).__name__}.{self.__init__.__name__}"

        # Validate against the first row only; subsequent rows are assumed
        # to share its shape.
        if len(keys) != len(value_list[0]):
            logging.error(f"{namespace}: Key count does not match value count.")
            raise IndexError(f"{namespace}: Key count does not match value count.")
        self.isbn_key = isbn_key
        # Map each key to a column letter for spreadsheet-oriented lookups.
        self.column_dict = dict(zip(keys, Items.ALPHA_LIST))

        self.items = [
            Item(keys, entry, row_num, self.isbn_key)
            for row_num, entry in enumerate(value_list)
        ]

    def get_items(self):
        """Return the internal list of Item objects."""
        return self.items

    def __iter__(self):
        """Iterate over the contained Item objects."""
        return iter(self.items)

    def get_json_data_from_file(self, datafile):
        """
        Load and return JSON data from the given file.

        Arguments:
        datafile -- path of the JSON file to read

        Returns:
        Parsed JSON data, or an empty dict if the file is missing or
        does not contain valid JSON (both cases are logged).
        """
        namespace = f"{type(self).__name__}.{self.get_json_data_from_file.__name__}"
        try:
            # The context manager closes the file; no explicit close() needed.
            with open(datafile) as filepointer:
                return json.load(filepointer)
        except FileNotFoundError:
            logging.error(f"{namespace}: Datafile '{datafile}' not found")
            return {}
        except json.decoder.JSONDecodeError:
            logging.error(
                f"{namespace}: Datafile '{datafile}' did not contain valid JSON"
            )
            return {}

    def load_scraped_data(self, datafile):
        """Read scraped data from datafile and apply it to matching items."""
        data = self.get_json_data_from_file(datafile)
        self.set_scraped_data(data)

    def save_scraped_data(self, datafile):
        """
        Merge this collection's scraped data into datafile.

        Existing entries in datafile are preserved unless overwritten by
        an entry for the same ISBN from this collection.  Nothing is
        written when the merged data is empty.

        Arguments:
        datafile -- path of the JSON file to update
        """
        namespace = f"{type(self).__name__}.{self.save_scraped_data.__name__}"

        internal_data = self.get_scraped_data()
        external_data = self.get_json_data_from_file(datafile)
        # Internal (fresh) data wins over what was already on disk.
        external_data.update(internal_data)
        if external_data:
            logging.debug(f"{namespace}: attempting to open {datafile}")
            with open(datafile, "w+") as filepointer:
                logging.debug(f"{namespace}: dumping scraped data to {datafile}")
                json.dump(external_data, filepointer, indent=4)

    def set_scraped_data(self, data):
        """
        Apply scraped data to the matching items in this collection.

        Arguments:
        data -- dict keyed by ISBN; each value is expected to hold
            "DESCRIPTION" and "image_urls" entries.

        ISBNs with no matching item are skipped.
        """
        for isbn, record in data.items():
            item = self.find_item(isbn)
            if not item:
                continue
            item.data["DESCRIPTION"] = record["DESCRIPTION"]
            item.image_urls = record["image_urls"]

    def get_scraped_data(self):
        """
        Collect scraped data from items that have image URLs.

        Returns:
        dict keyed by item ISBN; each value holds "isbn10", "image_urls",
        and (when present on the item) "DESCRIPTION".
        """
        data = {}
        for item in self.items:
            if item.image_urls:
                data_elem = {
                    "isbn10": item.isbn10,
                    "image_urls": item.image_urls,
                }
                if "DESCRIPTION" in item.data:
                    data_elem["DESCRIPTION"] = item.data["DESCRIPTION"]
                data[item.isbn] = data_elem

        return data

    def find_item(self, isbn):
        """Return the first item whose ISBN matches, or None if not found."""
        for item in self.items:
            if item.isbn == isbn:
                return item
        return None

    def get_items_with_image_urls(self):
        """Return the items that have scraped image URLs."""
        # WARNING: this looks at scraped urls to determine if the item has images.
        # Images may be retrieved from GCloud storage. So, there may be cases
        # where this method of searching leads to false positives/negatives.
        return [item for item in self.items if item.image_urls]