Coverage for src/artemis_sg/items.py: 94%
76 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-05 18:00 -0700
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-05 18:00 -0700
1# -*- coding: utf-8 -*-
3import json
4import logging
5import string
7from artemis_sg.item import Item
class Items:
    """
    Collection object for artemis_sg.item.Item objects.
    """

    # Constants
    # Spreadsheet-style column letters; zipped against `keys` below, so at
    # most 26 keys receive a column letter (zip truncates silently).
    ALPHA_LIST = list(string.ascii_uppercase)

    # methods
    def __init__(self, keys, value_list, isbn_key):
        """
        Instantiate Items object

        Arguments:
        keys -- list of strings to use as item keys
        value_list -- list of value lists, nested list positions correspond to keys
        isbn_key -- the key in keys that corresponds with ISBN (primary key)

        Returns:
        Items object

        Raises:
        IndexError -- if value_list is empty, or if the key count does not
                      match the value count of the first row
        """
        namespace = f"{type(self).__name__}.{self.__init__.__name__}"

        # Validate up front: an empty value_list previously escaped as an
        # unlogged IndexError from `value_list[0]`; now both failure modes
        # are logged and raise the same exception type.
        if not value_list or len(keys) != len(value_list[0]):
            logging.error(f"{namespace}: Key count does not match value count.")
            raise IndexError

        self.isbn_key = isbn_key
        # Map each item key to a spreadsheet column letter (A, B, C, ...).
        self.column_dict = dict(zip(keys, Items.ALPHA_LIST))

        self.items = []
        for row_num, entry in enumerate(value_list):
            self.items.append(Item(keys, entry, row_num, self.isbn_key))

    def get_items(self):
        """Return the internal list of Item objects."""
        return self.items

    def __iter__(self):
        """Iterate over the contained Item objects."""
        return iter(self.items)

    def get_json_data_from_file(self, datafile):
        """
        Load and return JSON data from `datafile`.

        Returns an empty dict if the file does not exist or does not
        contain valid JSON; both cases are logged as errors.
        """
        namespace = f"{type(self).__name__}.{self.get_json_data_from_file.__name__}"
        try:
            # `with` closes the file on exit; the explicit close() that used
            # to follow was redundant.
            with open(datafile, "r", encoding="utf-8") as filepointer:
                return json.load(filepointer)
        except FileNotFoundError:
            logging.error(f"{namespace}: Datafile '{datafile}' not found")
            return {}
        except json.decoder.JSONDecodeError:
            logging.error(
                f"{namespace}: Datafile '{datafile}' did not contain valid JSON"
            )
            return {}

    def load_scraped_data(self, datafile):
        """Read scraped data from `datafile` and apply it to matching items."""
        data = self.get_json_data_from_file(datafile)
        self.set_scraped_data(data)

    def save_scraped_data(self, datafile):
        """
        Merge this collection's scraped data into `datafile`.

        Entries already in the file are preserved unless overwritten by
        in-memory data for the same ISBN.  Nothing is written when the
        merged data set is empty.
        """
        namespace = f"{type(self).__name__}.{self.save_scraped_data.__name__}"

        internal_data = self.get_scraped_data()
        external_data = self.get_json_data_from_file(datafile)
        external_data.update(internal_data)
        if external_data:
            logging.debug(f"{namespace}: attempting to open {datafile}")
            # "w" replaces the previous "w+": the file is only written here,
            # never read back, so read access was unnecessary.
            with open(datafile, "w", encoding="utf-8") as filepointer:
                logging.debug(f"{namespace}: dumping scraped data to {datafile}")
                json.dump(external_data, filepointer, indent=4)

    def set_scraped_data(self, data):
        """
        Apply scraped data (a dict keyed by ISBN) to matching items.

        Entries whose ISBN is not in the collection are skipped.  Missing
        "DESCRIPTION" or "image_urls" fields are tolerated because
        get_scraped_data may legitimately omit "DESCRIPTION".
        """
        for isbn, elem in data.items():
            item = self.find_item(isbn)
            if not item:
                continue
            # Guard the lookups: a record without "DESCRIPTION" (which
            # get_scraped_data can produce) previously raised KeyError here.
            if "DESCRIPTION" in elem:
                item.data["DESCRIPTION"] = elem["DESCRIPTION"]
            item.image_urls = elem.get("image_urls", item.image_urls)

    def get_scraped_data(self):
        """
        Collect scraped data for every item that has image URLs.

        Returns a dict keyed by ISBN; each value holds "isbn10",
        "image_urls", and, when the item has one, "DESCRIPTION".
        """
        data = {}
        for item in self.items:
            if item.image_urls:
                data_elem = {
                    "isbn10": item.isbn10,
                    "image_urls": item.image_urls,
                }
                if "DESCRIPTION" in item.data:
                    data_elem["DESCRIPTION"] = item.data["DESCRIPTION"]
                data[item.isbn] = data_elem
        return data

    def find_item(self, isbn):
        """Return the first item whose isbn matches `isbn`, or None."""
        for item in self.items:
            if item.isbn == isbn:
                return item
        return None

    def get_items_with_image_urls(self):
        """
        Return the items that have scraped image URLs.

        WARNING: this looks at scraped urls to determine if the item has
        images.  Images may be retrieved from GCloud storage.  So, there may
        be cases where this method of searching leads to false
        positives/negatives.
        """
        return [item for item in self.items if item.image_urls]