Coverage for src/artemis_sg/items.py: 94%

76 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-12 06:30 -0700

1import json 

2import logging 

3import string 

4 

5from artemis_sg.item import Item 

6 

7 

class Items:
    """
    Collection object for artemis_sg.item.Item objects.
    """

    # Constants
    ALPHA_LIST = tuple(string.ascii_uppercase)

    # methods
    def __init__(self, keys, value_list, isbn_key):
        """
        Instantiate Items object.

        Arguments:
        keys -- list of strings to use as item keys
        value_list -- list of value lists, nested list positions correspond to keys
        isbn_key -- the key in keys that corresponds with ISBN (primary key)

        Returns:
        Items object

        Raises:
        IndexError -- if value_list is empty, or if the first row's length
                      does not match the number of keys
        """
        namespace = f"{type(self).__name__}.{self.__init__.__name__}"

        # Guard against an empty dataset before probing value_list[0];
        # previously this raised an unlogged IndexError from the subscript.
        if not value_list:
            logging.error(f"{namespace}: Empty value list.")
            raise IndexError
        if len(keys) != len(value_list[0]):
            logging.error(f"{namespace}: Key count does not match value count.")
            raise IndexError

        self.isbn_key = isbn_key
        # Map each data key to a spreadsheet-style column letter (A, B, C, ...).
        self.column_dict = dict(zip(keys, Items.ALPHA_LIST))

        self.items = [
            Item(keys, entry, row_num, self.isbn_key)
            for row_num, entry in enumerate(value_list)
        ]

    def get_items(self):
        """Return the internal list of Item objects."""
        return self.items

    def __iter__(self):
        """Iterate directly over the contained Item objects."""
        return iter(self.items)

    def get_json_data_from_file(self, datafile):
        """
        Load and return JSON data from the given file.

        Arguments:
        datafile -- path of the JSON file to read

        Returns:
        dict of parsed JSON data, or {} if the file is missing or does not
        contain valid JSON (both cases are logged).
        """
        namespace = f"{type(self).__name__}.{self.get_json_data_from_file.__name__}"
        try:
            # The 'with' block closes the file; no explicit close() needed.
            with open(datafile) as filepointer:
                return json.load(filepointer)
        except FileNotFoundError:
            logging.error(f"{namespace}: Datafile '{datafile}' not found")
            return {}
        except json.decoder.JSONDecodeError:
            logging.error(
                f"{namespace}: Datafile '{datafile}' did not contain valid JSON"
            )
            return {}

    def load_scraped_data(self, datafile):
        """Read scraped data from datafile and apply it to matching items."""
        data = self.get_json_data_from_file(datafile)
        self.set_scraped_data(data)

    def save_scraped_data(self, datafile):
        """
        Merge this collection's scraped data into datafile.

        Entries already in the file are kept unless overwritten by newer
        internal data; nothing is written when there is no data at all.
        """
        namespace = f"{type(self).__name__}.{self.save_scraped_data.__name__}"

        internal_data = self.get_scraped_data()
        external_data = self.get_json_data_from_file(datafile)
        # Internal (freshly scraped) data wins over whatever is on disk.
        external_data.update(internal_data)
        if external_data:
            logging.debug(f"{namespace}: attempting to open {datafile}")
            with open(datafile, "w+") as filepointer:
                logging.debug(f"{namespace}: dumping scraped data to {datafile}")
                json.dump(external_data, filepointer, indent=4)

    def set_scraped_data(self, data):
        """
        Apply scraped data (dict keyed by ISBN) to the matching items.

        ISBNs with no matching item in this collection are skipped.
        """
        for isbn in data:
            item = self.find_item(isbn)
            if not item:
                continue
            item.data["DESCRIPTION"] = data[isbn]["DESCRIPTION"]
            item.image_urls = data[isbn]["image_urls"]

    def get_scraped_data(self):
        """
        Return a dict of scraped data, keyed by ISBN, for items that have
        image URLs.  Each entry carries isbn10, image_urls, and DESCRIPTION
        (when present on the item).
        """
        data = {}
        for item in self.items:
            if item.image_urls != []:
                data_elem = {
                    "isbn10": item.isbn10,
                    "image_urls": item.image_urls,
                }
                if "DESCRIPTION" in item.data:
                    data_elem["DESCRIPTION"] = item.data["DESCRIPTION"]
                data[item.isbn] = data_elem

        return data

    def find_item(self, isbn):
        """Return the first item whose isbn matches, or None if not found."""
        for item in self.items:
            if item.isbn == isbn:
                return item
        return None

    def get_items_with_image_urls(self):
        """
        Return the list of items that have scraped image URLs.

        WARNING: this looks at scraped urls to determine if the item has
        images.  Images may be retrieved from GCloud storage.  So, there may
        be cases where this method of searching leads to false
        positives/negatives.
        """
        return [item for item in self.items if item.image_urls != []]