Coverage for src/artemis_sg/items.py: 90%

91 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2024-03-06 08:01 -0800

1import json 

2import logging 

3import string 

4 

5from artemis_sg.item import Item 

6 

7 

class Items:
    """
    Collection object for artemis_sg.item.Item objects.

    Wraps Item objects built from tabular (spreadsheet-style) data and
    provides lookup, iteration, and scraped-data persistence helpers.
    """

    # Constants
    # Spreadsheet column letters; limits the collection to 26 keys.
    ALPHA_LIST = tuple(string.ascii_uppercase)

    # methods
    def __init__(self, keys, value_list, isbn_key):
        """
        Instantiate Items object.

        Arguments:
        keys -- list of strings to use as item keys
        value_list -- list of value lists, nested list positions correspond to keys
        isbn_key -- the key in keys that corresponds with ISBN (primary key)

        Returns:
        Items object

        Raises:
        IndexError -- if the key count does not match the value count of
                      the first row of value_list
        """
        namespace = f"{type(self).__name__}.{self.__init__.__name__}"

        len_keys = len(keys)
        # Guard against an empty value_list so a mismatch is logged and
        # raised below instead of surfacing as a bare indexing error.
        len_vals = len(value_list[0]) if value_list else 0
        if len_keys != len_vals:
            logging.error(
                f"{namespace}: Key count ({len_keys}) "
                f"does not match value count ({len_vals})."
            )
            logging.debug(f"keys: {keys}")
            if value_list:
                logging.debug(f"first_row values: {value_list[0]}")
            raise IndexError(
                f"Key count ({len_keys}) does not match value count ({len_vals})"
            )

        self.isbn_key = isbn_key
        # Map each key to a spreadsheet column letter (A, B, C, ...).
        self.column_dict = dict(zip(keys, Items.ALPHA_LIST))

        self.items = []
        for row_num, entry in enumerate(value_list):
            i = Item(keys, entry, row_num, self.isbn_key)
            # Skip rows whose values are all empty.
            if any(i.data.values()):
                self.items.append(i)

    def get_items(self):
        """Return the internal list of Item objects."""
        return self.items

    def __iter__(self):
        """Iterate over the contained Item objects."""
        return iter(self.items)

    def get_json_data_from_file(self, datafile):
        """
        Load and return JSON data from the given file.

        Arguments:
        datafile -- path of a JSON file to read

        Returns:
        Parsed JSON data, or an empty dict if the file is missing or
        does not contain valid JSON.
        """
        namespace = f"{type(self).__name__}.{self.get_json_data_from_file.__name__}"
        try:
            # The context manager closes the file; no explicit close() needed.
            with open(datafile) as filepointer:
                return json.load(filepointer)
        except FileNotFoundError:
            logging.error(f"{namespace}: Datafile '{datafile}' not found")
            return {}
        except json.decoder.JSONDecodeError:
            logging.error(
                f"{namespace}: Datafile '{datafile}' did not contain valid JSON"
            )
            return {}

    def load_scraped_data(self, datafile):
        """Read scraped data from the given JSON file and apply it to items."""
        data = self.get_json_data_from_file(datafile)
        self.set_scraped_data(data)

    def save_scraped_data(self, datafile):
        """
        Merge this collection's scraped data into the given JSON file.

        Entries already in the file are preserved unless this collection
        has an entry for the same ISBN, which takes precedence.
        """
        namespace = f"{type(self).__name__}.{self.save_scraped_data.__name__}"

        internal_data = self.get_scraped_data()
        external_data = self.get_json_data_from_file(datafile)
        # Internal data wins on ISBN collisions.
        external_data.update(internal_data)
        if external_data:
            logging.debug(f"{namespace}: attempting to open {datafile}")
            with open(datafile, "w+") as filepointer:
                logging.debug(f"{namespace}: dumping scraped data to {datafile}")
                json.dump(external_data, filepointer, indent=4)

    def set_scraped_data(self, data):
        """
        Apply scraped data entries (keyed by ISBN) to matching items.

        ISBNs with no matching item are skipped.  Missing DESCRIPTION,
        DIMENSION, and image_urls fields fall back to empty values.
        """
        for isbn, elem in data.items():
            item = self.find_item(isbn)
            if not item:
                continue
            item.data["DESCRIPTION"] = elem.get("DESCRIPTION", "")
            item.data["DIMENSION"] = elem.get("DIMENSION", "")
            # Default to an empty list so a malformed entry without
            # "image_urls" no longer raises KeyError.
            item.image_urls = elem.get("image_urls", [])

    def get_scraped_data(self):
        """
        Return scraped data for all items that have image URLs.

        Returns:
        dict keyed by ISBN; each value holds isbn10, image_urls, and any
        DESCRIPTION/DIMENSION values present on the item.
        """
        data = {}
        for item in self.items:
            # Only items with scraped image URLs are persisted.
            if item.image_urls:
                data_elem = {
                    "isbn10": item.isbn10,
                    "image_urls": item.image_urls,
                }
                if "DESCRIPTION" in item.data:
                    data_elem["DESCRIPTION"] = item.data["DESCRIPTION"]
                if "DIMENSION" in item.data:
                    data_elem["DIMENSION"] = item.data["DIMENSION"]
                data[item.isbn] = data_elem

        return data

    def find_item(self, isbn):
        """Return the first item whose ISBN matches `isbn`, or None."""
        for item in self.items:
            if item.isbn == isbn:
                return item
        return None

    def get_items_with_image_urls(self):
        """
        Return the items that have scraped image URLs.

        WARNING: this looks at scraped URLs to determine if the item has
        images.  Images may be retrieved from GCloud storage.  So, there
        may be cases where this method of searching leads to false
        positives/negatives.
        """
        return [item for item in self.items if item.image_urls]