Coverage for src/artemis_sg/items.py: 94%

76 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-05 18:00 -0700

1# -*- coding: utf-8 -*- 

2 

3import json 

4import logging 

5import string 

6 

7from artemis_sg.item import Item 

8 

9 

class Items:
    """
    Collection object for artemis_sg.item.Item objects.
    """

    # Constants
    ALPHA_LIST = list(string.ascii_uppercase)

    # methods
    def __init__(self, keys, value_list, isbn_key):
        """
        Instantiate Items object.

        Arguments:
        keys -- list of strings to use as item keys
        value_list -- list of value lists, nested list positions correspond to keys
        isbn_key -- the key in keys that corresponds with ISBN (primary key)

        Returns:
        Items object

        Raises:
        IndexError -- if value_list is empty or key count does not match
                      the value count of its first row
        """
        namespace = f"{type(self).__name__}.{self.__init__.__name__}"

        # Guard against an empty value_list before indexing row 0 so the
        # failure is logged instead of surfacing as a bare subscript error.
        if not value_list or len(keys) != len(value_list[0]):
            logging.error(f"{namespace}: Key count does not match value count.")
            raise IndexError

        self.isbn_key = isbn_key
        # Map each key to a spreadsheet-style column letter (A, B, C, ...).
        # NOTE(review): zip() silently drops keys beyond the 26th.
        self.column_dict = dict(zip(keys, Items.ALPHA_LIST))

        self.items = [
            Item(keys, entry, row_num, self.isbn_key)
            for row_num, entry in enumerate(value_list)
        ]

    def get_items(self):
        """Return the internal list of Item objects."""
        return self.items

    def __iter__(self):
        """Iterate over the contained Item objects."""
        return iter(self.items)

    def get_json_data_from_file(self, datafile):
        """
        Read and parse JSON data from the given file.

        Arguments:
        datafile -- path of the JSON file to read

        Returns:
        parsed JSON data, or an empty dict if the file is missing or
        does not contain valid JSON
        """
        namespace = f"{type(self).__name__}.{self.get_json_data_from_file.__name__}"
        try:
            # The context manager closes the file; no explicit close() needed.
            with open(datafile, "r") as filepointer:
                return json.load(filepointer)
        except FileNotFoundError:
            logging.error(f"{namespace}: Datafile '{datafile}' not found")
            return {}
        except json.decoder.JSONDecodeError:
            logging.error(
                f"{namespace}: Datafile '{datafile}' did not contain valid JSON"
            )
            return {}

    def load_scraped_data(self, datafile):
        """Load scraped data from datafile and apply it to matching items."""
        data = self.get_json_data_from_file(datafile)
        self.set_scraped_data(data)

    def save_scraped_data(self, datafile):
        """
        Merge this collection's scraped data into datafile.

        Existing entries in datafile are preserved unless this collection
        has an entry for the same ISBN, which takes precedence.
        """
        namespace = f"{type(self).__name__}.{self.save_scraped_data.__name__}"

        internal_data = self.get_scraped_data()
        external_data = self.get_json_data_from_file(datafile)
        external_data.update(internal_data)
        if external_data:
            logging.debug(f"{namespace}: attempting to open {datafile}")
            with open(datafile, "w+") as filepointer:
                logging.debug(f"{namespace}: dumping scraped data to {datafile}")
                json.dump(external_data, filepointer, indent=4)

    def set_scraped_data(self, data):
        """
        Apply scraped data entries to the matching items in this collection.

        Arguments:
        data -- dict keyed by ISBN; each entry may hold 'DESCRIPTION' and
                'image_urls' values (see get_scraped_data)

        ISBNs with no matching item in the collection are ignored.
        """
        for isbn, entry in data.items():
            item = self.find_item(isbn)
            if not item:
                continue
            # get_scraped_data only saves 'DESCRIPTION' when present, so a
            # saved datafile may legitimately lack it; guard the lookup to
            # keep save/load round-trips from raising KeyError.
            if "DESCRIPTION" in entry:
                item.data["DESCRIPTION"] = entry["DESCRIPTION"]
            item.image_urls = entry.get("image_urls", [])

    def get_scraped_data(self):
        """
        Collect scraped data from items that have image URLs.

        Returns:
        dict keyed by ISBN with 'isbn10', 'image_urls', and (when available)
        'DESCRIPTION' entries
        """
        data = {}
        for item in self.items:
            if item.image_urls:
                data_elem = {
                    "isbn10": item.isbn10,
                    "image_urls": item.image_urls,
                }
                if "DESCRIPTION" in item.data:
                    data_elem["DESCRIPTION"] = item.data["DESCRIPTION"]
                data[item.isbn] = data_elem

        return data

    def find_item(self, isbn):
        """Return the first item whose ISBN matches, or None if not found."""
        for item in self.items:
            if item.isbn == isbn:
                return item
        return None

    def get_items_with_image_urls(self):
        """
        Return the items that have scraped image URLs.

        WARNING: this looks at scraped urls to determine if the item has
        images.  Images may be retrieved from GCloud storage.  So, there may
        be cases where this method of searching leads to false
        positives/negatives.
        """
        return [item for item in self.items if item.image_urls]