Coverage for src/artemis_sg/spreadsheet.py: 84%

299 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2024-03-06 08:01 -0800

1import logging 

2import math 

3import os 

4import re 

5from copy import copy 

6from inspect import getsourcefile 

7 

8from googleapiclient.discovery import build 

9from openpyxl import load_workbook 

10from openpyxl.drawing.image import Image 

11from openpyxl.styles import Alignment 

12from openpyxl.utils import get_column_letter 

13from openpyxl.utils.exceptions import InvalidFileException 

14from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder 

15from PIL import Image as PIL_Image 

16from PIL import UnidentifiedImageError 

17 

18from artemis_sg import app_creds, items, vendor 

19from artemis_sg.config import CFG 

20 

# Base name of this source file without its extension; used as the prefix of
# the ``namespace`` strings that the functions below put in their log messages.
MODULE = os.path.splitext(os.path.basename(__file__))[0]

22 

23 

def get_worksheet(wb_obj, worksheet):
    """Select a worksheet from a workbook.

    Returns ``wb_obj[worksheet]`` when ``worksheet`` is truthy; otherwise
    falls back to the first entry of ``wb_obj.worksheets``.
    """
    if worksheet:
        return wb_obj[worksheet]
    return wb_obj.worksheets[0]

27 

def get_sheet_keys(ws):
    """Return the first row of ``ws.values`` with string entries upper-cased.

    Non-string entries (numbers, ``None``, ...) are passed through unchanged.

    Returns:
        A list built from the first row, or an empty list when ``ws.values``
        yields no rows at all (previously this case raised
        ``UnboundLocalError``).
    """
    # Only the first row is needed; do not consume the rest of the iterable.
    first_row = next(iter(ws.values), None)
    if first_row is None:
        return []
    return [x.upper() if isinstance(x, str) else x for x in first_row]

33 

def shift_col(ws, col_key, target_idx):
    """Move the column headed by ``col_key`` to 1-based position ``target_idx``.

    A blank column is inserted at the target, the source column (rows 1 through
    ``ws.max_row``) is moved into it, and the vacated source column is deleted.
    ``col_key`` must match an entry of ``get_sheet_keys(ws)``; otherwise
    ``list.index`` raises ``ValueError``.
    """
    ws.insert_cols(target_idx)
    # Look the header up only after the insert so the index reflects the shift.
    src_idx = get_sheet_keys(ws).index(col_key) + 1  # openpyxl is 1-based
    src_ltr = get_column_letter(src_idx)
    whole_column = f"{src_ltr}1:{src_ltr}{ws.max_row}"
    ws.move_range(whole_column, rows=0, cols=target_idx - src_idx)
    ws.delete_cols(src_idx)

43 

def copy_cell_style(ws, style_src_cell, target_cell):
    """Copy the style attributes of ``style_src_cell`` onto ``ws[target_cell]``.

    Nothing happens unless ``style_src_cell.has_style`` is truthy. Each
    attribute is shallow-copied so the two cells do not share style objects.
    """
    if not style_src_cell.has_style:
        return
    style_attrs = (
        "font",
        "border",
        "fill",
        "number_format",
        "protection",
        "alignment",
    )
    for attr in style_attrs:
        duplicated = copy(getattr(style_src_cell, attr))
        setattr(ws[target_cell], attr, duplicated)

52 

def create_col(ws, col_key, target_idx, style_src_cell=None):
    """Insert a new column at 1-based ``target_idx`` headed by ``col_key``.

    The header text written to row 1 is ``col_key.title()``. When
    ``style_src_cell`` is truthy its style is copied onto the new header cell.
    """
    ws.insert_cols(target_idx)
    header_ref = get_column_letter(target_idx) + "1"
    ws[header_ref] = col_key.title()
    if not style_src_cell:
        return
    copy_cell_style(ws, style_src_cell, header_ref)

59 

def sequence_worksheet(ws, col_order, isbn_key):
    """Arrange the leading columns of ``ws`` to follow ``col_order``.

    Each entry of ``col_order`` claims the 1-based column matching its position
    in the sequence. The placeholder entry ``"ISBN"`` stands for ``isbn_key``.
    Columns already present in the header row are moved into place; missing
    ones are created as new columns.
    """
    # Header snapshot taken once, before any column is moved or created.
    existing_keys = get_sheet_keys(ws)
    for position, entry in enumerate(col_order, start=1):
        column_key = isbn_key if entry == "ISBN" else entry
        if column_key not in existing_keys:
            create_col(ws, column_key, position)
        else:
            shift_col(ws, column_key, position)

70 

def size_sheet_cols(ws, isbn_key):
    """Assign a width to every column of ``ws`` and install the dimensions.

    The default width is the longest ``str(cell.value)`` in the column times
    the configured ``col_buffer``, capped at ``max_col_width``. The column whose
    header equals ``isbn_key`` and the ``"IMAGE"`` column get their own
    configured widths instead. All settings are read from
    ``CFG["asg"]["spreadsheet"]["sheet_image"]``.
    """
    holder = DimensionHolder(worksheet=ws)
    headers = get_sheet_keys(ws)
    settings = CFG["asg"]["spreadsheet"]["sheet_image"]
    for col_num, header in enumerate(headers, start=1):  # openpyxl is 1-based
        letter = get_column_letter(col_num)
        longest = max(len(str(cell.value)) for cell in ws[letter])
        width = longest * settings["col_buffer"]
        if width > settings["max_col_width"]:
            width = settings["max_col_width"]
        holder[letter] = ColumnDimension(ws, index=letter, width=width)

        # Special columns replace the content-derived width set just above.
        if header == isbn_key:
            isbn_width = math.ceil(
                settings["isbn_col_width"] * settings["col_buffer"]
            )
            holder[letter] = ColumnDimension(ws, index=letter, width=isbn_width)
        if header == "IMAGE":
            holder[letter] = ColumnDimension(
                ws, index=letter, width=settings["image_col_width"]
            )

    ws.column_dimensions = holder

101 

def insert_image(image_directory, ws, isbn_cell, image_cell):
    """Place ``<isbn>.jpg`` from ``image_directory`` into ``image_cell``.

    The ISBN is taken from ``isbn_cell.value``: floats are truncated to int,
    strings of the form ``="..."`` are unwrapped, and the result is converted
    to a stripped string. A falsy cell value gives an empty ISBN. The row
    holding ``image_cell`` always receives the configured image row height;
    the picture itself is added only when the file exists.
    """
    namespace = f"{MODULE}.{insert_image.__name__}"
    image_row_height = CFG["asg"]["spreadsheet"]["sheet_image"]["image_row_height"]

    isbn = isbn_cell.value
    if not isbn:
        isbn = ""
    else:
        if isinstance(isbn, float):
            isbn = int(isbn)
        elif isinstance(isbn, str):
            quoted = re.search('="(.*)"', isbn)
            if quoted is not None:
                isbn = quoted.group(1)
        try:
            isbn = str(isbn).strip()
        except Exception as e:
            logging.error(f"{namespace}: Err reading isbn '{isbn}', err: '{e}'")
            isbn = ""

    # Row height is applied whether or not an image is found.
    ws.row_dimensions[image_cell.row].height = image_row_height

    filepath = os.path.join(image_directory, f"{isbn}.jpg")
    logging.debug(f"{namespace}: Attempting to insert '{filepath}'.")
    if not os.path.isfile(filepath):
        return
    anchor = f"{image_cell.column_letter}{image_cell.row}"
    ws.add_image(Image(filepath), anchor)
    logging.info(f"{namespace}: Inserted '{filepath}'.")

132 

133 

def sheet_image(vendor_code, workbook, worksheet, image_directory, out):
    """Reorder and size a worksheet, insert per-row images, and save to ``out``.

    The vendor identified by ``vendor_code`` supplies the ISBN column key. The
    sheet's columns are sequenced per the configured ``col_order`` and sized,
    then every row from 2 through ``ws.max_row`` gets its ``IMAGE`` cell
    centred and an image inserted based on that row's ISBN cell.

    Raises:
        ValueError: when the ``IMAGE`` column or the vendor's ISBN column is
            missing from the header row after sequencing.
    """
    namespace = f"{MODULE}.{sheet_image.__name__}"

    # Vendor lookup provides the header name of the ISBN column.
    logging.debug(f"{namespace}: Instantiate vendor.")
    vendr = vendor.Vendor(vendor_code)
    vendr.set_vendor_data()

    isbn_key = vendr.isbn_key
    logging.debug(f"{namespace}: Setting ISBN_KEY to '{isbn_key}'.")

    logging.info(f"{namespace}: Workbook is {workbook}")
    wb = load_workbook(workbook)
    ws = get_worksheet(wb, worksheet)
    logging.info(f"{namespace}: Worksheet is {ws.title}")

    col_order = CFG["asg"]["spreadsheet"]["sheet_image"]["col_order"]
    sequence_worksheet(ws, col_order, isbn_key)
    size_sheet_cols(ws, isbn_key)

    # Resolve the column letters of the IMAGE and ISBN columns.
    headers = get_sheet_keys(ws)
    try:
        img_idx_ltr = get_column_letter(headers.index("IMAGE") + 1)
    except ValueError:
        logging.error(f"{namespace}: Err finding 'IMAGE' column in sheet '{workbook}'.")
        logging.error("Aborting.")
        raise
    try:
        isbn_idx_ltr = get_column_letter(headers.index(isbn_key) + 1)
    except ValueError:
        logging.error(
            f"{namespace}: Err, no '{isbn_key}' column in sheet '{workbook}'.")
        logging.error("Aborting.")
        raise

    # Row 1 is the header, so data rows run from 2 to max_row inclusive.
    for row_num in range(2, ws.max_row + 1):
        isbn_cell = ws[f"{isbn_idx_ltr}{row_num}"]
        image_cell = ws[f"{img_idx_ltr}{row_num}"]
        image_cell.alignment = Alignment(horizontal="center")
        insert_image(image_directory, ws, isbn_cell, image_cell)

    wb.save(out)

182 

183 

def validate_isbn(isbn):
    """Normalise ``isbn`` to a string of its integer value.

    A string of the form ``="..."`` is unwrapped first. The value is then
    passed through ``int`` and ``str``. Anything ``int`` rejects is logged as
    an error and produces ``""``.
    """
    namespace = f"{MODULE}.{validate_isbn.__name__}"
    if isinstance(isbn, str):
        wrapped = re.search('="(.*)"', isbn)
        if wrapped is not None:
            isbn = wrapped.group(1)
    try:
        return str(int(isbn)).strip()
    except Exception as e:
        logging.error(f"{namespace}: Err reading isbn '{isbn}', err: '{e}'")
        return ""

197 

198 

def validate_qty(qty):
    """Normalise an order quantity to a string of its integer value.

    Returns ``None`` (after logging an error) when ``qty`` cannot be converted
    with ``int``.
    """
    namespace = f"{MODULE}.{validate_qty.__name__}"
    try:
        normalised = str(int(qty)).strip()
    except Exception as e:
        logging.error(f"{namespace}: Err reading Order qty '{qty}', err: '{e}'")
        return None
    return normalised

207 

208 

def get_order_items(vendor_code, workbook, worksheet):
    """Collect ``(isbn, qty)`` pairs from an order worksheet.

    The header row (row 1) is searched for the vendor's ISBN column and for a
    column titled exactly ``"Order"``. Every following row contributes one
    tuple, provided both its ISBN and its order quantity validate; rows that
    fail either validation are skipped.

    Args:
        vendor_code: code passed to ``vendor.Vendor`` to look up the ISBN key.
        workbook: path/handle accepted by ``load_workbook``.
        worksheet: sheet name, or a falsy value for the first sheet.

    Returns:
        A list of ``(isbn, qty)`` tuples, both members being strings.

    Raises:
        ValueError: when a data row exists but a required header is missing.
            (Previously this surfaced as an ``UnboundLocalError``.) A sheet
            without data rows still returns an empty list.
    """
    namespace = f"{MODULE}.{get_order_items.__name__}"

    # get vendor info from database
    logging.debug(f"{namespace}: Instantiate vendor.")
    vendr = vendor.Vendor(vendor_code)
    vendr.set_vendor_data()

    isbn_key = vendr.isbn_key
    logging.debug(f"{namespace}: Setting ISBN_KEY to '{isbn_key}'.")

    # Load worksheet
    logging.info(f"{namespace}: Workbook is {workbook}")
    wb = load_workbook(workbook)
    ws = get_worksheet(wb, worksheet)
    logging.info(f"{namespace}: Worksheet is {ws.title}")

    # Locate the ISBN and Order columns as 0-based positions within a row
    # tuple. No early exit: if a header is repeated, the last match wins.
    isbn_idx = None
    order_idx = None
    for cell in ws[1]:
        if cell.value == isbn_key:
            isbn_idx = cell.column - 1
        if cell.value == "Order":
            order_idx = cell.column - 1
    missing = [
        name
        for name, idx in ((isbn_key, isbn_idx), ("Order", order_idx))
        if idx is None
    ]

    order_items = []
    for row in ws.iter_rows(min_row=2):
        if missing:
            # Fail only once a row actually needs the columns, so that a
            # header-only sheet keeps yielding an empty order.
            message = (
                f"{namespace}: Err, missing column(s) {missing} "
                f"in sheet '{workbook}'."
            )
            logging.error(message)
            raise ValueError(message)
        isbn = validate_isbn(row[isbn_idx].value)
        if not isbn:
            continue
        qty = validate_qty(row[order_idx].value)
        if not qty:
            continue
        order_items.append((isbn, qty))

    return order_items

252 

253 

def mkthumbs(image_directory):
    """Create thumbnails for the images found in ``image_directory``.

    Thumbnails are written to ``<image_directory>/thumbnails`` under the same
    file name as the source image. Each one is a copy of the bundled
    ``data/artemis_logo.png`` background with the source image, shrunk to fit
    the configured ``mkthumbs`` width/height, pasted at its centre and
    converted to RGB.

    Skipped files:
      * anything not ending in ``.jpg`` or ``.png``;
      * supplemental images (``name-<digits>.ext``) whose primary
        ``name.ext`` exists in ``image_directory``;
      * images that already have a thumbnail.

    A file that PIL cannot identify as an image is logged and deleted.

    Raises:
        Exception: when the thumbnail directory is still absent after the
            attempt to create it.
    """
    namespace = f"{MODULE}.{mkthumbs.__name__}"

    thumb_width = CFG["asg"]["spreadsheet"]["mkthumbs"]["width"]
    thumb_height = CFG["asg"]["spreadsheet"]["mkthumbs"]["height"]

    # Directory of this source file, resolved through a throwaway lambda.
    here = os.path.dirname(getsourcefile(lambda: 0))
    data = os.path.abspath(os.path.join(here, "data"))
    logo = os.path.join(data, "artemis_logo.png")
    logging.debug(f"{namespace}: Found image for thumbnail background at '{logo}'")
    sub_dir = "thumbnails"

    # Context manager guarantees the background's file handle is released
    # even when no thumbnail is produced or an error interrupts the loop.
    with PIL_Image.open(logo) as back:
        thumb_dir = os.path.join(image_directory, sub_dir)
        logging.debug(f"{namespace}: Defining thumbnail directory as '{thumb_dir}'")
        if not os.path.isdir(thumb_dir):
            logging.debug(f"{namespace}: Creating directory '{thumb_dir}'")
            os.mkdir(thumb_dir)
            if os.path.isdir(thumb_dir):
                logging.info(
                    f"{namespace}: Successfully created directory '{thumb_dir}'"
                )
            else:
                message = (
                    f"{namespace}: Failed to create directory '{thumb_dir}'. Aborting."
                )
                logging.error(message)
                raise Exception(message)

        for f in os.listdir(image_directory):
            # Valid files are JPG or PNG.
            if not re.match(r"^.+\.(?:jpg|png)$", f):
                continue
            # Supplemental images carry a "-[0-9]+" suffix before the file
            # type AND have a suffix-less primary file in image_directory.
            suffix = re.match(r"(^.+)-[0-9]+(\.(?:jpg|png))$", f)
            if suffix:
                primary = suffix.group(1) + suffix.group(2)
                if os.path.isfile(os.path.join(image_directory, primary)):
                    continue
            thumb_file = os.path.join(thumb_dir, f)
            # don't remake thumbnails
            if os.path.isfile(thumb_file):
                continue

            bk = back.copy()
            file_path = os.path.join(image_directory, f)
            try:
                fg = PIL_Image.open(file_path)
            except UnidentifiedImageError:
                logging.error(f"{namespace}: Err reading '{f}', deleting '{file_path}'")
                os.remove(file_path)
                continue
            with fg:
                fg.thumbnail((thumb_width, thumb_height))
                # Top-left corner that centres the thumbnail on the background.
                size = (
                    int((bk.size[0] - fg.size[0]) / 2),
                    int((bk.size[1] - fg.size[1]) / 2),
                )
                bk.paste(fg, size)
            logging.debug(f"{namespace}: Attempting to save thumbnail '{thumb_file}'")
            bkn = bk.convert("RGB")
            bkn.save(thumb_file)
            logging.info(f"{namespace}: Successfully created thumbnail '{thumb_file}'")

311 

312 

def get_sheet_data(workbook, worksheet=None):
    """Return the rows of a sheet, from an Excel file or from Google Sheets.

    ``workbook`` is first tried as an Excel file via ``load_workbook``. If that
    raises ``FileNotFoundError`` or ``InvalidFileException``, ``workbook`` is
    used as a Google spreadsheet id instead; with no ``worksheet`` given, the
    title of the spreadsheet's first sheet is used as the range.
    """
    namespace = f"{MODULE}.{get_sheet_data.__name__}"
    try:
        # Local Excel workbook path.
        wb = load_workbook(workbook)
        sheet_data = list(get_worksheet(wb, worksheet).values)
    except (FileNotFoundError, InvalidFileException):
        # Fall back to the Google Sheets API.
        logging.info(f"{namespace}: Authenticating to google api.")
        creds = app_creds.app_creds()
        sheets_api = build("sheets", "v4", credentials=creds)
        if worksheet:
            sheet_range = worksheet
        else:
            metadata = sheets_api.spreadsheets().get(spreadsheetId=workbook).execute()
            sheets = metadata.get("sheets", "")
            sheet_range = sheets.pop(0).get("properties", {}).get("title")
        response = (
            sheets_api.spreadsheets()
            .values()
            .get(range=sheet_range, spreadsheetId=workbook)
            .execute()
        )
        sheet_data = response.get("values")
    return sheet_data

350 

351 

def sheet_waves(vendor_code, workbook, worksheet, out, scraped_items_db):
    """Append scraped description/dimension/image-URL columns and save to ``out``.

    Sheet rows are loaded into an ``items.Items`` collection that is enriched
    from ``scraped_items_db``. The workbook is then opened, nine extra header
    cells are written to row 1, and every data row whose ISBN matches an item
    receives that item's data values and up to seven image URLs.

    Raises:
        Exception: when no header cell matches the vendor's ISBN key
            (compared case-insensitively).
    """
    namespace = f"{MODULE}.{sheet_waves.__name__}"

    addl_data_columns = [
        "Description",
        "Dimension",
    ]
    addl_image_columns = [
        "ImageURL0",
        "ImageURL1",
        "ImageURL2",
        "ImageURL3",
        "ImageURL4",
        "ImageURL5",
        "ImageURL6",
    ]
    addl_columns = addl_data_columns + addl_image_columns

    # Vendor lookup provides the header name of the ISBN column.
    logging.debug(f"{namespace}: Instantiate vendor.")
    vendr = vendor.Vendor(vendor_code)
    vendr.set_vendor_data()

    isbn_key = vendr.isbn_key
    logging.debug(f"{namespace}: Setting ISBN_KEY to '{isbn_key}'.")

    sheet_data = get_sheet_data(workbook, worksheet)

    # First row is the header; falsy header cells (e.g. None) are dropped.
    sheet_keys = list(filter(None, sheet_data.pop(0)))
    items_obj = items.Items(sheet_keys, sheet_data, vendr.isbn_key)
    items_obj.load_scraped_data(scraped_items_db)

    logging.info(f"{namespace}: Workbook is {workbook}")
    wb = load_workbook(workbook)
    ws = get_worksheet(wb, worksheet)
    logging.info(f"{namespace}: Worksheet is {ws.title}")

    # Append the extra header cells to row 1.
    # NOTE(review): headers start at col_insert_idx + 1, so the 1-based column
    # col_insert_idx itself stays blank - confirm this gap is intended.
    col_insert_idx = ws.max_column + 1
    ws.insert_cols(col_insert_idx, len(addl_columns))
    for offset, col in enumerate(addl_columns, start=1):
        ws.cell(row=1, column=col_insert_idx + offset, value=col)

    # 0-based position of the ISBN column within a row tuple.
    isbn_idx = next(
        (
            cell.column - 1
            for cell in ws[1]
            if isinstance(cell.value, str) and cell.value.upper() == isbn_key.upper()
        ),
        None,
    )
    if isbn_idx is None:
        logging.error(f"{namespace}: Err no isbn column in spreadsheet")
        raise Exception

    for row in ws.iter_rows(min_row=2):
        item = items_obj.find_item(str(row[isbn_idx].value))
        if not item:
            continue
        # 0-based col_insert_idx addresses the first header cell written above.
        idx = col_insert_idx
        for key in addl_data_columns:
            lookup = key.upper()
            if lookup in item.data:
                row[idx].value = item.data[lookup]
            # Advance even when the key is absent so columns stay aligned.
            idx += 1
        for img_url in item.image_urls[:7]:
            row[idx].value = img_url
            idx += 1

    wb.save(out)