Coverage for src/artemis_sg/cli.py: 79%

208 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2024-03-06 08:01 -0800

1#!/usr/bin/env python 

2 

3import datetime 

4import logging 

5import os 

6import sys 

7from time import sleep 

8 

9import click 

10from rich.console import Console 

11from rich.progress import track 

12from rich.text import Text 

13from selenium.common.exceptions import NoSuchWindowException 

14 

15from artemis_sg import scraper, spreadsheet 

16from artemis_sg.config import CFG 

17 

# Name of this module (file name without its extension); used as the prefix
# of the "namespace" strings written to the log.
MODULE = os.path.splitext(os.path.basename(__file__))[0]

# Shared rich console used for styled terminal output.
console = Console()

# Templates for the messages echoed when a subcommand is skipped because the
# base command was not given a required option.  "{}" is the subcommand name.
v_skip = "{}: skipping due to lack of VENDOR"
b_skip = "{}: skipping due to lack of WORKBOOK"

23 

24 

@click.group(chain=True)
@click.option("-V", "--verbose", is_flag=True, help="enable verbose mode")
@click.option("-D", "--debug", is_flag=True, help="enable debug mode")
@click.option("-L", "--logfile", is_flag=True, help="log to file")
@click.option("-v", "--vendor", default=None, help="Vendor code")
@click.option(
    "-b", "--workbook", default=None, help="Workbook (Sheets Doc ID or Excel File)"
)
@click.option("-s", "--worksheet", default=None, help="Worksheet within Sheets Doc")
@click.pass_context
def cli(ctx, verbose, debug, logfile, vendor, workbook, worksheet):  # noqa: PLR0913
    """artemis_sg is a tool for processing product spreadsheet data.
    Its subcommands are designed to be used to facilitate the following primary
    endpoint conditions:

    \b
    * A Google Slide Deck of products
    * An enhanced Excel spreadsheet
    * A website order

    The subcommands can be combined into desired workflows.

    The base command includes --vendor, --workbook, and --worksheet options.
    These are used to pass context information to the subcommands. Some
    subcommands expect --vendor and --workbook values to perform as designed.

    Example of Google Slide Deck workflow:

    $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
    scrape download upload generate -t "Cool Deck"

    Example of Sheet Image workflow:

    $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
    scrape download mkthumbs sheet-image -o "NewFile.xlsx"
    """
    # NOTE: the docstring above is rendered by click as the --help text.
    namespace = f"{MODULE}.cli"
    logargs = {"format": "%(levelname)s: %(message)s"}
    if logfile:
        # One timestamped log file per invocation, truncated on open.
        dt = datetime.datetime.now(tz=datetime.UTC).strftime("%Y%m%d-%H%M%S")
        logargs.update(filename=f"artemis_sg-{dt}.log", filemode="w")

    # --debug takes precedence over --verbose.  With neither flag no "level"
    # is passed, so logging keeps its default level.
    if debug:
        level, mode = logging.DEBUG, "Debug"
    elif verbose:
        level, mode = logging.INFO, "Verbose"
    else:
        level = mode = None
    if level is not None:
        logargs["level"] = level

    logging.basicConfig(**logargs)
    if mode is not None:
        # Announce the selected mode at the level that was just enabled.
        logging.log(level, "%s: %s mode enabled.", namespace, mode)

    # load up context object (ctx) that the chained subcommands read from
    ctx.ensure_object(dict)
    ctx.obj["VENDOR"] = vendor
    ctx.obj["WORKBOOK"] = workbook
    ctx.obj["WORKSHEET"] = worksheet

84 

85 

@cli.command()
@click.pass_context
def scrape(ctx):
    """Scrape web data for vendor from workbook:worksheet

    Iterates over the item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command. The ISBN field
    is identified by the --vendor value passed by the base command. For each
    ISBN in the WORKBOOK:WORKSHEET, it searches for item descriptions and
    images in a web browser. It collects this information and stores it in the
    file defined by the configuration field [asg.data.file.scraped]. If data
    for an ISBN already exists in the datafile, the ISBN is skipped and does
    not result in re-scraping data for that record.

    Scrape supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used. If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    cmd = "scrape"

    # Guard clauses: VENDOR is checked first, then WORKBOOK.  A missing value
    # is reported on stderr and the command does nothing else.
    if not ctx.obj["VENDOR"]:
        click.echo(v_skip.format(cmd), err=True)
        return
    if not ctx.obj["WORKBOOK"]:
        click.echo(b_skip.format(cmd), err=True)
        return

    sdb = CFG["asg"]["data"]["file"]["scraped"]
    msg = (
        f"Scraping web data for '{ctx.obj['VENDOR'] or ''!s}' "
        f"using '{ctx.obj['WORKBOOK'] or ''!s}':"
        f"'{ctx.obj['WORKSHEET'] or ''!s}', "
        f"saving data to '{sdb}'..."
    )
    click.echo(msg)
    scraper_wrapper(
        ctx.obj["VENDOR"], ctx.obj["WORKBOOK"], ctx.obj["WORKSHEET"], sdb
    )

130 

131 

@cli.command()
def download():
    """
    Download scraped images

    Iterates over the data records in the file defined by the configuration
    field [asg.data.file.scraped]. For each record, it downloads the image
    files associated with the record to a local directory as defined by the
    configuration field [asg.data.dir.images].
    """
    # The configured path is only looked up here so it can be reported in the
    # debug log; the wrapper below is called without arguments.
    download_path = CFG["asg"]["data"]["dir"]["images"]
    namespace = f"{MODULE}.download"

    click.echo("Downloading images...")
    logging.debug(f"{namespace}: Download path is: {download_path}")

    img_downloader_wrapper()

149 

150 

@cli.command()
def upload():
    """
    Upload local images to Google Cloud Storage Bucket

    Uploads the files in the directory defined by the configuration field
    [asg.data.dir.upload_source] to the Google Cloud bucket defined by the
    configuration field [google.cloud.bucket]. Only the first level of the
    source directory is uploaded. Subdirectories of the source directory are
    not traversed for the upload. All uploaded files are prefixed with value
    defined by the configuration field [google.cloud.bucket_prefix].
    """
    # The configured path is only looked up here so it can be reported in the
    # debug log; the wrapper below is called without arguments.
    upload_source = CFG["asg"]["data"]["dir"]["upload_source"]
    namespace = f"{MODULE}.upload"

    click.echo("Uploading images to Google Cloud...")
    logging.debug(f"{namespace}: Upload source path is: {upload_source}")

    gcloud_wrapper()

170 

171 

@cli.command()
@click.option(
    "-t",
    "--title",
    default=CFG["asg"]["slide_generator"]["title_default"],
    help="Slide deck title",
)
@click.pass_context
def generate(ctx, title):
    """
    Generate a Google Slide Deck


    The slide deck will be given a title based on the values supplied by VENDOR
    and --title. The title slide will be in the following format:

    Artemis Book Sales Presents...
    Vendor Name, Title

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command. The ISBN field
    is identified by the --vendor value passed by the base command. For each
    ISBN in the WORKBOOK:WORKSHEET
    for which it has image data it creates a slide containing the
    spreadsheet data, the description saved in the file defined by the configuration
    field [asg.data.file.scraped], and the images saved in the
    [google.cloud.bucket]. The Google sheet will be saved to the root of the
    Google Drive associated with the credentials created during initial
    installation.

    Generate supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used. If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    cmd = "generate"
    namespace = f"{MODULE}.{cmd}"

    sdb = CFG["asg"]["data"]["file"]["scraped"]
    msg = (
        f"Creating Google Slides deck '{title}' for '{ctx.obj['VENDOR'] or ''!s}' "
        f"using '{ctx.obj['WORKBOOK'] or ''!s}':'{ctx.obj['WORKSHEET'] or ''!s}'..."
    )
    click.echo(msg)
    logging.debug(f"{namespace}: Scraped Items Database is: {sdb}")

    # Unlike the other subcommands, generate does not pre-check VENDOR and
    # WORKBOOK: it attempts the run and, on any failure, reports the error
    # followed by a hint for each of those options that was not provided.
    try:
        slide_generator_wrapper(
            ctx.obj["VENDOR"], ctx.obj["WORKBOOK"], ctx.obj["WORKSHEET"], sdb, title
        )
    except Exception as e:
        click.echo(f"Could not generate slide deck:{e}", err=True)
        for option in ("VENDOR", "WORKBOOK"):
            if not ctx.obj[option]:
                click.echo(f"\t{option} not provided", err=True)

232 

233 

@cli.command()
@click.option("-o", "--output", "out", default="out.xlsx", help="Output file")
@click.pass_context
def sheet_image(ctx, out):
    """
    Insert item thumbnail images into spreadsheet

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command. The ISBN field
    is identified by the --vendor value passed by the base command.

    Modifies a local XLSX spreadsheet file provided by the
    --workbook:--worksheet values passed by the base command to include
    thumbnail images in the second column for ISBN items (field identified by
    --vendor) in which local thumbnail image files are available and saves a
    new XLSX file.

    By default, the thumbnail images are obtained from
    [asg.data.dir.images]/thumbnails and the new XLSX file is saved as
    "out.xlsx" in the current working directory.

    NOTE: Currently, the command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used. If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    cmd = "sheet-image"
    namespace = f"{MODULE}.sheet_image"

    # Guard clauses: VENDOR is checked first, then WORKBOOK.  A missing value
    # is reported on stderr and the command does nothing else.
    if not ctx.obj["VENDOR"]:
        click.echo(v_skip.format(cmd), err=True)
        return
    if not ctx.obj["WORKBOOK"]:
        click.echo(b_skip.format(cmd), err=True)
        return

    # Thumbnails are read from the "thumbnails" subdirectory of the
    # configured image directory.
    download_path = CFG["asg"]["data"]["dir"]["images"]
    image_directory = os.path.join(download_path, "thumbnails")
    msg = (
        f"Creating image enhanced spreadsheet for "
        f"'{ctx.obj['VENDOR'] or ''!s}' "
        f"using '{ctx.obj['WORKBOOK'] or ''!s}':"
        f"'{ctx.obj['WORKSHEET'] or ''!s}', "
        f"saving Excel file to '{out}'..."
    )
    click.echo(msg)
    logging.debug(f"{namespace}: Thumbnail Image Directory is: {image_directory}")

    sheet_image_wrapper(
        ctx.obj["VENDOR"],
        ctx.obj["WORKBOOK"],
        ctx.obj["WORKSHEET"],
        image_directory,
        out,
    )

296 

@cli.command()
@click.option("-o", "--output", "out", default="out.xlsx", help="Output file")
@click.pass_context
def sheet_waves(ctx, out):
    """
    Insert data columns into spreadsheet

    \b
    * Description
    * Dimension
    * ImageURL0-6

    Modifies a local XLSX spreadsheet file provided by the
    --workbook:--worksheet values passed by the base command to include
    additional columns for ISBN items (field identified by
    --vendor) and saves a
    new XLSX file.

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command. The ISBN field
    is identified by the --vendor value passed by the base command. For each,
    values are inserted into the added spreadsheet columns

    By default, the new XLSX file is saved as "out.xlsx" in the current working
    directory.

    NOTE: Currently, the command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used. If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    cmd = "sheet-waves"

    # VENDOR is checked first, then WORKBOOK; a missing value is reported on
    # stderr and nothing else happens.
    if not ctx.obj["VENDOR"]:
        click.echo(v_skip.format(cmd), err=True)
        return
    if not ctx.obj["WORKBOOK"]:
        click.echo(b_skip.format(cmd), err=True)
        return

    msg = (
        f"Creating waves import spreadsheet for "
        f"'{ctx.obj['VENDOR'] or ''!s}' "
        f"using '{ctx.obj['WORKBOOK'] or ''!s}':"
        f"'{ctx.obj['WORKSHEET'] or ''!s}', "
        f"saving Excel file to '{out}'..."
    )
    click.echo(msg)

    # The scraped-items database path is passed to the wrapper as its last
    # argument.
    sdb = CFG["asg"]["data"]["file"]["scraped"]
    sheet_waves_wrapper(
        ctx.obj["VENDOR"],
        ctx.obj["WORKBOOK"],
        ctx.obj["WORKSHEET"],
        out,
        sdb,
    )

358 

359 

@cli.command()
@click.option(
    "--image-directory",
    default=CFG["asg"]["data"]["dir"]["images"],
    help="Image directory",
)
def mkthumbs(image_directory):
    """
    Create thumbnails of images in IMAGE_DIRECTORY

    Creates thumbnail images from images located in a given directory. These
    thumbnail images are saved to a "thumbnails" subdirectory in the original
    image directory. These files are given the same names as their originals.

    By default, the command will use the directory defined by the configuration
    field [asg.data.dir.images] and size them to the dimensions defined by
    [asg.spreadsheet.mkthumbs.width] and [asg.spreadsheet.mkthumbs.height].
    """
    # Tell the user what is about to happen, then record the same path in
    # the debug log before handing off to the wrapper.
    click.echo(f"Creating thumbnails of images in '{image_directory}'...")

    namespace = f"{MODULE}.mkthumbs"
    logging.debug(f"{namespace}: Image Directory is: {image_directory}")

    mkthumbs_wrapper(image_directory)

384 

385 

@cli.command()
@click.option("--email", "email", default="", help="TB Customer email to impersonate")
@click.option(
    "--timeout",
    "timeout",
    default=600,
    type=int,
    help="Maximum time to hold browser open",
)
@click.pass_context
def order(ctx, email, timeout):
    """
    Add items to be ordered to website cart of vendor from spreadsheet

    Populates the website cart for a given --vendor with items from a
    --workbook:--worksheet. The WORKSHEET MUST contain an "Order" column from
    which the command will get the quantity of each item to put into the cart.

    The browser instance with the populated cart is left open for the user to
    review and manually complete the order. The user will be asked to manually
    login during the execution of this command.

    NOTE: Currently, this command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used. If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    NOTE: The browser opened by this command is controlled by this command.
    The browser will automatically close and the session will be terminated at
    the end of the defined waiting period. If the web order has not been
    completed by the end of the waiting period, the cart may be lost depending
    on how the website handles its session data.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    cmd = "order"
    # --timeout is declared with type=int, so click rejects a non-numeric
    # value with a usage error before this body runs.  The conversion is a
    # no-op for click-parsed values and is kept only so a direct caller that
    # still passes a string keeps working.
    timeout = int(timeout)

    # Guard clauses: VENDOR is checked first, then WORKBOOK.  A missing value
    # is reported on stderr and the command does nothing else.
    if not ctx.obj["VENDOR"]:
        click.echo(v_skip.format(cmd), err=True)
        return
    if not ctx.obj["WORKBOOK"]:
        click.echo(b_skip.format(cmd), err=True)
        return

    msg = (
        f"Creating web order for '{ctx.obj['VENDOR'] or ''!s}' "
        f"using '{ctx.obj['WORKBOOK'] or ''!s}':"
        f"'{ctx.obj['WORKSHEET'] or ''!s}', "
        f"Adding items to cart..."
    )
    click.echo(msg)

    order_wrapper(
        email,
        ctx.obj["VENDOR"],
        ctx.obj["WORKBOOK"],
        ctx.obj["WORKSHEET"],
        timeout,
    )

444 

445 

446# wrappers to make the cli testable 

def slide_generator_wrapper(vendor, sheet_id, worksheet, sdb, title):
    """Call ``slide_generator.main`` with the given arguments, in order.

    The ``slide_generator`` module is imported when the wrapper is called,
    not when this module is loaded.  Nothing is returned.
    """
    from artemis_sg import slide_generator as generator

    generator.main(vendor, sheet_id, worksheet, sdb, title)

451 

452 

def gcloud_wrapper():
    """Call ``gcloud.main`` with no arguments.

    The ``gcloud`` module is imported when the wrapper is called, not when
    this module is loaded.  Nothing is returned.
    """
    from artemis_sg import gcloud as uploader

    uploader.main()

457 

458 

def img_downloader_wrapper():
    """Call ``img_downloader.main`` with no arguments.

    The ``img_downloader`` module is imported when the wrapper is called,
    not when this module is loaded.  Nothing is returned.
    """
    from artemis_sg import img_downloader as downloader

    downloader.main()

463 

464 

def scraper_wrapper(vendor, sheet_id, worksheet, sdb):
    """Call ``scraper.main`` with the given arguments, in order.

    The module is re-imported locally at call time, so this wrapper uses the
    ``artemis_sg.scraper`` attribute of the package rather than this
    module's own top-level ``scraper`` name.  Nothing is returned.
    """
    from artemis_sg import scraper as scraper_module

    scraper_module.main(vendor, sheet_id, worksheet, sdb)

469 

470 

def sheet_image_wrapper(vendor, workbook, worksheet, image_directory, out):
    """Forward the arguments, unchanged and in order, to ``spreadsheet.sheet_image``.

    Nothing is returned.
    """
    insert_images = spreadsheet.sheet_image
    insert_images(vendor, workbook, worksheet, image_directory, out)

473 

474 

def mkthumbs_wrapper(image_directory):
    """Forward ``image_directory`` to ``spreadsheet.mkthumbs``.

    Nothing is returned.
    """
    make_thumbnails = spreadsheet.mkthumbs
    make_thumbnails(image_directory)

477 

478 

def order_wrapper(email, vendor, workbook, worksheet, timeout=600):
    """Fill the vendor's web cart from the spreadsheet, then wait for the user.

    Reads (item, quantity) pairs via ``spreadsheet.get_order_items``, obtains
    a driver/scraper pair from ``get_driver_scraper``, logs in, adds each
    item to the cart and loads the cart page.  It then prints instructions
    and counts down ``timeout`` one-second ticks, stopping early as soon as
    ``is_browser_alive`` reports the browser is gone.

    If no scraper is available for ``vendor`` an error is logged and the
    process exits with status 1.
    """
    order_items = spreadsheet.get_order_items(vendor, workbook, worksheet)
    driver, scrapr = get_driver_scraper(vendor, email)

    if not scrapr:
        logging.error(
            f"order: VENDOR '{vendor}' is not supported by the order command."
        )
        sys.exit(1)
        return  # only reached if sys.exit has been stubbed out

    # For "tb" the session impersonates the customer and each spreadsheet
    # item is first resolved to an item number via a search.
    is_tb = vendor == "tb"

    scrapr.load_login_page()
    scrapr.login()
    if is_tb:
        scrapr.impersonate(email)

    for item, qty in order_items:
        item_num = scrapr.search_item_num(item) if is_tb else item
        if is_tb and not item_num:
            # the search found nothing for this item; move on to the next
            continue
        if scrapr.load_item_page(item_num):
            scrapr.add_to_cart(qty)
    scrapr.load_cart_page()

    input_text = Text(
        """
        ******** USER INPUT REQUIRED ********
        Locate the selenium controlled browser
        and manually review and complete your order.
        ******** WAITING FOR USER INPUT ********
        """
    )
    input_text.stylize("bold cyan")
    console.print(input_text)

    warn_text = Text(
        f"WARNING: The browser session will terminate in {timeout} seconds!!!!"
    )
    warn_text.stylize("bold red")
    console.print(warn_text)

    # One progress-bar tick per second; stop as soon as the browser is gone.
    countdown = track(
        range(timeout), description="[red]COUNTING DOWN TIME REMAINING..."
    )
    for _tick in countdown:
        if not is_browser_alive(driver):
            break
        sleep(1)

524 

525 

def sheet_waves_wrapper(vendor, workbook, worksheet, out, scraped_items_db):
    """Forward the arguments, unchanged and in order, to ``spreadsheet.sheet_waves``.

    Nothing is returned.
    """
    add_wave_columns = spreadsheet.sheet_waves
    add_wave_columns(vendor, workbook, worksheet, out, scraped_items_db)

528 

529 

def get_driver_scraper(vendor, email=None):
    """Return a ``(driver, scraper)`` pair for ``vendor``.

    Vendors "tb", "gj" and "sd" get a driver from ``scraper.get_driver()``
    wrapped in ``TBScraper``, ``GJScraper`` or ``SDScraper`` respectively.
    Any other vendor yields ``(None, None)``.

    For "tb" a truthy ``email`` is required; without it an error is logged
    and the process exits with status 1.
    """
    if vendor not in ("tb", "gj", "sd"):
        return None, None

    if vendor == "tb" and not email:
        logging.error(
            f"order: VENDOR '{vendor}' requires the '--email' option to be set."
        )
        sys.exit(1)

    # The driver is created before the vendor-specific scraper class is
    # looked up, matching the order of the individual vendor branches.
    driver = scraper.get_driver()
    if vendor == "tb":
        scrapr = scraper.TBScraper(driver)
    elif vendor == "gj":
        scrapr = scraper.GJScraper(driver)
    else:
        scrapr = scraper.SDScraper(driver)
    return driver, scrapr

548 

549 

def is_browser_alive(driver):
    """Report whether ``driver`` still exposes a current URL.

    Args:
        driver: Object whose ``current_url`` attribute is read.

    Returns:
        bool: True when ``driver.current_url`` is truthy.  False when it is
        empty, or when reading it raises ``AttributeError`` (for example
        ``driver`` is None) or ``NoSuchWindowException``.
    """
    try:
        # Always return a real bool; an empty URL previously fell through
        # and returned None implicitly.
        return bool(driver.current_url)
    except (AttributeError, NoSuchWindowException):
        return False

557 

558 

# Run the click command group when this file is executed as a script.
if __name__ == "__main__":
    cli()