Coverage for src/artemis_sg/cli.py: 80%

184 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-12 06:30 -0700

1#!/usr/bin/env python 

2 

3import logging 

4import os 

5import sys 

6from time import sleep 

7 

8import click 

9from rich.console import Console 

10from rich.progress import track 

11from rich.text import Text 

12from selenium.common.exceptions import NoSuchWindowException 

13 

14from artemis_sg import scraper, spreadsheet 

15from artemis_sg.config import CFG 

16 

# File name of this module without its extension; used as the prefix of the
# "namespace" tag that the commands put in front of their log messages.
MODULE = os.path.splitext(os.path.basename(__file__))[0]
# Module-wide rich console used for styled terminal output.
console = Console()

# Templates echoed to stderr when a subcommand is skipped; "{}" receives the
# subcommand name.
v_skip = "{}: skipping due to lack of VENDOR"
b_skip = "{}: skipping due to lack of WORKBOOK"

22 

23 

@click.group(chain=True)
@click.option("-V", "--verbose", is_flag=True, help="enable verbose mode")
@click.option("-D", "--debug", is_flag=True, help="enable debug mode")
@click.option("-v", "--vendor", default=None, help="Vendor code")
@click.option(
    "-b", "--workbook", default=None, help="Workbook (Sheets Doc ID or Excel File)"
)
@click.option("-s", "--worksheet", default=None, help="Worksheet within Sheets Doc")
@click.pass_context
def cli(ctx, verbose, debug, vendor, workbook, worksheet):
    """artemis_sg is a tool for processing product spreadsheet data.
    Its subcommands are designed to be used to facilitate the following primary
    endpoint conditions:

    \b
    * A Google Slide Deck of products
    * An enhanced Excel spreadsheet
    * A website order

    The subcommands can be combined into desired workflows.

    The base command includes --vendor, --workbook, and --worksheet options.
    These are used to pass context information to the subcommands. Some
    subcommands expect --vendor and --workbook values to perform as designed.

    Example of Google Slide Deck workflow:

        $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
                scrape download upload generate -t "Cool Deck"

    Example of Sheet Image workflow:

        $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
                scrape download mkthumbs sheet-image -o "NewFile.xlsx"
    """
    # NOTE: the docstring above is the group's --help text, so developer notes
    # live in comments instead of the docstring.
    #
    # Parameters (all supplied by click):
    #   ctx       - click context; its ``obj`` dict carries shared values.
    #   verbose   - flag; selects INFO logging unless ``debug`` is also set.
    #   debug     - flag; selects DEBUG logging and wins over ``verbose``.
    #   vendor, workbook, worksheet - stored for the chained subcommands.
    namespace = f"{MODULE}.cli"
    log_format = "%(levelname)s: %(message)s"

    if debug:
        logging.basicConfig(format=log_format, level=logging.DEBUG)
        logging.debug(f"{namespace}: Debug mode enabled.")
    elif verbose:
        logging.basicConfig(format=log_format, level=logging.INFO)
        logging.info(f"{namespace}: Verbose mode enabled.")
    else:
        # No explicit level: the root logger keeps its default threshold.
        logging.basicConfig(format=log_format)

    # load up context object (ctx) so every subcommand can read these values
    ctx.ensure_object(dict)
    ctx.obj["VENDOR"] = vendor
    ctx.obj["WORKBOOK"] = workbook
    ctx.obj["WORKSHEET"] = worksheet

75 

76 

@cli.command()
@click.pass_context
def scrape(ctx):
    """Scrape web data for vendor from workbook:worksheet

    Iterates over the item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command. The ISBN field
    is identified by the --vendor value passed by the base command. For each
    ISBN in the WORKBOOK:WORKSHEET, it searches for item descriptions and
    images in a web browser. It collects this information and stores it in the
    file defined by the configuration field [asg.data.file.scraped]. If data
    for an ISBN already exists in the datafile, the ISBN is skipped and does
    not result in re-scraping data for that record.

    Scrape supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used. If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    # The docstring above doubles as the command's --help text.
    cmd = "scrape"
    shared = ctx.obj

    # Guard clauses: VENDOR is checked before WORKBOOK, and a missing value
    # only skips this command (message on stderr) instead of raising.
    vendor = shared["VENDOR"]
    if not vendor:
        click.echo(v_skip.format(cmd), err=True)
        return
    workbook = shared["WORKBOOK"]
    if not workbook:
        click.echo(b_skip.format(cmd), err=True)
        return

    worksheet = shared["WORKSHEET"]
    sdb = CFG["asg"]["data"]["file"]["scraped"]
    # WORKSHEET is optional, so it alone still needs the empty-string fallback.
    click.echo(
        f"Scraping web data for '{vendor!s}' "
        f"using '{workbook!s}':"
        f"'{worksheet or ''!s}', "
        f"saving data to '{sdb}'..."
    )
    scraper_wrapper(vendor, workbook, worksheet, sdb)

121 

122 

@cli.command()
def download():
    """
    Download scraped images

    Iterates over the data records in the file defined by the configuration
    field [asg.data.file.scraped]. For each record, it downloads the image
    files associated with the record to a local directory as defined by the
    configuration field [asg.data.dir.images].
    """
    # The configured directory is only reported here (debug log); the wrapper
    # below is called without arguments.
    images_dir = CFG["asg"]["data"]["dir"]["images"]
    log_prefix = MODULE + ".download"

    click.echo("Downloading images...")
    logging.debug(f"{log_prefix}: Download path is: {images_dir}")

    img_downloader_wrapper()

140 

141 

@cli.command()
def upload():
    """
    Upload local images to Google Cloud Storage Bucket

    Uploads the files in the directory defined by the configuration field
    [asg.data.dir.upload_source] to the Google Cloud bucket defined by the
    configuration field [google.cloud.bucket]. Only the first level of the
    source directory is uploaded. Subdirectories of the source directory are
    not traversed for the upload. All uploaded files are prefixed with value
    defined by the configuration field [google.cloud.bucket_prefix].
    """
    # The configured source directory is only reported here (debug log); the
    # wrapper below is called without arguments.
    source_dir = CFG["asg"]["data"]["dir"]["upload_source"]
    log_prefix = MODULE + ".upload"

    click.echo("Uploading images to Google Cloud...")
    logging.debug(f"{log_prefix}: Upload source path is: {source_dir}")

    gcloud_wrapper()

161 

162 

163# TODO: (#163) move title default to CFG 

@cli.command()
@click.option("-t", "--title", default="New Arrivals", help="Slide deck title")
@click.pass_context
def generate(ctx, title):
    """
    Generate a Google Slide Deck

    The slide deck will be given a title based on the values supplied by VENDOR
    and --title. The title slide will be in the following format:

        Artemis Book Sales Presents...
        Vendor Name, Title

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command. The ISBN field
    is identified by the --vendor value passed by the base command. For each
    ISBN in the WORKBOOK:WORKSHEET for which it has image data it creates a
    slide containing the spreadsheet data, the description saved in the file
    defined by the configuration field [asg.data.file.scraped], and the images
    saved in the [google.cloud.bucket]. The Google sheet will be saved to the
    root of the Google Drive associated with the credentials created during
    initial installation.

    Generate supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used. If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    # The docstring above doubles as the command's --help text.
    cmd = "generate"
    namespace = f"{MODULE}.{cmd}"

    vendor = ctx.obj["VENDOR"]
    workbook = ctx.obj["WORKBOOK"]
    worksheet = ctx.obj["WORKSHEET"]

    sdb = CFG["asg"]["data"]["file"]["scraped"]
    # Any of the three context values may be unset here, hence the fallbacks.
    msg = (
        f"Creating Google Slides deck '{title}' for '{vendor or ''!s}' "
        f"using '{workbook or ''!s}':'{worksheet or ''!s}'..."
    )
    click.echo(msg)
    logging.debug(f"{namespace}: Scraped Items Database is: {sdb}")

    try:
        slide_generator_wrapper(vendor, workbook, worksheet, sdb, title)
    except Exception as e:
        # Deliberate best-effort boundary: unlike the other commands, missing
        # VENDOR/WORKBOOK is not checked up front; the failure is reported on
        # stderr together with hints about which values were absent.
        click.echo(f"Could not generate slide deck:{e}", err=True)
        if not vendor:
            click.echo("\tVENDOR not provided", err=True)
        if not workbook:
            click.echo("\tWORKBOOK not provided", err=True)

222 

223 

@cli.command()
@click.option("-o", "--output", "out", default="out.xlsx", help="Output file")
@click.pass_context
def sheet_image(ctx, out):
    """
    Insert item thumbnail images into spreadsheet

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command. The ISBN field
    is identified by the --vendor value passed by the base command.

    Modifies a local XLSX spreadsheet file provided by the
    --workbook:--worksheet values passed by the base command to include
    thumbnail images in the second column for ISBN items (field identified by
    --vendor) in which local thumbnail image files are available and saves a
    new XLSX file.

    By default, the thumbnail images are obtained from
    [asg.data.dir.images]/thumbnails and the new XLSX file is saved as
    "out.xlsx" in the current working directory.

    NOTE: Currently, the command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used. If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    # The docstring above doubles as the command's --help text.
    cmd = "sheet-image"
    namespace = f"{MODULE}.sheet_image"
    shared = ctx.obj

    # Guard clauses: VENDOR is checked before WORKBOOK; a missing value skips
    # the command with a message on stderr.
    vendor = shared["VENDOR"]
    if not vendor:
        click.echo(v_skip.format(cmd), err=True)
        return
    workbook = shared["WORKBOOK"]
    if not workbook:
        click.echo(b_skip.format(cmd), err=True)
        return

    worksheet = shared["WORKSHEET"]
    download_path = CFG["asg"]["data"]["dir"]["images"]
    # Thumbnails are read from the "thumbnails" subdirectory of the image dir.
    image_directory = os.path.join(download_path, "thumbnails")
    click.echo(
        "Creating image enhanced spreadsheet for "
        f"'{vendor!s}' "
        f"using '{workbook!s}':"
        f"'{worksheet or ''!s}', "
        f"saving Excel file to '{out}'..."
    )
    logging.debug(f"{namespace}: Thumbnail Image Directory is: {image_directory}")

    sheet_image_wrapper(vendor, workbook, worksheet, image_directory, out)

286 

287 

@cli.command()
@click.option(
    "--image-directory",
    default=CFG["asg"]["data"]["dir"]["images"],
    help="Image directory",
)
def mkthumbs(image_directory):
    """
    Create thumbnails of images in IMAGE_DIRECTORY

    Creates thumbnail images from images located in a given directory. These
    thumbnail images are saved to a "thumbnails" subdirectory in the original
    image directory. These files are given the same names as their originals.

    By default, the command will use the directory defined by the configuration
    field [asg.data.dir.images] and size them to the dimensions defined by
    [asg.spreadsheet.mkthumbs.width] and [asg.spreadsheet.mkthumbs.height].
    """
    # The option default above is read from CFG once, when this module is
    # imported.
    log_prefix = MODULE + ".mkthumbs"

    click.echo(f"Creating thumbnails of images in '{image_directory}'...")
    logging.debug(f"{log_prefix}: Image Directory is: {image_directory}")

    mkthumbs_wrapper(image_directory)

312 

313 

@cli.command()
@click.option("--email", "email", default="", help="TB Customer email to impersonate")
@click.option(
    "--timeout",
    "timeout",
    type=int,
    default=600,
    help="Maximum time to hold browser open",
)
@click.pass_context
def order(ctx, email, timeout):
    """
    Add items to be ordered to website cart of vendor from spreadsheet

    Populates the website cart for a given --vendor with items from a
    --workbook:--worksheet. The WORKSHEET MUST contain an "Order" column from
    which the command will get the quantity of each item to put into the cart.

    The browser instance with the populated cart is left open for the user to
    review and manually complete the order. The user will be asked to manually
    login during the execution of this command.

    NOTE: Currently, this command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used. If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    NOTE: The browser opened by this command is controlled by this command.
    The browser will automatically close and the session will be terminated at
    the end of the defined waiting period. If the web order has not been
    completed by the end of the waiting period, the cart may be lost depending
    on how the website handles its session data.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    # The docstring above doubles as the command's --help text.
    cmd = "order"
    # click now validates --timeout (type=int), so a non-numeric value becomes
    # a usage error instead of a ValueError traceback. The conversion is kept
    # so a direct caller that still passes a numeric string keeps working.
    timeout = int(timeout)
    shared = ctx.obj

    # Guard clauses: VENDOR is checked before WORKBOOK; a missing value skips
    # the command with a message on stderr.
    vendor = shared["VENDOR"]
    if not vendor:
        click.echo(v_skip.format(cmd), err=True)
        return
    workbook = shared["WORKBOOK"]
    if not workbook:
        click.echo(b_skip.format(cmd), err=True)
        return

    worksheet = shared["WORKSHEET"]
    click.echo(
        f"Creating web order for '{vendor!s}' "
        f"using '{workbook!s}':"
        f"'{worksheet or ''!s}', "
        "Adding items to cart..."
    )

    order_wrapper(email, vendor, workbook, worksheet, timeout)

372 

373 

374# wrappers to make the cli testable 

def slide_generator_wrapper(vendor, sheet_id, worksheet, sdb, title):
    """Run ``artemis_sg.slide_generator.main`` with the given arguments.

    The module is imported inside the function, i.e. only when the wrapper is
    actually called. All five arguments are forwarded positionally, unchanged.
    """
    from artemis_sg import slide_generator

    slide_generator.main(vendor, sheet_id, worksheet, sdb, title)

379 

380 

def gcloud_wrapper():
    """Run ``artemis_sg.gcloud.main`` (called without arguments).

    The module is imported inside the function, i.e. only when the wrapper is
    actually called.
    """
    from artemis_sg import gcloud

    gcloud.main()

385 

386 

def img_downloader_wrapper():
    """Run ``artemis_sg.img_downloader.main`` (called without arguments).

    The module is imported inside the function, i.e. only when the wrapper is
    actually called.
    """
    from artemis_sg import img_downloader

    img_downloader.main()

391 

392 

def scraper_wrapper(vendor, sheet_id, worksheet, sdb):
    """Run ``artemis_sg.scraper.main`` with the given arguments.

    All four arguments are forwarded positionally, unchanged.
    """
    # NOTE(review): ``scraper`` is also imported at module level; this local
    # import shadows that name inside the function. Confirm whether tests rely
    # on the local import before removing it.
    from artemis_sg import scraper

    scraper.main(vendor, sheet_id, worksheet, sdb)

397 

398 

def sheet_image_wrapper(vendor, workbook, worksheet, image_directory, out):
    """Forward all arguments, unchanged, to ``spreadsheet.sheet_image``."""
    spreadsheet.sheet_image(vendor, workbook, worksheet, image_directory, out)

401 

402 

def mkthumbs_wrapper(image_directory):
    """Forward ``image_directory``, unchanged, to ``spreadsheet.mkthumbs``."""
    spreadsheet.mkthumbs(image_directory)

405 

406 

def order_wrapper(email, vendor, workbook, worksheet, timeout=600):
    """Fill the vendor's web cart from the spreadsheet and hold the browser open.

    Steps, in order: read the order items from the workbook, obtain a driver
    and scraper for ``vendor``, log in, add each item to the cart, show the
    cart page, then count down up to ``timeout`` seconds (stopping early once
    the browser no longer responds).

    Logs an error and calls ``sys.exit(1)`` when no scraper is available for
    ``vendor``.
    """
    order_items = spreadsheet.get_order_items(vendor, workbook, worksheet)
    driver, scrapr = get_driver_scraper(vendor, email)

    if not scrapr:
        logging.error(
            f"order: VENDOR '{vendor}' is not supported by the order command."
        )
        sys.exit(1)
        # Only reached if sys.exit has been stubbed out; mirrors the original
        # branch, which did nothing further in that case.
        return

    # "tb" is the only vendor that impersonates a customer and needs an item
    # number lookup before the item page can be loaded.
    is_tb = vendor == "tb"

    scrapr.load_login_page()
    scrapr.login()
    if is_tb:
        scrapr.impersonate(email)

    for item, qty in order_items:
        item_num = scrapr.search_item_num(item) if is_tb else item
        if is_tb and not item_num:
            # Lookup found nothing for this item: skip it.
            continue
        if scrapr.load_item_page(item_num):
            scrapr.add_to_cart(qty)
    scrapr.load_cart_page()

    banner = Text(
        """
        ******** USER INPUT REQUIRED ********
        Locate the selenium controlled browser
        and manually review and complete your order.
        ******** WAITING FOR USER INPUT ********
        """
    )
    banner.stylize("bold cyan")
    console.print(banner)

    warning = Text(
        f"WARNING: The browser session will terminate in {timeout} seconds!!!!"
    )
    warning.stylize("bold red")
    console.print(warning)

    # One tick per second; stop as soon as the browser stops answering.
    countdown = track(
        range(timeout), description="[red]COUNTING DOWN TIME REMAINING..."
    )
    for _tick in countdown:
        if not is_browser_alive(driver):
            break
        sleep(1)

452 

453 

def get_driver_scraper(vendor, email=None):
    """Return a ``(driver, scraper)`` pair for ``vendor``.

    Recognised vendor codes are "tb", "gj" and "sd"; any other value yields
    ``(None, None)`` without creating a driver. For "tb" a truthy ``email`` is
    required: without it an error is logged and ``sys.exit(1)`` is called
    before any driver is created.
    """
    # Vendor code -> name of the scraper class in the ``scraper`` module. The
    # class is looked up by name so only the selected one is touched.
    scraper_class_names = {"tb": "TBScraper", "gj": "GJScraper", "sd": "SDScraper"}

    class_name = scraper_class_names.get(vendor)
    if class_name is None:
        return None, None

    if vendor == "tb" and not email:
        logging.error(
            f"order: VENDOR '{vendor}' requires the '--email' option to be set."
        )
        sys.exit(1)

    driver = scraper.get_driver()
    scrapr = getattr(scraper, class_name)(driver)
    return driver, scrapr

472 

473 

def is_browser_alive(driver):
    """Return True if ``driver`` still reports a non-empty current URL.

    Returns False when ``driver.current_url`` is empty, when the attribute
    cannot be read (``AttributeError``, e.g. ``driver`` is None), or when
    selenium raises ``NoSuchWindowException``. The result is always a bool;
    previously an empty URL fell through and returned None.
    """
    try:
        return bool(driver.current_url)
    except (AttributeError, NoSuchWindowException):
        return False

481 

482 

# Run the click command group when this file is executed as a script.
if __name__ == "__main__":
    cli()