Coverage for src/artemis_sg/cli.py: 81%

176 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-05 09:33 -0700

1#!/usr/bin/env python 

2# -*- coding: utf-8 -*- 

3 

4import logging 

5import os 

6import sys 

7from time import sleep 

8 

9import click 

10from selenium.common.exceptions import NoSuchWindowException 

11 

12import artemis_sg.scraper as scraper 

13import artemis_sg.spreadsheet as spreadsheet 

14from artemis_sg.config import CFG 

15 

# Name of this module: the file name with directory and extension stripped.
# It is used as the prefix of the namespaces that appear in log messages.
MODULE = os.path.splitext(os.path.basename(__file__))[0]

# Templates for the notice echoed when a subcommand is skipped because the
# base command was not given a needed option.  "{}" receives the subcommand
# name.
v_skip = "{}: skipping due to lack of VENDOR"
b_skip = "{}: skipping due to lack of WORKBOOK"

20 

21 

@click.group(chain=True)
@click.option("-V", "--verbose", is_flag=True, help="enable verbose mode")
@click.option("-D", "--debug", is_flag=True, help="enable debug mode")
@click.option("-v", "--vendor", default=None, help="Vendor code")
@click.option(
    "-b", "--workbook", default=None, help="Workbook (Sheets Doc ID or Excel File)"
)
@click.option("-s", "--worksheet", default=None, help="Worksheet within Sheets Doc")
@click.pass_context
def cli(ctx, verbose, debug, vendor, workbook, worksheet):
    """artemis_sg is a tool for processing product spreadsheet data.
    Its subcommands are designed to be used to facilitate the following
    primary endpoint conditions:

    \b
    * A Google Slide Deck of products
    * An enhanced Excel spreadsheet
    * A website order

    The subcommands can be combined into desired workflows.

    The base command includes --vendor, --workbook, and --worksheet options.
    These are used to pass context information to the subcommands.  Some
    subcommands expect --vendor and --workbook values to perform as designed.

    Example of Google Slide Deck workflow:

        $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
                scrape download upload generate -t "Cool Deck"

    Example of Sheet Image workflow:

        $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
                scrape download mkthumbs sheet-image -o "NewFile.xlsx"
    """
    namespace = f"{MODULE}.cli"
    log_format = "%(levelname)s: %(message)s"

    # --debug takes precedence over --verbose when both flags are given.
    if debug:
        logging.basicConfig(format=log_format, level=logging.DEBUG)
        logging.debug(f"{namespace}: Debug mode enabled.")
    elif verbose:
        logging.basicConfig(format=log_format, level=logging.INFO)
        logging.info(f"{namespace}: Verbose mode enabled.")
    else:
        # No level given: the logging module's default level applies.
        logging.basicConfig(format=log_format)

    # Load up the context object (ctx) that the chained subcommands read.
    ctx.ensure_object(dict)
    ctx.obj["VENDOR"] = vendor
    ctx.obj["WORKBOOK"] = workbook
    ctx.obj["WORKSHEET"] = worksheet

72 

73 

@cli.command()
@click.pass_context
def scrape(ctx):
    """Scrape web data for vendor from workbook:worksheet

    Iterates over the item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.  For each
    ISBN in the WORKBOOK:WORKSHEET, it searches for item descriptions and
    images in a web browser.  It collects this information and stores it in the
    file defined by the configuration field [asg.data.file.scraped].  If data
    for an ISBN already exists in the datafile, the ISBN is skipped and does
    not result in re-scraping data for that record.

    Scrape supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    cmd = "scrape"

    # Both VENDOR and WORKBOOK are needed; report the first one missing on
    # stderr and skip the command without raising.
    vendor = ctx.obj["VENDOR"]
    if not vendor:
        click.echo(v_skip.format(cmd), err=True)
        return
    workbook = ctx.obj["WORKBOOK"]
    if not workbook:
        click.echo(b_skip.format(cmd), err=True)
        return

    worksheet = ctx.obj["WORKSHEET"]
    sdb = CFG["asg"]["data"]["file"]["scraped"]
    msg = (
        f"Scraping web data for '{vendor}' "
        f"using '{workbook}':'{worksheet or ''}', "
        f"saving data to '{sdb}'..."
    )
    click.echo(msg)
    scraper_wrapper(vendor, workbook, worksheet, sdb)

117 

118 

@cli.command()
def download():
    """
    Download scraped images

    Iterates over the data records in the file defined by the configuration
    field [asg.data.file.scraped].  For each record, it downloads the image
    files associated with the record to a local directory as defined by the
    configuration field [asg.data.dir.images].
    """
    namespace = f"{MODULE}.download"

    # The path is read here only so it can be reported in the debug log; the
    # wrapper is called without arguments.
    download_path = CFG["asg"]["data"]["dir"]["images"]
    click.echo("Downloading images...")
    logging.debug(f"{namespace}: Download path is: {download_path}")

    img_downloader_wrapper()

136 

137 

@cli.command()
def upload():
    """
    Upload local images to Google Cloud Storage Bucket

    Uploads the files in the directory defined by the configuration field
    [asg.data.dir.upload_source] to the Google Cloud bucket defined by the
    configuration field [google.cloud.bucket].  Only the first level of the
    source directory is uploaded.  Subdirectories of the source directory are
    not traversed for the upload.  All uploaded files are prefixed with value
    defined by the configuration field [google.cloud.bucket_prefix].
    """
    namespace = f"{MODULE}.upload"

    # The path is read here only so it can be reported in the debug log; the
    # wrapper is called without arguments.
    upload_source = CFG["asg"]["data"]["dir"]["upload_source"]
    click.echo("Uploading images to Google Cloud...")
    logging.debug(f"{namespace}: Upload source path is: {upload_source}")

    gcloud_wrapper()

157 

158 

@cli.command()
@click.option("-t", "--title", default="New Arrivals", help="Slide deck title")
@click.pass_context
def generate(ctx, title):
    """
    Generate a Google Slide Deck


    The slide deck will be given a title based on the values supplied by VENDOR
    and --title.  The title slide will be in the following format:

        Artemis Book Sales Presents...
        Vendor Name, Title

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.  For each
    ISBN in the WORKBOOK:WORKSHEET
    for which it has image data it creates a slide containing the
    spreadsheet data, the description saved in the file defined by the configuration
    field [asg.data.file.scraped], and the images saved in the
    [google.cloud.bucket].  The Google sheet will be saved to the root of the
    Google Drive associated with the credentials created during initial
    installation.

    Generate supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    cmd = "generate"
    namespace = f"{MODULE}.{cmd}"

    vendor = ctx.obj["VENDOR"]
    workbook = ctx.obj["WORKBOOK"]
    worksheet = ctx.obj["WORKSHEET"]

    sdb = CFG["asg"]["data"]["file"]["scraped"]
    msg = (
        f"Creating Google Slides deck '{title}' for '{vendor or ''}' "
        f"using '{workbook or ''}':'{worksheet or ''}'..."
    )
    click.echo(msg)
    logging.debug(f"{namespace}: Scraped Items Database is: {sdb}")

    # Unlike the other subcommands, VENDOR and WORKBOOK are not checked up
    # front: generation is attempted, and a missing value is only reported as
    # a hint once the attempt has failed.
    try:
        slide_generator_wrapper(vendor, workbook, worksheet, sdb, title)
    except Exception as e:
        # Command-line boundary: report the failure on stderr instead of
        # letting a traceback escape.
        click.echo(f"Could not generate slide deck:{e}", err=True)
        if not vendor:
            click.echo("\tVENDOR not provided", err=True)
        if not workbook:
            click.echo("\tWORKBOOK not provided", err=True)

217 

218 

@cli.command()
@click.option("-o", "--output", "out", default="out.xlsx", help="Output file")
@click.pass_context
def sheet_image(ctx, out):
    """
    Insert item thumbnail images into spreadsheet

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.

    Modifies a local XLSX spreadsheet file provided by the
    --workbook:--worksheet values passed by the base command to include
    thumbnail images in the second column for ISBN items (field identified by
    --vendor) in which local thumbnail image files are available and saves a
    new XLSX file.

    By default, the thumbnail images are obtained from
    [asg.data.dir.images]/thumbnails and the new XLSX file is saved as
    "out.xlsx" in the current working directory.

    NOTE: Currently, the command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    cmd = "sheet-image"
    namespace = f"{MODULE}.sheet_image"

    # Both VENDOR and WORKBOOK are needed; report the first one missing on
    # stderr and skip the command without raising.
    vendor = ctx.obj["VENDOR"]
    if not vendor:
        click.echo(v_skip.format(cmd), err=True)
        return
    workbook = ctx.obj["WORKBOOK"]
    if not workbook:
        click.echo(b_skip.format(cmd), err=True)
        return

    worksheet = ctx.obj["WORKSHEET"]
    download_path = CFG["asg"]["data"]["dir"]["images"]
    image_directory = os.path.join(download_path, "thumbnails")
    msg = (
        f"Creating image enhanced spreadsheet for '{vendor}' "
        f"using '{workbook}':'{worksheet or ''}', "
        f"saving Excel file to '{out}'..."
    )
    click.echo(msg)
    logging.debug(f"{namespace}: Thumbnail Image Directory is: {image_directory}")

    sheet_image_wrapper(vendor, workbook, worksheet, image_directory, out)

279 

280 

@cli.command()
@click.option(
    "--image-directory",
    default=CFG["asg"]["data"]["dir"]["images"],
    help="Image directory",
)
def mkthumbs(image_directory):
    """
    Create thumbnails of images in IMAGE_DIRECTORY

    Creates thumbnail images from images located in a given directory.  These
    thumbnail images are saved to a "thumbnails" subdirectory in the original
    image directory.  These files are given the same names as their originals.

    By default, the command will use the directory defined by the configuration
    field [asg.data.dir.images] and size them to the dimensions defined by
    [asg.spreadsheet.mkthumbs.width] and [asg.spreadsheet.mkthumbs.height].
    """
    namespace = f"{MODULE}.mkthumbs"

    click.echo(f"Creating thumbnails of images in '{image_directory}'...")
    logging.debug(f"{namespace}: Image Directory is: {image_directory}")

    mkthumbs_wrapper(image_directory)

305 

306 

@cli.command()
@click.option("--email", "email", default="", help="TB Customer email to impersonate")
@click.pass_context
def order(ctx, email):
    """
    Add items to be ordered to website cart of vendor from spreadsheet

    Populates the website cart for a given --vendor with items from a
    --workbook:--worksheet.  The WORKSHEET MUST contain an "Order" column from
    which the command will get the quantity of each item to put into the cart.

    The browser instance with the populated cart is left open for the user to
    review and manually complete the order.  The user will be asked to manually
    login during the execution of this command.

    NOTE: Currently, this command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    NOTE: The browser opened by this command is controlled by this command.
    The browser will automatically close and the session will be terminated at
    the end of the defined waiting period.  If the web order has not been
    completed by the end of the waiting period, the cart may be lost depending
    on how the website handles its session data.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    cmd = "order"

    # Both VENDOR and WORKBOOK are needed; report the first one missing on
    # stderr and skip the command without raising.
    vendor = ctx.obj["VENDOR"]
    if not vendor:
        click.echo(v_skip.format(cmd), err=True)
        return
    workbook = ctx.obj["WORKBOOK"]
    if not workbook:
        click.echo(b_skip.format(cmd), err=True)
        return

    worksheet = ctx.obj["WORKSHEET"]
    msg = (
        f"Creating web order for '{vendor}' "
        f"using '{workbook}':'{worksheet or ''}', "
        "Adding items to cart..."
    )
    click.echo(msg)

    order_wrapper(email, vendor, workbook, worksheet)

356 

357 

# wrappers to make the cli testable
def slide_generator_wrapper(vendor, sheet_id, worksheet, sdb, title):
    """Call ``artemis_sg.slide_generator.main`` with the given arguments.

    The module is imported inside the function, so it is only loaded when
    the wrapper is actually called.
    """
    import artemis_sg.slide_generator as slide_generator

    slide_generator.main(vendor, sheet_id, worksheet, sdb, title)

363 

364 

def gcloud_wrapper():
    """Call ``artemis_sg.gcloud.main``.

    The module is imported inside the function, so it is only loaded when
    the wrapper is actually called.
    """
    import artemis_sg.gcloud as gcloud

    gcloud.main()

369 

370 

def img_downloader_wrapper():
    """Call ``artemis_sg.img_downloader.main``.

    The module is imported inside the function, so it is only loaded when
    the wrapper is actually called.
    """
    import artemis_sg.img_downloader as img_downloader

    img_downloader.main()

375 

376 

def scraper_wrapper(vendor, sheet_id, worksheet, sdb):
    """Call ``artemis_sg.scraper.main`` with the given arguments.

    The function-level import mirrors the sibling wrappers; it binds a local
    ``scraper`` name for the duration of the call.
    """
    import artemis_sg.scraper as scraper

    scraper.main(vendor, sheet_id, worksheet, sdb)

381 

382 

def sheet_image_wrapper(vendor, workbook, worksheet, image_directory, out):
    """Call ``spreadsheet.sheet_image`` with the given arguments, unchanged."""
    spreadsheet.sheet_image(vendor, workbook, worksheet, image_directory, out)

385 

386 

def mkthumbs_wrapper(image_directory):
    """Call ``spreadsheet.mkthumbs`` for ``image_directory``."""
    spreadsheet.mkthumbs(image_directory)

389 

390 

def order_wrapper(email, vendor, workbook, worksheet):
    """Fill the vendor's website cart from the spreadsheet, then wait.

    Reads (item, quantity) pairs with ``spreadsheet.get_order_items``, opens
    a browser through ``scraper.get_driver``, logs in with the scraper class
    for ``vendor``, adds each item to the cart, loads the cart page and then
    counts down while the user completes the order in the browser.

    Args:
        email: Customer email passed to ``impersonate``; required when
            ``vendor`` is "tb", unused otherwise.
        vendor: Vendor code; "tb", "gj" and "sd" are supported.
        workbook: Passed through to ``spreadsheet.get_order_items``.
        worksheet: Passed through to ``spreadsheet.get_order_items``.

    Raises:
        SystemExit: with status 1 when ``vendor`` is not supported, or when
            ``vendor`` is "tb" and ``email`` is empty.  An error is logged
            first and no browser is opened.
    """
    order_items = spreadsheet.get_order_items(vendor, workbook, worksheet)

    scraper_classes = {
        "tb": scraper.TBScraper,
        "gj": scraper.GJScraper,
        "sd": scraper.SDScraper,
    }
    if vendor not in scraper_classes:
        logging.error(
            f"order: VENDOR '{vendor}' is not supported by the order command."
        )
        sys.exit(1)
    if vendor == "tb" and not email:
        logging.error(
            f"order: VENDOR '{vendor}' requires the '--email' option to be set."
        )
        sys.exit(1)

    driver = scraper.get_driver()
    scrapr = scraper_classes[vendor](driver)

    scrapr.load_login_page()
    scrapr.login()
    if vendor == "tb":
        scrapr.impersonate(email)

    for item, qty in order_items:
        target = item
        if vendor == "tb":
            # For "tb" the spreadsheet value is first resolved through the
            # site's item-number search; unresolved items are skipped.
            # NOTE(review): the skip is assumed to apply to "tb" only --
            # confirm against the original file's indentation.
            target = scrapr.search_item_num(item)
            if not target:
                continue
        if scrapr.load_item_page(target):
            scrapr.add_to_cart(qty)
    scrapr.load_cart_page()

    # Seconds to wait for the user before this function returns.
    delay = 600
    print("******** USER INPUT REQUIRED ********")
    print("Locate the selenium controlled browser")
    print("and manually review and complete your order.")
    print("******** WAITING FOR USER INPUT ********")
    print()
    print(f"WARNING: The browser session will terminate in {delay} seconds!!!!")
    print("COUNTING DOWN TIME REMAINING...")
    countdown(delay, driver)

435 

436 

def countdown(delay, driver=None):
    """Print a once-per-second countdown while the browser stays open.

    Stops when ``delay`` reaches zero or when ``isBrowserAlive(driver)``
    reports False, whichever comes first.  With the default ``driver=None``
    it returns immediately without printing.

    Args:
        delay: Number of seconds to count down from.
        driver: Browser driver to poll before each tick.
    """
    # Each value is written over the previous one with a carriage return.
    # Padding to the width of the starting value makes a shorter number fully
    # cover the longer one before it (otherwise 100 -> 99 would show "990").
    width = len(str(delay))
    # Test the cheap counter first so an exhausted countdown never polls the
    # driver.
    while delay > 0 and isBrowserAlive(driver):
        print(f"{delay:<{width}}", end="\r")
        sleep(1)
        delay -= 1

442 

443 

def isBrowserAlive(driver):
    """Return True if ``driver.current_url`` can still be read.

    Args:
        driver: Browser driver to probe, or None.

    Returns:
        False when ``driver`` is None, when it has no ``current_url``
        attribute, or when reading it raises ``NoSuchWindowException``;
        True otherwise.
    """
    if driver is None:
        return False
    try:
        # Only the attribute access matters; the value itself is discarded.
        driver.current_url
    except (AttributeError, NoSuchWindowException):
        return False
    return True

450 

451 

# Run the command group when the file is executed directly as a script.
if __name__ == "__main__":
    cli()