Coverage for src/artemis_sg/cli.py: 81%
176 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-05 09:33 -0700
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-05 09:33 -0700
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
4import logging
5import os
6import sys
7from time import sleep
9import click
10from selenium.common.exceptions import NoSuchWindowException
12import artemis_sg.scraper as scraper
13import artemis_sg.spreadsheet as spreadsheet
14from artemis_sg.config import CFG
# Name of this module (file name without its extension); used below to prefix
# log messages with the originating command.
MODULE = os.path.splitext(os.path.basename(__file__))[0]

# Templates echoed to stderr when a subcommand is skipped because a required
# base-command option is missing; "{}" is filled with the subcommand name.
v_skip = "{}: skipping due to lack of VENDOR"
b_skip = "{}: skipping due to lack of WORKBOOK"
@click.group(chain=True)
@click.option("-V", "--verbose", is_flag=True, help="enable verbose mode")
@click.option("-D", "--debug", is_flag=True, help="enable debug mode")
@click.option("-v", "--vendor", default=None, help="Vendor code")
@click.option(
    "-b", "--workbook", default=None, help="Workbook (Sheets Doc ID or Excel File)"
)
@click.option("-s", "--worksheet", default=None, help="Worksheet within Sheets Doc")
@click.pass_context
def cli(ctx, verbose, debug, vendor, workbook, worksheet):
    """artemis_sg is a tool for processing product spreadsheet data.
    Its subcommands are designed to be used to facilitate the following
    primary endpoint conditions:

    \b
    * A Google Slide Deck of products
    * An enhanced Excel spreadsheet
    * A website order

    The subcommands can be combined into desired workflows.

    The base command includes --vendor, --workbook, and --worksheet options.
    These are used to pass context information to the subcommands.  Some
    subcommands expect --vendor and --workbook values to perform as designed.

    Example of Google Slide Deck workflow:

        $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
                scrape download upload generate -t "Cool Deck"

    Example of Sheet Image workflow:

        $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
                scrape download mkthumbs sheet-image -o "NewFile.xlsx"
    """
    # NOTE: the docstring above is rendered by click as the --help text, so
    # parameter documentation lives in the option declarations, not here.
    namespace = f"{MODULE}.cli"
    log_format = "%(levelname)s: %(message)s"

    # --debug takes precedence over --verbose; with neither flag the logging
    # level is left untouched and only the message format is configured.
    if debug:
        logging.basicConfig(format=log_format, level=logging.DEBUG)
        logging.debug(f"{namespace}: Debug mode enabled.")
    elif verbose:
        logging.basicConfig(format=log_format, level=logging.INFO)
        logging.info(f"{namespace}: Verbose mode enabled.")
    else:
        logging.basicConfig(format=log_format)

    # Load up the context object (ctx) that the chained subcommands read.
    ctx.ensure_object(dict)
    ctx.obj["VENDOR"] = vendor
    ctx.obj["WORKBOOK"] = workbook
    ctx.obj["WORKSHEET"] = worksheet
@cli.command()
@click.pass_context
def scrape(ctx):
    """Scrape web data for vendor from workbook:worksheet

    Iterates over the item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.  For each
    ISBN in the WORKBOOK:WORKSHEET, it searches for item descriptions and
    images in a web browser.  It collects this information and stores it in the
    file defined by the configuration field [asg.data.file.scraped].  If data
    for an ISBN already exists in the datafile, the ISBN is skipped and does
    not result in re-scraping data for that record.

    Scrape supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    cmd = "scrape"

    # Both VENDOR and WORKBOOK are required; a missing value skips the command
    # with a message on stderr instead of raising, so a chain can continue.
    vendor = ctx.obj["VENDOR"]
    if not vendor:
        click.echo(v_skip.format(cmd), err=True)
        return

    workbook = ctx.obj["WORKBOOK"]
    if not workbook:
        click.echo(b_skip.format(cmd), err=True)
        return

    sdb = CFG["asg"]["data"]["file"]["scraped"]
    worksheet = ctx.obj["WORKSHEET"]
    # WORKSHEET is optional; show an empty name rather than "None".
    msg = (
        f"Scraping web data for '{vendor}' "
        f"using '{workbook}':'{worksheet or ''}', "
        f"saving data to '{sdb}'..."
    )
    click.echo(msg)
    scraper_wrapper(vendor, workbook, worksheet, sdb)
@cli.command()
def download():
    """
    Download scraped images

    Iterates over the data records in the file defined by the configuration
    field [asg.data.file.scraped].  For each record, it downloads the image
    files associated with the record to a local directory as defined by the
    configuration field [asg.data.dir.images].
    """
    # The path is only reported here; the wrapper takes no arguments.
    images_dir = CFG["asg"]["data"]["dir"]["images"]

    click.echo("Downloading images...")
    logging.debug(f"{MODULE}.download: Download path is: {images_dir}")

    img_downloader_wrapper()
@cli.command()
def upload():
    """
    Upload local images to Google Cloud Storage Bucket

    Uploads the files in the directory defined by the configuration field
    [asg.data.dir.upload_source] to the Google Cloud bucket defined by the
    configuration field [google.cloud.bucket].  Only the first level of the
    source directory is uploaded.  Subdirectories of the source directory are
    not traversed for the upload.  All uploaded files are prefixed with value
    defined by the configuration field [google.cloud.bucket_prefix].
    """
    # The path is only reported here; the wrapper takes no arguments.
    source_dir = CFG["asg"]["data"]["dir"]["upload_source"]

    click.echo("Uploading images to Google Cloud...")
    logging.debug(f"{MODULE}.upload: Upload source path is: {source_dir}")

    gcloud_wrapper()
@cli.command()
@click.option("-t", "--title", default="New Arrivals", help="Slide deck title")
@click.pass_context
def generate(ctx, title):
    """
    Generate a Google Slide Deck

    The slide deck will be given a title based on the values supplied by VENDOR
    and --title.  The title slide will be in the following format:

        Artemis Book Sales Presents...
        Vendor Name, Title

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.  For each
    ISBN in the WORKBOOK:WORKSHEET for which it has image data it creates a
    slide containing the spreadsheet data, the description saved in the file
    defined by the configuration field [asg.data.file.scraped], and the images
    saved in the [google.cloud.bucket].  The Google sheet will be saved to the
    root of the Google Drive associated with the credentials created during
    initial installation.

    Generate supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    cmd = "generate"
    namespace = f"{MODULE}.{cmd}"

    vendor = ctx.obj["VENDOR"]
    workbook = ctx.obj["WORKBOOK"]
    worksheet = ctx.obj["WORKSHEET"]
    sdb = CFG["asg"]["data"]["file"]["scraped"]

    # Missing context values are shown as empty names rather than "None".
    msg = (
        f"Creating Google Slides deck '{title}' for '{vendor or ''}' "
        f"using '{workbook or ''}':'{worksheet or ''}'..."
    )
    click.echo(msg)
    logging.debug(f"{namespace}: Scraped Items Database is: {sdb}")

    # Unlike the other subcommands, generate does not pre-check VENDOR and
    # WORKBOOK: it attempts the run and, on any failure, reports the error
    # plus a hint for each missing value.  This is the CLI boundary, so a
    # broad catch is used to turn failures into messages on stderr.
    try:
        slide_generator_wrapper(vendor, workbook, worksheet, sdb, title)
    except Exception as e:
        click.echo(f"Could not generate slide deck:{e}", err=True)
        if not vendor:
            click.echo("\tVENDOR not provided", err=True)
        if not workbook:
            click.echo("\tWORKBOOK not provided", err=True)
@cli.command()
@click.option("-o", "--output", "out", default="out.xlsx", help="Output file")
@click.pass_context
def sheet_image(ctx, out):
    """
    Insert item thumbnail images into spreadsheet

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.

    Modifies a local XLSX spreadsheet file provided by the
    --workbook:--worksheet values passed by the base command to include
    thumbnail images in the second column for ISBN items (field identified by
    --vendor) in which local thumbnail image files are available and saves a
    new XLSX file.

    By default, the thumbnail images are obtained from
    [asg.data.dir.images]/thumbnails and the new XLSX file is saved as
    "out.xlsx" in the current working directory.

    NOTE: Currently, the command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    cmd = "sheet-image"
    namespace = f"{MODULE}.sheet_image"

    # Both VENDOR and WORKBOOK are required; a missing value skips the command
    # with a message on stderr instead of raising, so a chain can continue.
    vendor = ctx.obj["VENDOR"]
    if not vendor:
        click.echo(v_skip.format(cmd), err=True)
        return

    workbook = ctx.obj["WORKBOOK"]
    if not workbook:
        click.echo(b_skip.format(cmd), err=True)
        return

    # Thumbnails are read from the "thumbnails" subdirectory of the configured
    # image download directory.
    download_path = CFG["asg"]["data"]["dir"]["images"]
    image_directory = os.path.join(download_path, "thumbnails")
    worksheet = ctx.obj["WORKSHEET"]
    # WORKSHEET is optional; show an empty name rather than "None".
    msg = (
        f"Creating image enhanced spreadsheet for '{vendor}' "
        f"using '{workbook}':'{worksheet or ''}', "
        f"saving Excel file to '{out}'..."
    )
    click.echo(msg)
    logging.debug(f"{namespace}: Thumbnail Image Directory is: {image_directory}")

    sheet_image_wrapper(vendor, workbook, worksheet, image_directory, out)
@cli.command()
@click.option(
    "--image-directory",
    default=CFG["asg"]["data"]["dir"]["images"],
    help="Image directory",
)
def mkthumbs(image_directory):
    """
    Create thumbnails of images in IMAGE_DIRECTORY

    Creates thumbnail images from images located in a given directory.  These
    thumbnail images are saved to a "thumbnails" subdirectory in the original
    image directory.  These files are given the same names as their originals.

    By default, the command will use the directory defined by the configuration
    field [asg.data.dir.images] and size them to the dimensions defined by
    [asg.spreadsheet.mkthumbs.width] and [asg.spreadsheet.mkthumbs.height].
    """
    click.echo(f"Creating thumbnails of images in '{image_directory}'...")
    logging.debug(f"{MODULE}.mkthumbs: Image Directory is: {image_directory}")

    mkthumbs_wrapper(image_directory)
@cli.command()
@click.option("--email", "email", default="", help="TB Customer email to impersonate")
@click.pass_context
def order(ctx, email):
    """
    Add items to be ordered to website cart of vendor from spreadsheet

    Populates the website cart for a given --vendor with items from a
    --workbook:--worksheet.  The WORKSHEET MUST contain an "Order" column from
    which the command will get the quantity of each item to put into the cart.

    The browser instance with the populated cart is left open for the user to
    review and manually complete the order.  The user will be asked to manually
    login during the execution of this command.

    NOTE: Currently, this command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    NOTE: The browser opened by this command is controlled by this command.
    The browser will automatically close and the session will be terminated at
    the end of the defined waiting period.  If the web order has not been
    completed by the end of the waiting period, the cart may be lost depending
    on how the website handles its session data.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    command_name = "order"

    # Guard clauses: skip (with a message on stderr) when a required
    # base-command value is absent.
    if not ctx.obj["VENDOR"]:
        click.echo(v_skip.format(command_name), err=True)
        return
    if not ctx.obj["WORKBOOK"]:
        click.echo(b_skip.format(command_name), err=True)
        return

    vendor_label = str(ctx.obj["VENDOR"] or "")
    workbook_label = str(ctx.obj["WORKBOOK"] or "")
    worksheet_label = str(ctx.obj["WORKSHEET"] or "")
    announcement = (
        f"Creating web order for '{vendor_label}' "
        f"using '{workbook_label}':'{worksheet_label}', "
        "Adding items to cart..."
    )
    click.echo(announcement)

    order_wrapper(email, ctx.obj["VENDOR"], ctx.obj["WORKBOOK"], ctx.obj["WORKSHEET"])
358# wrappers to make the cli testable
def slide_generator_wrapper(vendor, sheet_id, worksheet, sdb, title):
    """Forward the arguments unchanged to ``artemis_sg.slide_generator.main``.

    The module is imported inside the function, i.e. only when the wrapper is
    actually called.
    """
    from artemis_sg import slide_generator as generator

    generator.main(vendor, sheet_id, worksheet, sdb, title)
def gcloud_wrapper():
    """Run ``artemis_sg.gcloud.main``; the module is imported on first call."""
    from artemis_sg import gcloud as uploader

    uploader.main()
def img_downloader_wrapper():
    """Run ``artemis_sg.img_downloader.main``; the module is imported on call."""
    from artemis_sg import img_downloader as downloader

    downloader.main()
def scraper_wrapper(vendor, sheet_id, worksheet, sdb):
    """Forward the arguments unchanged to ``artemis_sg.scraper.main``.

    The module is (re-)imported inside the function rather than relying on the
    module-level ``scraper`` name.
    """
    from artemis_sg import scraper as scraper_module

    scraper_module.main(vendor, sheet_id, worksheet, sdb)
def sheet_image_wrapper(vendor, workbook, worksheet, image_directory, out):
    """Forward the arguments unchanged to ``spreadsheet.sheet_image``."""
    insert_images = spreadsheet.sheet_image
    insert_images(vendor, workbook, worksheet, image_directory, out)
def mkthumbs_wrapper(image_directory):
    """Forward ``image_directory`` unchanged to ``spreadsheet.mkthumbs``."""
    make_thumbnails = spreadsheet.mkthumbs
    make_thumbnails(image_directory)
def order_wrapper(email, vendor, workbook, worksheet):
    """Fill the vendor's website cart from a spreadsheet, then wait for the user.

    Reads the (item, quantity) pairs via ``spreadsheet.get_order_items``, opens
    a Selenium-controlled browser, logs in, adds each item to the cart, loads
    the cart page and counts down while the user completes the order manually.

    The browser session is always shut down when this function finishes,
    whether the countdown elapsed, the window was closed, or an error occurred.

    :param email: customer email to impersonate; required when vendor is "tb".
    :param vendor: vendor code; only "tb", "gj" and "sd" are supported.
    :param workbook: workbook passed through to ``spreadsheet.get_order_items``.
    :param worksheet: worksheet passed through to ``spreadsheet.get_order_items``.
    :raises SystemExit: (exit code 1) when the vendor is unsupported, or when
        vendor is "tb" and no email was given.
    """
    order_items = spreadsheet.get_order_items(vendor, workbook, worksheet)

    # Vendor code -> scraper class that can drive that vendor's website.
    scraper_classes = {
        "tb": scraper.TBScraper,
        "gj": scraper.GJScraper,
        "sd": scraper.SDScraper,
    }
    scraper_class = scraper_classes.get(vendor)
    if scraper_class is None:
        logging.error(
            f"order: VENDOR '{vendor}' is not supported by the order command."
        )
        sys.exit(1)

    # Only the "tb" site orders on behalf of an impersonated customer and
    # needs an item-number lookup before an item page can be loaded.
    is_tb = vendor == "tb"
    if is_tb and not email:
        logging.error(
            f"order: VENDOR '{vendor}' requires the '--email' option to be set."
        )
        sys.exit(1)

    # Validation is done before this point so no browser is launched for a
    # request that cannot be fulfilled.
    driver = scraper.get_driver()
    try:
        scrapr = scraper_class(driver)
        scrapr.load_login_page()
        scrapr.login()
        if is_tb:
            scrapr.impersonate(email)

        for item, qty in order_items:
            if is_tb:
                item = scrapr.search_item_num(item)
            if not item:
                # Nothing usable to look up for this row; skip it.
                continue
            if scrapr.load_item_page(item):
                scrapr.add_to_cart(qty)

        scrapr.load_cart_page()
        delay = 600
        print("******** USER INPUT REQUIRED ********")
        print("Locate the selenium controlled browser")
        print("and manually review and complete your order.")
        print("******** WAITING FOR USER INPUT ********")
        print()
        print(f"WARNING: The browser session will terminate in {delay} seconds!!!!")
        print("COUNTING DOWN TIME REMAINING...")
        countdown(delay, driver)
    finally:
        # Always release the browser and its driver process.  Cleanup is
        # best-effort: a session that is already gone must not mask the
        # original outcome.
        try:
            driver.quit()
        except Exception as err:
            logging.debug(f"order: could not quit browser session cleanly: {err}")
def countdown(delay, driver=None):
    """Print a one-line, once-per-second countdown while the browser is alive.

    Stops as soon as ``delay`` reaches zero or ``isBrowserAlive(driver)``
    reports False.  With the default ``driver=None`` the browser check fails
    immediately, so nothing is printed and the function returns at once.

    :param delay: number of seconds to count down from.
    :param driver: Selenium driver whose window is polled each second.
    """
    # Pad every value to the width of the starting value so a shorter number
    # fully overwrites the previous one ("100" -> "99 " instead of "990").
    width = len(str(delay))
    # Test the cheap numeric condition first to avoid a needless driver call.
    while delay > 0 and isBrowserAlive(driver):
        # "\r" rewrites the same line; flush because no newline is emitted.
        print(f"{delay:<{width}}", end="\r", flush=True)
        sleep(1)
        delay -= 1
def isBrowserAlive(driver):
    """Return True if ``driver.current_url`` can be read, False otherwise.

    A missing attribute (for example ``driver`` is None) or a
    ``NoSuchWindowException`` from Selenium is treated as "not alive".
    """
    try:
        # Only the attribute access matters; its value is discarded.
        driver.current_url
    except (AttributeError, NoSuchWindowException):
        return False
    else:
        return True
# Allow running this file directly as a script; click parses sys.argv.
if __name__ == "__main__":
    cli()