Coverage for src/artemis_sg/cli.py: 80%
184 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-12 06:30 -0700
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-12 06:30 -0700
1#!/usr/bin/env python
3import logging
4import os
5import sys
6from time import sleep
8import click
9from rich.console import Console
10from rich.progress import track
11from rich.text import Text
12from selenium.common.exceptions import NoSuchWindowException
14from artemis_sg import scraper, spreadsheet
15from artemis_sg.config import CFG
# Bare module name (this file's name without its extension); used as the
# prefix of the log messages emitted by the commands below.
MODULE = os.path.splitext(os.path.basename(__file__))[0]
# Shared rich console used for the styled prompts printed by order_wrapper().
console = Console()

# Notice templates printed when a subcommand cannot run because a base option
# is missing; the placeholder receives the subcommand name.
v_skip = "{}: skipping due to lack of VENDOR"
b_skip = "{}: skipping due to lack of WORKBOOK"
@click.group(chain=True)
@click.option("-V", "--verbose", is_flag=True, help="enable verbose mode")
@click.option("-D", "--debug", is_flag=True, help="enable debug mode")
@click.option("-v", "--vendor", default=None, help="Vendor code")
@click.option(
    "-b", "--workbook", default=None, help="Workbook (Sheets Doc ID or Excel File)"
)
@click.option("-s", "--worksheet", default=None, help="Worksheet within Sheets Doc")
@click.pass_context
def cli(ctx, verbose, debug, vendor, workbook, worksheet):
    """artemis_sg is a tool for processing product spreadsheet data.
    Its subcommands are designed to be used to facilitate the following primary
    endpoint conditions:

    \b
    * A Google Slide Deck of products
    * An enhanced Excel spreadsheet
    * A website order

    The subcommands can be combined into desired workflows.

    The base command includes --vendor, --workbook, and --worksheet options.
    These are used to pass context information to the subcommands.  Some
    subcommands expect --vendor and --workbook values to perform as designed.

    Example of Google Slide Deck workflow:

        $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
                scrape download upload generate -t "Cool Deck"

    Example of Sheet Image workflow:

        $ artemis_sg -v sample -b tests/data/test_sheet.xlsx \\
                scrape download mkthumbs sheet-image -o "NewFile.xlsx"
    """
    # NOTE: the docstring above doubles as the --help text, so parameter notes
    # live here instead:
    #   ctx       -- click context; its ``obj`` dict is filled in below.
    #   verbose   -- when set (and debug is not), log at INFO level.
    #   debug     -- when set, log at DEBUG level; takes precedence over verbose.
    #   vendor, workbook, worksheet -- stored on ctx.obj for the subcommands.
    namespace = f"{MODULE}.cli"
    log_format = "%(levelname)s: %(message)s"
    if debug:
        logging.basicConfig(format=log_format, level=logging.DEBUG)
        logging.debug("%s: Debug mode enabled.", namespace)
    elif verbose:
        logging.basicConfig(format=log_format, level=logging.INFO)
        logging.info("%s: Verbose mode enabled.", namespace)
    else:
        # No explicit level: the root logger keeps its default threshold.
        logging.basicConfig(format=log_format)

    # load up context object (ctx)
    ctx.ensure_object(dict)
    ctx.obj["VENDOR"] = vendor
    ctx.obj["WORKBOOK"] = workbook
    ctx.obj["WORKSHEET"] = worksheet
@cli.command()
@click.pass_context
def scrape(ctx):
    """Scrape web data for vendor from workbook:worksheet

    Iterates over the item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.  For each
    ISBN in the WORKBOOK:WORKSHEET, it searches for item descriptions and
    images in a web browser.  It collects this information and stores it in the
    file defined by the configuration field [asg.data.file.scraped].  If data
    for an ISBN already exists in the datafile, the ISBN is skipped and does
    not result in re-scraping data for that record.

    Scrape supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    cmd = "scrape"

    # Guard clauses: print a skip notice on stderr and do nothing when a
    # required base option is missing.  VENDOR is checked before WORKBOOK.
    vendor = ctx.obj["VENDOR"]
    if not vendor:
        click.echo(v_skip.format(cmd), err=True)
        return
    workbook = ctx.obj["WORKBOOK"]
    if not workbook:
        click.echo(b_skip.format(cmd), err=True)
        return

    worksheet = ctx.obj["WORKSHEET"]
    sdb = CFG["asg"]["data"]["file"]["scraped"]
    # vendor and workbook are known to be truthy here; only the optional
    # worksheet needs the empty-string fallback.
    msg = (
        f"Scraping web data for '{vendor!s}' "
        f"using '{workbook!s}':"
        f"'{worksheet or ''!s}', "
        f"saving data to '{sdb}'..."
    )
    click.echo(msg)
    scraper_wrapper(vendor, workbook, worksheet, sdb)
@cli.command()
def download():
    """
    Download scraped images

    Iterates over the data records in the file defined by the configuration
    field [asg.data.file.scraped].  For each record, it downloads the image
    files associated with the record to a local directory as defined by the
    configuration field [asg.data.dir.images].
    """
    log_prefix = f"{MODULE}.download"

    # The path is only reported here; the wrapper takes no arguments.
    data_cfg = CFG["asg"]["data"]
    image_dir = data_cfg["dir"]["images"]
    click.echo("Downloading images...")
    logging.debug(f"{log_prefix}: Download path is: {image_dir}")

    img_downloader_wrapper()
@cli.command()
def upload():
    """
    Upload local images to Google Cloud Storage Bucket

    Uploads the files in the directory defined by the configuration field
    [asg.data.dir.upload_source] to the Google Cloud bucket defined by the
    configuration field [google.cloud.bucket].  Only the first level of the
    source directory is uploaded.  Subdirectories of the source directory are
    not traversed for the upload.  All uploaded files are prefixed with value
    defined by the configuration field [google.cloud.bucket_prefix].
    """
    log_prefix = f"{MODULE}.upload"

    # The source path is only reported here; the wrapper takes no arguments.
    data_cfg = CFG["asg"]["data"]
    source_dir = data_cfg["dir"]["upload_source"]
    click.echo("Uploading images to Google Cloud...")
    logging.debug(f"{log_prefix}: Upload source path is: {source_dir}")

    gcloud_wrapper()
163# TODO: (#163) move title default to CFG
@cli.command()
@click.option("-t", "--title", default="New Arrivals", help="Slide deck title")
@click.pass_context
def generate(ctx, title):
    """
    Generate a Google Slide Deck

    The slide deck will be given a title based on the values supplied by VENDOR
    and --title.  The title slide will be in the following format:

        Artemis Book Sales Presents...
        Vendor Name, Title

    Iterates over item rows in the spreadsheet provided by the
    --workbook:--worksheet values passed by the base command.  The ISBN field
    is identified by the --vendor value passed by the base command.  For each
    ISBN in the WORKBOOK:WORKSHEET for which it has image data it creates a
    slide containing the spreadsheet data, the description saved in the file
    defined by the configuration field [asg.data.file.scraped], and the images
    saved in the [google.cloud.bucket].  The Google sheet will be saved to the
    root of the Google Drive associated with the credentials created during
    initial installation.

    Generate supports both Google Sheet ID and Excel file paths for the WORKBOOK
    value.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors] and scraped items database from
    [asg.data.file.scraped].
    """
    cmd = "generate"
    namespace = f"{MODULE}.{cmd}"

    vendor = ctx.obj["VENDOR"]
    workbook = ctx.obj["WORKBOOK"]
    worksheet = ctx.obj["WORKSHEET"]

    sdb = CFG["asg"]["data"]["file"]["scraped"]
    msg = (
        f"Creating Google Slides deck '{title}' for '{vendor or ''!s}' "
        f"using '{workbook or ''!s}':'{worksheet or ''!s}'..."
    )
    click.echo(msg)
    logging.debug("%s: Scraped Items Database is: %s", namespace, sdb)

    # Unlike the other commands, generate does not pre-check VENDOR/WORKBOOK:
    # it attempts the run and, on any failure, reports the error plus whichever
    # base options were missing.
    try:
        slide_generator_wrapper(vendor, workbook, worksheet, sdb, title)
    except Exception as e:
        click.echo(f"Could not generate slide deck:{e}", err=True)
        if not vendor:
            click.echo("\tVENDOR not provided", err=True)
        if not workbook:
            click.echo("\tWORKBOOK not provided", err=True)
@cli.command()
@click.option("-o", "--output", "out", default="out.xlsx", help="Output file")
@click.pass_context
def sheet_image(ctx, out):
    """
    Insert item thumbnail images into spreadsheet

    Modifies a local XLSX spreadsheet file provided by the
    --workbook:--worksheet values passed by the base command to include
    thumbnail images in the second column for ISBN items (field identified by
    --vendor) in which local thumbnail image files are available and saves a
    new XLSX file.

    By default, the thumbnail images are obtained from
    [asg.data.dir.images]/thumbnails and the new XLSX file is saved as
    "out.xlsx" in the current working directory.

    NOTE: Currently, the command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    cmd = "sheet-image"
    namespace = f"{MODULE}.sheet_image"

    # Guard clauses: print a skip notice on stderr and do nothing when a
    # required base option is missing.  VENDOR is checked before WORKBOOK.
    vendor = ctx.obj["VENDOR"]
    if not vendor:
        click.echo(v_skip.format(cmd), err=True)
        return
    workbook = ctx.obj["WORKBOOK"]
    if not workbook:
        click.echo(b_skip.format(cmd), err=True)
        return

    worksheet = ctx.obj["WORKSHEET"]
    download_path = CFG["asg"]["data"]["dir"]["images"]
    image_directory = os.path.join(download_path, "thumbnails")
    # vendor and workbook are known to be truthy here; only the optional
    # worksheet needs the empty-string fallback.
    msg = (
        f"Creating image enhanced spreadsheet for "
        f"'{vendor!s}' "
        f"using '{workbook!s}':"
        f"'{worksheet or ''!s}', "
        f"saving Excel file to '{out}'..."
    )
    click.echo(msg)
    logging.debug("%s: Thumbnail Image Directory is: %s", namespace, image_directory)

    sheet_image_wrapper(vendor, workbook, worksheet, image_directory, out)
@cli.command()
@click.option(
    "--image-directory",
    default=CFG["asg"]["data"]["dir"]["images"],
    help="Image directory",
)
def mkthumbs(image_directory):
    """
    Create thumbnails of images in IMAGE_DIRECTORY

    Creates thumbnail images from images located in a given directory.  These
    thumbnail images are saved to a "thumbnails" subdirectory in the original
    image directory.  These files are given the same names as their originals.

    By default, the command will use the directory defined by the configuration
    field [asg.data.dir.images] and size them to the dimensions defined by
    [asg.spreadsheet.mkthumbs.width] and [asg.spreadsheet.mkthumbs.height].
    """
    log_prefix = f"{MODULE}.mkthumbs"
    announcement = f"Creating thumbnails of images in '{image_directory}'..."

    click.echo(announcement)
    logging.debug(f"{log_prefix}: Image Directory is: {image_directory}")

    mkthumbs_wrapper(image_directory)
@cli.command()
@click.option("--email", "email", default="", help="TB Customer email to impersonate")
@click.option(
    "--timeout",
    "timeout",
    type=int,
    default=600,
    help="Maximum time to hold browser open",
)
@click.pass_context
def order(ctx, email, timeout):
    """
    Add items to be ordered to website cart of vendor from spreadsheet

    Populates the website cart for a given --vendor with items from a
    --workbook:--worksheet.  The WORKSHEET MUST contain an "Order" column from
    which the command will get the quantity of each item to put into the cart.

    The browser instance with the populated cart is left open for the user to
    review and manually complete the order.  The user will be asked to manually
    login during the execution of this command.

    NOTE: Currently, this command does not support Google Sheet IDs as a valid
    WORKBOOK type.

    If a --worksheet is not defined, the first sheet in the WORKBOOK will be
    used.  If the given WORKBOOK contains multiple sheets and the sheet
    containing the desired data is not the first sheet in the WORKBOOK, the
    --worksheet will need to be specified for the base command.

    NOTE: The browser opened by this command is controlled by this command.
    The browser will automatically close and the session will be terminated at
    the end of the defined waiting period.  If the web order has not been
    completed by the end of the waiting period, the cart may be lost depending
    on how the website handles its session data.

    The command utilizes configuration variables stored in "config.toml" to set
    the vendor from [asg.vendors].
    """
    cmd = "order"
    # --timeout is declared with type=int, so click has already converted it
    # (and rejects non-numeric input with a usage error instead of letting a
    # ValueError traceback escape from a manual int() call).

    # Guard clauses: print a skip notice on stderr and do nothing when a
    # required base option is missing.  VENDOR is checked before WORKBOOK.
    vendor = ctx.obj["VENDOR"]
    if not vendor:
        click.echo(v_skip.format(cmd), err=True)
        return
    workbook = ctx.obj["WORKBOOK"]
    if not workbook:
        click.echo(b_skip.format(cmd), err=True)
        return

    worksheet = ctx.obj["WORKSHEET"]
    # vendor and workbook are known to be truthy here; only the optional
    # worksheet needs the empty-string fallback.
    msg = (
        f"Creating web order for '{vendor!s}' "
        f"using '{workbook!s}':"
        f"'{worksheet or ''!s}', "
        f"Adding items to cart..."
    )
    click.echo(msg)

    order_wrapper(email, vendor, workbook, worksheet, timeout)
374# wrappers to make the cli testable
def slide_generator_wrapper(vendor, sheet_id, worksheet, sdb, title):
    """Forward the arguments unchanged to ``artemis_sg.slide_generator.main``."""
    # Imported at call time rather than at module import.
    from artemis_sg import slide_generator as slide_generator_module

    slide_generator_module.main(vendor, sheet_id, worksheet, sdb, title)
def gcloud_wrapper():
    """Call ``artemis_sg.gcloud.main`` with no arguments."""
    # Imported at call time rather than at module import.
    from artemis_sg import gcloud as gcloud_module

    gcloud_module.main()
def img_downloader_wrapper():
    """Call ``artemis_sg.img_downloader.main`` with no arguments."""
    # Imported at call time rather than at module import.
    from artemis_sg import img_downloader as img_downloader_module

    img_downloader_module.main()
def scraper_wrapper(vendor, sheet_id, worksheet, sdb):
    """Forward the arguments unchanged to ``artemis_sg.scraper.main``."""
    # Resolved at call time through the package, independent of the
    # module-level ``scraper`` name.
    from artemis_sg import scraper as scraper_module

    scraper_module.main(vendor, sheet_id, worksheet, sdb)
def sheet_image_wrapper(vendor, workbook, worksheet, image_directory, out):
    """Forward the arguments unchanged to ``spreadsheet.sheet_image``."""
    spreadsheet.sheet_image(
        vendor,
        workbook,
        worksheet,
        image_directory,
        out,
    )
def mkthumbs_wrapper(image_directory):
    """Forward ``image_directory`` unchanged to ``spreadsheet.mkthumbs``."""
    spreadsheet.mkthumbs(image_directory)
def order_wrapper(email, vendor, workbook, worksheet, timeout=600):
    """Fill the vendor's web cart from the spreadsheet, then hold the browser open.

    Reads the order items from the workbook, obtains a driver/scraper pair for
    the vendor, logs in, adds each item that loads to the cart, shows the cart
    page, and then counts down up to ``timeout`` seconds (polling once per
    second) while the browser is still alive.  Logs an error and exits with
    status 1 when no scraper is available for the vendor.
    """
    order_items = spreadsheet.get_order_items(vendor, workbook, worksheet)
    driver, scrapr = get_driver_scraper(vendor, email)
    if not scrapr:
        logging.error(
            f"order: VENDOR '{vendor}' is not supported by the order command."
        )
        sys.exit(1)
        # Keeps the function from falling through if sys.exit is stubbed out.
        return

    is_tb = vendor == "tb"
    scrapr.load_login_page()
    scrapr.login()
    if is_tb:
        scrapr.impersonate(email)

    for item, qty in order_items:
        # For "tb" the spreadsheet item is first resolved to an item number
        # via a search; other vendors use the item as-is.
        item_num = scrapr.search_item_num(item) if is_tb else item
        if is_tb and not item_num:
            continue
        if scrapr.load_item_page(item_num):
            scrapr.add_to_cart(qty)
    scrapr.load_cart_page()

    # NOTE(review): the interior indentation of this banner could not be
    # recovered from the listing this was reviewed from -- confirm against the
    # repository copy before applying.
    input_text = Text(
        """
        ******** USER INPUT REQUIRED ********
        Locate the selenium controlled browser
        and manually review and complete your order.
        ******** WAITING FOR USER INPUT ********
        """
    )
    input_text.stylize("bold cyan")
    console.print(input_text)

    warn_text = Text(
        f"WARNING: The browser session will terminate in {timeout} seconds!!!!"
    )
    warn_text.stylize("bold red")
    console.print(warn_text)

    countdown = track(
        range(timeout), description="[red]COUNTING DOWN TIME REMAINING..."
    )
    for _tick in countdown:
        if not is_browser_alive(driver):
            break
        sleep(1)
def get_driver_scraper(vendor, email=None):
    """Return a ``(driver, scraper)`` pair for ``vendor``.

    Returns ``(None, None)`` for any vendor other than "tb", "gj" or "sd".
    For "tb" an ``email`` is required; without one an error is logged and the
    process exits with status 1.
    """
    # Vendor code -> name of the scraper class on the ``scraper`` module.  The
    # class is looked up only after the vendor is known to be supported.
    scraper_class_names = {"tb": "TBScraper", "gj": "GJScraper", "sd": "SDScraper"}
    class_name = scraper_class_names.get(vendor)
    if class_name is None:
        return None, None

    if vendor == "tb" and not email:
        logging.error(
            f"order: VENDOR '{vendor}' requires the '--email' option to be set."
        )
        sys.exit(1)

    driver = scraper.get_driver()
    scrapr = getattr(scraper, class_name)(driver)
    return driver, scrapr
def is_browser_alive(driver):
    """Return True when ``driver`` reports a non-empty current URL.

    Returns False when the URL is empty, or when reading it raises
    AttributeError (for example ``driver`` is None) or NoSuchWindowException.
    The result is always a bool; the empty-URL case used to fall off the end
    of the function and return None.
    """
    try:
        return bool(driver.current_url)
    except (AttributeError, NoSuchWindowException):
        return False
# Run the click command group when this file is executed as a script.
if __name__ == "__main__":
    cli()