seleniumuser.seleniumuser

  1import atexit
  2import os
  3import random
  4import sys
  5import time
  6from pathlib import Path
  7from types import LambdaType
  8from typing import Any
  9from warnings import warn
 10
 11from bs4 import BeautifulSoup
 12from noiftimer import Timer
 13from selenium import webdriver
 14from selenium.webdriver.chrome.options import Options as ChromeOptions
 15from selenium.webdriver.chrome.service import Service as ChromeService
 16from selenium.webdriver.common.by import By
 17from selenium.webdriver.common.keys import Keys
 18from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
 19from selenium.webdriver.firefox.options import Options as FirefoxOptions
 20from selenium.webdriver.firefox.service import Service as FirefoxService
 21from selenium.webdriver.remote.webelement import WebElement
 22from selenium.webdriver.support.ui import Select
 23from voxscribe import get_text_from_url
 24from whosyouragent import get_agent
 25
 26
 27class User:
 28    """Sits on top of selenium to streamline
 29    automation and scraping tasks."""
 30
 31    def __init__(
 32        self,
 33        headless: bool = False,
 34        browser_type: str = "firefox",
 35        implicit_wait: int = 10,
 36        page_load_timeout: int = 60,
 37        open_browser: bool = True,
 38        locator_method: str = "xpath",
 39        randomize_user_agent: bool = True,
 40        user_agent_rotation_period: int = None,
 41        move_window_by: tuple[int, int] = (0, -1000),
 42        download_dir: str | Path = None,
 43        driver_path: str | Path = None,
 44    ):
 45        """
 46        :param headless: If True, browser window will not be visible.
 47
 48        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'.
 49
 50        :param implicit_wait: Number of seconds to look for a specified element before
 51        selenium considers it missing and throws an exception.
 52
 53        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
 54        before throwing an exception.
 55
 56        :param open_browser: If True, opens a browser window when a User object is created.
 57        If False, a manual call to self.open_browser() must be made.
 58
 59        :param locator_method: The locator type User should expect to be given.
 60        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
 61        Every member function with a 'locator' argument refers to a string matching
 62        the current locator_method.
 63
 64        :param randomize_user_agent: If True, a random useragent will be used whenever
 65        the browser is opened. If False, the native useragent will be used.
 66
 67        :param user_agent_rotation_period: If not None, the browser window will be closed
 68        and reopened with a new useragent every user_agent_rotation_period number of minutes.
 69        Rotation occurs on the first call to self.get() after the time period has elapsed.
 70        Ignored if randomize_user_agent is False.
 71
 72        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.
 73
 74        :param download_dir: The download folder to use. If None, the default folder will be used.
 75
 76        :param driver_path: The path to the webdriver executable selenium should use.
 77        If None, the system PATH will be checked for the executable.
 78        If the executable isn't found, the parent directories and the immediate child directories
 79        of the current working directory will be searched.
 80        """
 81        self.headless = headless
 82        browser_type = browser_type.lower()
 83        if browser_type in ["firefox", "chrome"]:
 84            self.browser_type = browser_type
 85        else:
 86            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
 87        self.browser_open = False
 88        self.implicit_wait = implicit_wait
 89        self.page_load_timeout = page_load_timeout
 90        self.rotation_timer = Timer()
 91        self.timer = Timer()
 92        self.timer.start()
 93        self.randomize_user_agent = randomize_user_agent
 94        self.user_agent_rotation_period = user_agent_rotation_period
 95        self.locator_method = locator_method
 96        self.turbo()
 97        self.keys = Keys
 98        self.move_window_by = move_window_by
 99        self.download_dir = download_dir
100        self.driver_path = driver_path
101        if not self.driver_path:
102            self.search_for_driver()
103        if open_browser:
104            self.open_browser()
105        else:
106            self.browser = None
107        atexit.register(self.close_browser)
108
109    def __enter__(self):
110        return self
111
112    def __exit__(self, *args):
113        self.close_browser()
114
115    def configure_firefox(self) -> FirefoxService:
116        """Configure options and profile for firefox."""
117        self.options = FirefoxOptions()
118        self.options.headless = self.headless
119        self.options.set_preference(
120            "widget.windows.window_occlusion_tracking.enabled", False
121        )
122        self.options.set_preference("dom.webaudio.enabled", False)
123        if self.randomize_user_agent:
124            self.options.set_preference("general.useragent.override", get_agent())
125        if self.download_dir:
126            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
127            self.profile = FirefoxProfile()
128            self.profile.set_preference("browser.download.dir", str(self.download_dir))
129            self.profile.set_preference("browser.download.folderList", 2)
130        else:
131            self.profile = None
132        self.service = FirefoxService(
133            executable_path=str(self.driver_path), log_path=os.devnull
134        )
135
136    def configure_chrome(self) -> ChromeService:
137        """Configure options and profile for chrome."""
138        self.options = ChromeOptions()
139        self.options.headless = self.headless
140        self.options.add_argument("--disable-blink-features=AutomationControlled")
141        self.options.add_argument("--mute-audio")
142        self.options.add_argument("--disable-infobars")
143        self.options.add_argument("--disable-notifications")
144        self.options.add_argument("--log-level=3")
145        if self.randomize_user_agent:
146            self.options.add_argument(f"--user-agent={get_agent()}")
147        self.options.add_experimental_option("useAutomationExtension", False)
148        if self.download_dir:
149            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
150            self.options.add_experimental_option(
151                "prefs", {"download.default_directory": str(self.download_dir)}
152            )
153        self.service = ChromeService(
154            executable_path=str(self.driver_path), log_path=os.devnull
155        )
156
157    def search_for_driver(self):
158        """Searches for the webdriver executable."""
159        cwd = Path.cwd()
160        found = False
161        match self.browser_type:
162            case "firefox":
163                driver = "geckodriver.exe"
164            case "chrome":
165                driver = "chromedriver.exe"
166        # search PATH
167        env_path = os.environ["PATH"]
168        if sys.platform == "win32":
169            env_paths = env_path.split(";")
170        else:
171            env_paths = env_path.split(":")
172            driver = driver[: driver.find(".")]
173        for path in env_paths:
174            if (Path(path) / driver).exists():
175                self.driver_path = Path(path) / driver
176                found = True
177                break
178        # check current working directory and parent folders
179        if not found:
180            while cwd != cwd.parent:
181                if (cwd / driver).exists():
182                    self.driver_path = cwd / driver
183                    found = True
184                    break
185                cwd = cwd.parent
186            # check top most level
187            if not found and (cwd / driver).exists():
188                self.driver_path = cwd / driver
189                found = True
190        # check child folders (only 1 level down)
191        if not found:
192            for child in Path.cwd().iterdir():
193                if child.is_dir() and (child / driver).exists():
194                    self.driver_path = child / driver
195                    found = True
196        if not found:
197            warn(f"Could not find {driver}")
198
199    def set_implicit_wait(self, wait_time: int = None):
200        """Sets to default time if no arg given."""
201        if not wait_time:
202            self.browser.implicitly_wait(self.implicit_wait)
203        else:
204            self.browser.implicitly_wait(wait_time)
205
206    def open_browser(self):
207        """Configures and opens selenium browser."""
208        if not self.browser_open:
209            match self.browser_type:
210                case "firefox":
211                    self.configure_firefox()
212                    self.browser = webdriver.Firefox(
213                        options=self.options,
214                        service=self.service,
215                        firefox_profile=self.profile,
216                    )
217                case "chrome":
218                    self.configure_chrome()
219                    self.browser = webdriver.Chrome(
220                        options=self.options, service=self.service
221                    )
222            self.set_implicit_wait()
223            self.browser.maximize_window()
224            self.browser.set_window_position(
225                self.move_window_by[0], self.move_window_by[1]
226            )
227            self.browser.maximize_window()
228            self.browser.set_page_load_timeout(self.page_load_timeout)
229            self.browser_open = True
230            self.tab_index = 0
231            self.rotation_timer.start()
232        else:
233            warn("Browser already open.")
234
235    def close_browser(self):
236        """Close browser window."""
237        if self.browser_open:
238            self.browser_open = False
239            self.browser.quit()
240
241    def open_tab(self, url: str = "", switch_to_tab: bool = True):
242        """Opens new tab and, if provided, goes to url.
243
244        New tab is inserted after currently active tab."""
245        self.script("window.open(arguments[0]);", url)
246        if switch_to_tab:
247            self.switch_to_tab(self.tab_index + 1)
248
249    def switch_to_tab(self, tab_index: int):
250        """Switch to a tab in browser, zero indexed."""
251        self.browser.switch_to.window(self.browser.window_handles[tab_index])
252        self.tab_index = tab_index
253
254    def get_num_tabs(self) -> int:
255        """Returns number of tabs open."""
256        return len(self.browser.window_handles)
257
258    def close_tab(self, tab_index: int = 1):
259        """Close specified tab and
260        switches to tab index 0."""
261        self.switch_to_tab(tab_index)
262        self.browser.close()
263        self.switch_to_tab(0)
264
265    def get(self, url: str):
266        """Requests webpage at given url and rotates userAgent if necessary."""
267        if not self.browser_open:
268            self.open_browser()
269        if (
270            self.randomize_user_agent
271            and self.user_agent_rotation_period is not None
272            and self.rotation_timer.check(format=False)
273            > (60 * self.user_agent_rotation_period)
274        ):
275            self.rotation_timer.stop()
276            self.close_browser()
277            self.open_browser()
278        self.browser.get(url)
279        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
280        self.chill(self.arrival_wait)
281
282    def get_soup(self) -> BeautifulSoup:
283        """Returns a BeautifulSoup object
284        of the current page source."""
285        return BeautifulSoup(self.browser.page_source, "html.parser")
286
287    def current_url(self) -> str:
288        """Returns current url of active tab."""
289        return self.browser.current_url
290
291    def delete_cookies(self):
292        """Delete all cookies for
293        this browser instance."""
294        self.browser.delete_all_cookies()
295
296    def turbo(self, engage: bool = True):
297        """When engaged, strings will be sent
298        to elements all at once and there will be
299        no waiting after actions.
300
301        When disengaged, strings will be sent to elements
302        'one key at a time' with randomized amounts of
303        time between successive keys and after actions."""
304        if engage:
305            self.after_key_wait = (0, 0)
306            self.after_field_wait = (0, 0)
307            self.after_click_wait = (0, 0)
308            self.arrival_wait = (1, 1)
309            self.one_key_at_a_time = False
310            self.turbo_engaged = True
311        else:
312            self.after_key_wait = (0.1, 0.5)
313            self.after_field_wait = (1, 2)
314            self.after_click_wait = (0.25, 1.5)
315            self.arrival_wait = (4, 10)
316            self.one_key_at_a_time = True
317            self.turbo_engaged = False
318
319    def chill(self, min_max: tuple[float, float]):
320        """Sleeps a random amount
321        between min_max[0] and min_max[1]."""
322        time.sleep(random.uniform(min_max[0], min_max[1]))
323
324    def script(self, script: str, args: Any = None) -> Any:
325        """Execute javascript code and returns result."""
326        return self.browser.execute_script(script, args)
327
328    def remove(self, locator: str):
329        """Removes element from DOM."""
330        self.script("arguments[0].remove();", self.find(locator))
331
332    def get_length(self, locator: str) -> int:
333        """Returns number of child elements for a given element."""
334        return int(self.script("return arguments[0].length;", self.find(locator)))
335
336    def find(self, locator: str) -> WebElement:
337        """Finds and returns a WebElement."""
338        match self.locator_method:
339            case "xpath":
340                return self.browser.find_element(By.XPATH, locator)
341            case "id":
342                return self.browser.find_element(By.ID, locator)
343            case "className":
344                return self.browser.find_element(By.CLASS_NAME, locator)
345            case "name":
346                return self.browser.find_element(By.NAME, locator)
347            case "cssSelector":
348                return self.browser.find_element(By.CSS_SELECTOR, locator)
349
350    def find_children(self, locator: str) -> list[WebElement]:
351        """Returns a list of child WebElements
352        for given locator arg."""
353        element = self.find(locator)
354        return element.find_elements("xpath", "./*")
355
356    def scroll(self, amount: int = None, fraction: float = None):
357        """Scroll web page.
358        :param amount: The number of lines to scroll if not None.
359
360        :param fraction: The amount between 0.0 and 1.0
361        of the page height to scroll.
362
363        If values are provided for both arguments,
364        amount will be used.
365
366        If values are provided for neither argument,
367        the entire page length will be scrolled.
368
369        Scrolls one line at a time if self.turbo is False."""
370        if amount:
371            amount_to_scroll = amount
372        elif fraction:
373            amount_to_scroll = int(
374                fraction
375                * (
376                    int(self.script("return document.body.scrollHeight;"))
377                    - int(self.script("return window.pageYOffset;"))
378                )
379            )
380        else:
381            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
382        if self.turbo_engaged:
383            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
384        else:
385            for _ in range(abs(amount_to_scroll)):
386                if amount_to_scroll >= 0:
387                    self.script("window.scrollBy(0,1);")
388                else:
389                    self.script("window.scrollBy(0,-1);")
390        self.chill(self.after_click_wait)
391
392    def scroll_into_view(self, locator: str) -> WebElement:
393        """Scrolls to a given element and returns the element."""
394        element = self.find(locator)
395        self.script("arguments[0].scroll_into_view();", element)
396        self.chill(self.after_click_wait)
397        return element
398
399    def text(self, locator: str) -> str:
400        """Returns text of WebElement."""
401        return self.find(locator).text
402
403    def click(self, locator: str) -> WebElement:
404        """Clicks on and returns WebElement."""
405        element = self.find(locator)
406        element.click()
407        self.chill(self.after_click_wait)
408        return element
409
410    def clear(self, locator: str) -> WebElement:
411        """Clears content of WebElement if able
412        and then returns WebElement."""
413        element = self.find(locator)
414        element.clear()
415        self.chill(self.after_click_wait)
416        return element
417
418    def switch_to_iframe(self, locator: str):
419        """Switch to an iframe from given locator."""
420        self.browser.switch_to.frame(self.find(locator))
421
422    def switch_to_parent_frame(self):
423        """Move up a frame level from current frame."""
424        self.browser.switch_to.parent_frame()
425
426    def select(
427        self, locator: str, method: str, choice: str | int | tuple
428    ) -> WebElement:
429        """Select a choice from Select element.
430        Returns the Select element from the locator string,
431        not the option element that is selected.
432
433        :param method: Can be 'value' or 'index'
434
435        :param choice: The option to select.
436
437        If method is 'value', then choice should be
438        the html 'value' attribute of the desired option.
439
440        If method is 'index', choice can either be a single
441        int for the desired option or it can be a two-tuple.
442        If the tuple is provided, a random option between the
443        two indicies (inclusive) will be selected."""
444        element = self.click(locator)
445        match method:
446            case "value":
447                Select(element).select_by_value(choice)
448            case "index":
449                if type(choice) == tuple:
450                    choice = random.randint(choice[0], choice[1])
451                Select(element).select_by_index(choice)
452        self.chill(self.after_field_wait)
453        return element
454
455    def click_elements(
456        self, locators: list[str], max_selections: int = None, min_selections: int = 1
457    ) -> WebElement:
458        """Click a random number of WebElements
459        and return the last WebElement clicked.
460
461        :param locators: A list of element locators to choose from.
462
463        :param max_selections: The maximum number of elements to click.
464        If None, the maximum will be the length of the locators list.
465
466        :param min_selections: The minimum number of elements to click.
467
468        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
469        will click between 1 and 3 random elements from the list.
470        """
471        if not max_selections:
472            max_selections = len(locators)
473        for option in random.sample(
474            locators, k=random.randint(min_selections, max_selections)
475        ):
476            element = self.click(option)
477        return element
478
479    def get_click_list(
480        self, num_options: int, max_choices: int = 1, min_choices: int = 1
481    ) -> list[str]:
482        """Similar to self.click_elements(), but for use with the self.fill_next() method.
483
484        Creates a list of length 'num_options' where every element is 'skip'.
485
486        A random number of elements in the list between 'min_choices' and 'max_choices' are
487        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
488        click_list = ["skip"] * num_options
489        selected_indexes = []
490        for i in range(random.randint(min_choices, max_choices)):
491            index = random.randint(0, num_options - 1)
492            while index in selected_indexes:
493                index = random.randint(0, num_options - 1)
494            selected_indexes.append(index)
495            click_list[index] = self.keys.SPACE
496        return click_list
497
498    def send_keys(
499        self,
500        locator: str,
501        data: str,
502        click_first: bool = True,
503        clear_first: bool = False,
504    ) -> WebElement:
505        """Types data into element and returns the element.
506
507        :param data: The string to send to the element.
508
509        :param click_first: If True, the element is clicked on
510        before the data is sent.
511
512        :param clear_first: If True, the current text of the element
513        is cleared before the data is sent."""
514        element = self.click(locator) if click_first else self.find(locator)
515        if clear_first:
516            element.clear()
517            self.chill(self.after_click_wait)
518        if self.one_key_at_a_time:
519            for ch in str(data):
520                element.send_keys(ch)
521                self.chill(self.after_key_wait)
522        else:
523            element.send_keys(str(data))
524        self.chill(self.after_field_wait)
525        return element
526
527    def fill_next(
528        self, data: list[str | tuple], start_element: WebElement = None
529    ) -> WebElement:
530        """Fills a form by tabbing from the current WebElement
531        to the next one and using the corresponding item in data.
532        Returns the last WebElement.
533
534        :param data: A list of form data. If an item is a string (except for 'skip')
535        it will be typed into the current WebElement.
536
537        An item in data can be a two-tuple of the form
538        ('downArrow', numberOfPresses:int|tuple[int, int]).
539
540        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
541        that many times to the WebElement.
542
543        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
544        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
545        This is typically for use with Select elements.
546
547        An item in data can also be 'skip', which will perform no action on the current
548        WebElement and will continue to the next one.
549
550        An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100,
551        representing a percent chance an element will be clicked or skipped:
552        >>> user.fill_next(["click=70"])
553
554        has a 70% chance of being
555        >>> user.fill_next([user.keys.SPACE])
556
557        and a 30% chance of being
558        >>> user.fill_next(["skip"])
559
560
561        :param start_element: The WebElement to start tabbing from.
562        The currently active element will be used if start_element is None.
563
564        Note: The function tabs to the next element before sending data,
565        so the start_element should the WebElement before the one
566        that should receive data[0].
567        """
568        element = (
569            self.browser.switch_to.active_element
570            if not start_element
571            else start_element
572        )
573        for datum in data:
574            element.send_keys(Keys.TAB)
575            element = self.browser.switch_to.active_element
576            self.chill(self.after_key_wait)
577            if type(datum) == str and datum.strip().startswith("click="):
578                chance = int(datum.split("=")[1].strip())
579                if random.randint(0, 100) <= chance:
580                    datum = Keys.SPACE
581                else:
582                    datum = "skip"
583            if datum[0] == "downArrow":
584                if type(datum[1]) == tuple:
585                    times = random.randint(datum[1][0], datum[1][1])
586                else:
587                    times = datum[1]
588                for _ in range(times):
589                    element.send_keys(Keys.ARROW_DOWN)
590                    self.chill(self.after_key_wait)
591            elif datum == "skip":
592                self.chill(self.after_key_wait)
593            else:
594
595                if self.turbo_engaged:
596                    element.send_keys(str(datum))
597                else:
598                    for ch in str(datum):
599                        element.send_keys(ch)
600                        self.chill(self.after_key_wait)
601            self.chill(self.after_field_wait)
602        return element
603
604    def wait_until(
605        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
606    ):
607        """Checks condition repeatedly until either it is true,
608        or the max_wait is exceeded.
609
610        Raises a TimeoutError if the condition doesn't success within max_wait.
611
612        Useful for determing whether a form has been successfully submitted.
613
614        :param condition: The condition function to check.
615
616        :param max_wait: Number of seconds to continue checking condition
617        before throwing a TimeoutError.
618
619        :param polling_interval: The number of seconds to sleep before
620        checking the condition function again after it fails.
621
622        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
623        start_time = time.time()
624        while True:
625            try:
626                if condition():
627                    time.sleep(1)
628                    break
629                elif (time.time() - start_time) > max_wait:
630                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
631                else:
632                    time.sleep(polling_interval)
633            except:
634                if (time.time() - start_time) > max_wait:
635                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
636                else:
637                    time.sleep(polling_interval)
638
639    def dismiss_alert(self):
640        """Dismiss alert dialog."""
641        self.browser.switch_to.alert.dismiss()
642
643    def solve_recaptcha_v3(
644        self,
645        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
646        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
647    ):
648        """Pass google recaptcha v3 by solving an audio puzzle.
649
650        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
651        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
652        pass None to this argument.
653
654        """
655        locator_method = self.locator_method
656        self.locator_method = "xpath"
657        try:
658            if outer_iframe_xpath:
659                self.switch_to_iframe(outer_iframe_xpath)
660                self.click('//*[@id="recaptcha-anchor"]')
661                self.switch_to_parent_frame()
662            self.switch_to_iframe(inner_iframe_xpath)
663            self.click('//*[@id="recaptcha-audio-button"]')
664            mp3_url = self.find(
665                '//a[@class="rc-audiochallenge-tdownload-link"]'
666            ).get_attribute("href")
667            text = get_text_from_url(mp3_url, ".mp3")
668            self.send_keys('//*[@id="audio-response"]', text)
669            self.click('//*[@id="recaptcha-verify-button"]')
670        except Exception as e:
671            print(e)
672            raise Exception("Could not solve captcha")
673        finally:
674            self.switch_to_parent_frame()
675            self.locator_method = locator_method
class User:
 28class User:
 29    """Sits on top of selenium to streamline
 30    automation and scraping tasks."""
 31
 32    def __init__(
 33        self,
 34        headless: bool = False,
 35        browser_type: str = "firefox",
 36        implicit_wait: int = 10,
 37        page_load_timeout: int = 60,
 38        open_browser: bool = True,
 39        locator_method: str = "xpath",
 40        randomize_user_agent: bool = True,
 41        user_agent_rotation_period: int = None,
 42        move_window_by: tuple[int, int] = (0, -1000),
 43        download_dir: str | Path = None,
 44        driver_path: str | Path = None,
 45    ):
 46        """
 47        :param headless: If True, browser window will not be visible.
 48
 49        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'.
 50
 51        :param implicit_wait: Number of seconds to look for a specified element before
 52        selenium considers it missing and throws an exception.
 53
 54        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
 55        before throwing an exception.
 56
 57        :param open_browser: If True, opens a browser window when a User object is created.
 58        If False, a manual call to self.open_browser() must be made.
 59
 60        :param locator_method: The locator type User should expect to be given.
 61        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
 62        Every member function with a 'locator' argument refers to a string matching
 63        the current locator_method.
 64
 65        :param randomize_user_agent: If True, a random useragent will be used whenever
 66        the browser is opened. If False, the native useragent will be used.
 67
 68        :param user_agent_rotation_period: If not None, the browser window will be closed
 69        and reopened with a new useragent every user_agent_rotation_period number of minutes.
 70        Rotation occurs on the first call to self.get() after the time period has elapsed.
 71        Ignored if randomize_user_agent is False.
 72
 73        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.
 74
 75        :param download_dir: The download folder to use. If None, the default folder will be used.
 76
 77        :param driver_path: The path to the webdriver executable selenium should use.
 78        If None, the system PATH will be checked for the executable.
 79        If the executable isn't found, the parent directories and the immediate child directories
 80        of the current working directory will be searched.
 81        """
 82        self.headless = headless
 83        browser_type = browser_type.lower()
 84        if browser_type in ["firefox", "chrome"]:
 85            self.browser_type = browser_type
 86        else:
 87            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
 88        self.browser_open = False
 89        self.implicit_wait = implicit_wait
 90        self.page_load_timeout = page_load_timeout
 91        self.rotation_timer = Timer()
 92        self.timer = Timer()
 93        self.timer.start()
 94        self.randomize_user_agent = randomize_user_agent
 95        self.user_agent_rotation_period = user_agent_rotation_period
 96        self.locator_method = locator_method
 97        self.turbo()
 98        self.keys = Keys
 99        self.move_window_by = move_window_by
100        self.download_dir = download_dir
101        self.driver_path = driver_path
102        if not self.driver_path:
103            self.search_for_driver()
104        if open_browser:
105            self.open_browser()
106        else:
107            self.browser = None
108        atexit.register(self.close_browser)
109
110    def __enter__(self):
111        return self
112
113    def __exit__(self, *args):
114        self.close_browser()
115
116    def configure_firefox(self) -> FirefoxService:
117        """Configure options and profile for firefox."""
118        self.options = FirefoxOptions()
119        self.options.headless = self.headless
120        self.options.set_preference(
121            "widget.windows.window_occlusion_tracking.enabled", False
122        )
123        self.options.set_preference("dom.webaudio.enabled", False)
124        if self.randomize_user_agent:
125            self.options.set_preference("general.useragent.override", get_agent())
126        if self.download_dir:
127            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
128            self.profile = FirefoxProfile()
129            self.profile.set_preference("browser.download.dir", str(self.download_dir))
130            self.profile.set_preference("browser.download.folderList", 2)
131        else:
132            self.profile = None
133        self.service = FirefoxService(
134            executable_path=str(self.driver_path), log_path=os.devnull
135        )
136
137    def configure_chrome(self) -> ChromeService:
138        """Configure options and profile for chrome."""
139        self.options = ChromeOptions()
140        self.options.headless = self.headless
141        self.options.add_argument("--disable-blink-features=AutomationControlled")
142        self.options.add_argument("--mute-audio")
143        self.options.add_argument("--disable-infobars")
144        self.options.add_argument("--disable-notifications")
145        self.options.add_argument("--log-level=3")
146        if self.randomize_user_agent:
147            self.options.add_argument(f"--user-agent={get_agent()}")
148        self.options.add_experimental_option("useAutomationExtension", False)
149        if self.download_dir:
150            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
151            self.options.add_experimental_option(
152                "prefs", {"download.default_directory": str(self.download_dir)}
153            )
154        self.service = ChromeService(
155            executable_path=str(self.driver_path), log_path=os.devnull
156        )
157
158    def search_for_driver(self):
159        """Searches for the webdriver executable."""
160        cwd = Path.cwd()
161        found = False
162        match self.browser_type:
163            case "firefox":
164                driver = "geckodriver.exe"
165            case "chrome":
166                driver = "chromedriver.exe"
167        # search PATH
168        env_path = os.environ["PATH"]
169        if sys.platform == "win32":
170            env_paths = env_path.split(";")
171        else:
172            env_paths = env_path.split(":")
173            driver = driver[: driver.find(".")]
174        for path in env_paths:
175            if (Path(path) / driver).exists():
176                self.driver_path = Path(path) / driver
177                found = True
178                break
179        # check current working directory and parent folders
180        if not found:
181            while cwd != cwd.parent:
182                if (cwd / driver).exists():
183                    self.driver_path = cwd / driver
184                    found = True
185                    break
186                cwd = cwd.parent
187            # check top most level
188            if not found and (cwd / driver).exists():
189                self.driver_path = cwd / driver
190                found = True
191        # check child folders (only 1 level down)
192        if not found:
193            for child in Path.cwd().iterdir():
194                if child.is_dir() and (child / driver).exists():
195                    self.driver_path = child / driver
196                    found = True
197        if not found:
198            warn(f"Could not find {driver}")
199
200    def set_implicit_wait(self, wait_time: int = None):
201        """Sets to default time if no arg given."""
202        if not wait_time:
203            self.browser.implicitly_wait(self.implicit_wait)
204        else:
205            self.browser.implicitly_wait(wait_time)
206
207    def open_browser(self):
208        """Configures and opens selenium browser."""
209        if not self.browser_open:
210            match self.browser_type:
211                case "firefox":
212                    self.configure_firefox()
213                    self.browser = webdriver.Firefox(
214                        options=self.options,
215                        service=self.service,
216                        firefox_profile=self.profile,
217                    )
218                case "chrome":
219                    self.configure_chrome()
220                    self.browser = webdriver.Chrome(
221                        options=self.options, service=self.service
222                    )
223            self.set_implicit_wait()
224            self.browser.maximize_window()
225            self.browser.set_window_position(
226                self.move_window_by[0], self.move_window_by[1]
227            )
228            self.browser.maximize_window()
229            self.browser.set_page_load_timeout(self.page_load_timeout)
230            self.browser_open = True
231            self.tab_index = 0
232            self.rotation_timer.start()
233        else:
234            warn("Browser already open.")
235
236    def close_browser(self):
237        """Close browser window."""
238        if self.browser_open:
239            self.browser_open = False
240            self.browser.quit()
241
242    def open_tab(self, url: str = "", switch_to_tab: bool = True):
243        """Opens new tab and, if provided, goes to url.
244
245        New tab is inserted after currently active tab."""
246        self.script("window.open(arguments[0]);", url)
247        if switch_to_tab:
248            self.switch_to_tab(self.tab_index + 1)
249
250    def switch_to_tab(self, tab_index: int):
251        """Switch to a tab in browser, zero indexed."""
252        self.browser.switch_to.window(self.browser.window_handles[tab_index])
253        self.tab_index = tab_index
254
255    def get_num_tabs(self) -> int:
256        """Returns number of tabs open."""
257        return len(self.browser.window_handles)
258
259    def close_tab(self, tab_index: int = 1):
260        """Close specified tab and
261        switches to tab index 0."""
262        self.switch_to_tab(tab_index)
263        self.browser.close()
264        self.switch_to_tab(0)
265
266    def get(self, url: str):
267        """Requests webpage at given url and rotates userAgent if necessary."""
268        if not self.browser_open:
269            self.open_browser()
270        if (
271            self.randomize_user_agent
272            and self.user_agent_rotation_period is not None
273            and self.rotation_timer.check(format=False)
274            > (60 * self.user_agent_rotation_period)
275        ):
276            self.rotation_timer.stop()
277            self.close_browser()
278            self.open_browser()
279        self.browser.get(url)
280        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
281        self.chill(self.arrival_wait)
282
283    def get_soup(self) -> BeautifulSoup:
284        """Returns a BeautifulSoup object
285        of the current page source."""
286        return BeautifulSoup(self.browser.page_source, "html.parser")
287
288    def current_url(self) -> str:
289        """Returns current url of active tab."""
290        return self.browser.current_url
291
292    def delete_cookies(self):
293        """Delete all cookies for
294        this browser instance."""
295        self.browser.delete_all_cookies()
296
297    def turbo(self, engage: bool = True):
298        """When engaged, strings will be sent
299        to elements all at once and there will be
300        no waiting after actions.
301
302        When disengaged, strings will be sent to elements
303        'one key at a time' with randomized amounts of
304        time between successive keys and after actions."""
305        if engage:
306            self.after_key_wait = (0, 0)
307            self.after_field_wait = (0, 0)
308            self.after_click_wait = (0, 0)
309            self.arrival_wait = (1, 1)
310            self.one_key_at_a_time = False
311            self.turbo_engaged = True
312        else:
313            self.after_key_wait = (0.1, 0.5)
314            self.after_field_wait = (1, 2)
315            self.after_click_wait = (0.25, 1.5)
316            self.arrival_wait = (4, 10)
317            self.one_key_at_a_time = True
318            self.turbo_engaged = False
319
320    def chill(self, min_max: tuple[float, float]):
321        """Sleeps a random amount
322        between min_max[0] and min_max[1]."""
323        time.sleep(random.uniform(min_max[0], min_max[1]))
324
325    def script(self, script: str, args: Any = None) -> Any:
326        """Execute javascript code and returns result."""
327        return self.browser.execute_script(script, args)
328
329    def remove(self, locator: str):
330        """Removes element from DOM."""
331        self.script("arguments[0].remove();", self.find(locator))
332
333    def get_length(self, locator: str) -> int:
334        """Returns number of child elements for a given element."""
335        return int(self.script("return arguments[0].length;", self.find(locator)))
336
337    def find(self, locator: str) -> WebElement:
338        """Finds and returns a WebElement."""
339        match self.locator_method:
340            case "xpath":
341                return self.browser.find_element(By.XPATH, locator)
342            case "id":
343                return self.browser.find_element(By.ID, locator)
344            case "className":
345                return self.browser.find_element(By.CLASS_NAME, locator)
346            case "name":
347                return self.browser.find_element(By.NAME, locator)
348            case "cssSelector":
349                return self.browser.find_element(By.CSS_SELECTOR, locator)
350
351    def find_children(self, locator: str) -> list[WebElement]:
352        """Returns a list of child WebElements
353        for given locator arg."""
354        element = self.find(locator)
355        return element.find_elements("xpath", "./*")
356
357    def scroll(self, amount: int = None, fraction: float = None):
358        """Scroll web page.
359        :param amount: The number of lines to scroll if not None.
360
361        :param fraction: The amount between 0.0 and 1.0
362        of the page height to scroll.
363
364        If values are provided for both arguments,
365        amount will be used.
366
367        If values are provided for neither argument,
368        the entire page length will be scrolled.
369
370        Scrolls one line at a time if self.turbo is False."""
371        if amount:
372            amount_to_scroll = amount
373        elif fraction:
374            amount_to_scroll = int(
375                fraction
376                * (
377                    int(self.script("return document.body.scrollHeight;"))
378                    - int(self.script("return window.pageYOffset;"))
379                )
380            )
381        else:
382            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
383        if self.turbo_engaged:
384            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
385        else:
386            for _ in range(abs(amount_to_scroll)):
387                if amount_to_scroll >= 0:
388                    self.script("window.scrollBy(0,1);")
389                else:
390                    self.script("window.scrollBy(0,-1);")
391        self.chill(self.after_click_wait)
392
393    def scroll_into_view(self, locator: str) -> WebElement:
394        """Scrolls to a given element and returns the element."""
395        element = self.find(locator)
396        self.script("arguments[0].scroll_into_view();", element)
397        self.chill(self.after_click_wait)
398        return element
399
400    def text(self, locator: str) -> str:
401        """Returns text of WebElement."""
402        return self.find(locator).text
403
404    def click(self, locator: str) -> WebElement:
405        """Clicks on and returns WebElement."""
406        element = self.find(locator)
407        element.click()
408        self.chill(self.after_click_wait)
409        return element
410
411    def clear(self, locator: str) -> WebElement:
412        """Clears content of WebElement if able
413        and then returns WebElement."""
414        element = self.find(locator)
415        element.clear()
416        self.chill(self.after_click_wait)
417        return element
418
419    def switch_to_iframe(self, locator: str):
420        """Switch to an iframe from given locator."""
421        self.browser.switch_to.frame(self.find(locator))
422
423    def switch_to_parent_frame(self):
424        """Move up a frame level from current frame."""
425        self.browser.switch_to.parent_frame()
426
427    def select(
428        self, locator: str, method: str, choice: str | int | tuple
429    ) -> WebElement:
430        """Select a choice from Select element.
431        Returns the Select element from the locator string,
432        not the option element that is selected.
433
434        :param method: Can be 'value' or 'index'
435
436        :param choice: The option to select.
437
438        If method is 'value', then choice should be
439        the html 'value' attribute of the desired option.
440
441        If method is 'index', choice can either be a single
442        int for the desired option or it can be a two-tuple.
443        If the tuple is provided, a random option between the
444        two indicies (inclusive) will be selected."""
445        element = self.click(locator)
446        match method:
447            case "value":
448                Select(element).select_by_value(choice)
449            case "index":
450                if type(choice) == tuple:
451                    choice = random.randint(choice[0], choice[1])
452                Select(element).select_by_index(choice)
453        self.chill(self.after_field_wait)
454        return element
455
456    def click_elements(
457        self, locators: list[str], max_selections: int = None, min_selections: int = 1
458    ) -> WebElement:
459        """Click a random number of WebElements
460        and return the last WebElement clicked.
461
462        :param locators: A list of element locators to choose from.
463
464        :param max_selections: The maximum number of elements to click.
465        If None, the maximum will be the length of the locators list.
466
467        :param min_selections: The minimum number of elements to click.
468
469        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
470        will click between 1 and 3 random elements from the list.
471        """
472        if not max_selections:
473            max_selections = len(locators)
474        for option in random.sample(
475            locators, k=random.randint(min_selections, max_selections)
476        ):
477            element = self.click(option)
478        return element
479
480    def get_click_list(
481        self, num_options: int, max_choices: int = 1, min_choices: int = 1
482    ) -> list[str]:
483        """Similar to self.click_elements(), but for use with the self.fill_next() method.
484
485        Creates a list of length 'num_options' where every element is 'skip'.
486
487        A random number of elements in the list between 'min_choices' and 'max_choices' are
488        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
489        click_list = ["skip"] * num_options
490        selected_indexes = []
491        for i in range(random.randint(min_choices, max_choices)):
492            index = random.randint(0, num_options - 1)
493            while index in selected_indexes:
494                index = random.randint(0, num_options - 1)
495            selected_indexes.append(index)
496            click_list[index] = self.keys.SPACE
497        return click_list
498
499    def send_keys(
500        self,
501        locator: str,
502        data: str,
503        click_first: bool = True,
504        clear_first: bool = False,
505    ) -> WebElement:
506        """Types data into element and returns the element.
507
508        :param data: The string to send to the element.
509
510        :param click_first: If True, the element is clicked on
511        before the data is sent.
512
513        :param clear_first: If True, the current text of the element
514        is cleared before the data is sent."""
515        element = self.click(locator) if click_first else self.find(locator)
516        if clear_first:
517            element.clear()
518            self.chill(self.after_click_wait)
519        if self.one_key_at_a_time:
520            for ch in str(data):
521                element.send_keys(ch)
522                self.chill(self.after_key_wait)
523        else:
524            element.send_keys(str(data))
525        self.chill(self.after_field_wait)
526        return element
527
528    def fill_next(
529        self, data: list[str | tuple], start_element: WebElement = None
530    ) -> WebElement:
531        """Fills a form by tabbing from the current WebElement
532        to the next one and using the corresponding item in data.
533        Returns the last WebElement.
534
535        :param data: A list of form data. If an item is a string (except for 'skip')
536        it will be typed into the current WebElement.
537
538        An item in data can be a two-tuple of the form
539        ('downArrow', numberOfPresses:int|tuple[int, int]).
540
541        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
542        that many times to the WebElement.
543
544        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
545        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
546        This is typically for use with Select elements.
547
548        An item in data can also be 'skip', which will perform no action on the current
549        WebElement and will continue to the next one.
550
551        An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100,
552        representing a percent chance an element will be clicked or skipped:
553        >>> user.fill_next(["click=70"])
554
555        has a 70% chance of being
556        >>> user.fill_next([user.keys.SPACE])
557
558        and a 30% chance of being
559        >>> user.fill_next(["skip"])
560
561
562        :param start_element: The WebElement to start tabbing from.
563        The currently active element will be used if start_element is None.
564
565        Note: The function tabs to the next element before sending data,
566        so the start_element should the WebElement before the one
567        that should receive data[0].
568        """
569        element = (
570            self.browser.switch_to.active_element
571            if not start_element
572            else start_element
573        )
574        for datum in data:
575            element.send_keys(Keys.TAB)
576            element = self.browser.switch_to.active_element
577            self.chill(self.after_key_wait)
578            if type(datum) == str and datum.strip().startswith("click="):
579                chance = int(datum.split("=")[1].strip())
580                if random.randint(0, 100) <= chance:
581                    datum = Keys.SPACE
582                else:
583                    datum = "skip"
584            if datum[0] == "downArrow":
585                if type(datum[1]) == tuple:
586                    times = random.randint(datum[1][0], datum[1][1])
587                else:
588                    times = datum[1]
589                for _ in range(times):
590                    element.send_keys(Keys.ARROW_DOWN)
591                    self.chill(self.after_key_wait)
592            elif datum == "skip":
593                self.chill(self.after_key_wait)
594            else:
595
596                if self.turbo_engaged:
597                    element.send_keys(str(datum))
598                else:
599                    for ch in str(datum):
600                        element.send_keys(ch)
601                        self.chill(self.after_key_wait)
602            self.chill(self.after_field_wait)
603        return element
604
605    def wait_until(
606        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
607    ):
608        """Checks condition repeatedly until either it is true,
609        or the max_wait is exceeded.
610
611        Raises a TimeoutError if the condition doesn't success within max_wait.
612
613        Useful for determing whether a form has been successfully submitted.
614
615        :param condition: The condition function to check.
616
617        :param max_wait: Number of seconds to continue checking condition
618        before throwing a TimeoutError.
619
620        :param polling_interval: The number of seconds to sleep before
621        checking the condition function again after it fails.
622
623        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
624        start_time = time.time()
625        while True:
626            try:
627                if condition():
628                    time.sleep(1)
629                    break
630                elif (time.time() - start_time) > max_wait:
631                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
632                else:
633                    time.sleep(polling_interval)
634            except:
635                if (time.time() - start_time) > max_wait:
636                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
637                else:
638                    time.sleep(polling_interval)
639
640    def dismiss_alert(self):
641        """Dismiss alert dialog."""
642        self.browser.switch_to.alert.dismiss()
643
644    def solve_recaptcha_v3(
645        self,
646        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
647        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
648    ):
649        """Pass google recaptcha v3 by solving an audio puzzle.
650
651        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
652        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
653        pass None to this argument.
654
655        """
656        locator_method = self.locator_method
657        self.locator_method = "xpath"
658        try:
659            if outer_iframe_xpath:
660                self.switch_to_iframe(outer_iframe_xpath)
661                self.click('//*[@id="recaptcha-anchor"]')
662                self.switch_to_parent_frame()
663            self.switch_to_iframe(inner_iframe_xpath)
664            self.click('//*[@id="recaptcha-audio-button"]')
665            mp3_url = self.find(
666                '//a[@class="rc-audiochallenge-tdownload-link"]'
667            ).get_attribute("href")
668            text = get_text_from_url(mp3_url, ".mp3")
669            self.send_keys('//*[@id="audio-response"]', text)
670            self.click('//*[@id="recaptcha-verify-button"]')
671        except Exception as e:
672            print(e)
673            raise Exception("Could not solve captcha")
674        finally:
675            self.switch_to_parent_frame()
676            self.locator_method = locator_method

Sits on top of selenium to streamline automation and scraping tasks.

User( headless: bool = False, browser_type: str = 'firefox', implicit_wait: int = 10, page_load_timeout: int = 60, open_browser: bool = True, locator_method: str = 'xpath', randomize_user_agent: bool = True, user_agent_rotation_period: int = None, move_window_by: tuple[int, int] = (0, -1000), download_dir: str | pathlib.Path = None, driver_path: str | pathlib.Path = None)
 32    def __init__(
 33        self,
 34        headless: bool = False,
 35        browser_type: str = "firefox",
 36        implicit_wait: int = 10,
 37        page_load_timeout: int = 60,
 38        open_browser: bool = True,
 39        locator_method: str = "xpath",
 40        randomize_user_agent: bool = True,
 41        user_agent_rotation_period: int = None,
 42        move_window_by: tuple[int, int] = (0, -1000),
 43        download_dir: str | Path = None,
 44        driver_path: str | Path = None,
 45    ):
 46        """
 47        :param headless: If True, browser window will not be visible.
 48
 49        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'.
 50
 51        :param implicit_wait: Number of seconds to look for a specified element before
 52        selenium considers it missing and throws an exception.
 53
 54        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
 55        before throwing an exception.
 56
 57        :param open_browser: If True, opens a browser window when a User object is created.
 58        If False, a manual call to self.open_browser() must be made.
 59
 60        :param locator_method: The locator type User should expect to be given.
 61        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
 62        Every member function with a 'locator' argument refers to a string matching
 63        the current locator_method.
 64
 65        :param randomize_user_agent: If True, a random useragent will be used whenever
 66        the browser is opened. If False, the native useragent will be used.
 67
 68        :param user_agent_rotation_period: If not None, the browser window will be closed
 69        and reopened with a new useragent every user_agent_rotation_period number of minutes.
 70        Rotation occurs on the first call to self.get() after the time period has elapsed.
 71        Ignored if randomize_user_agent is False.
 72
 73        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.
 74
 75        :param download_dir: The download folder to use. If None, the default folder will be used.
 76
 77        :param driver_path: The path to the webdriver executable selenium should use.
 78        If None, the system PATH will be checked for the executable.
 79        If the executable isn't found, the parent directories and the immediate child directories
 80        of the current working directory will be searched.
 81        """
 82        self.headless = headless
 83        browser_type = browser_type.lower()
 84        if browser_type in ["firefox", "chrome"]:
 85            self.browser_type = browser_type
 86        else:
 87            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
 88        self.browser_open = False
 89        self.implicit_wait = implicit_wait
 90        self.page_load_timeout = page_load_timeout
 91        self.rotation_timer = Timer()
 92        self.timer = Timer()
 93        self.timer.start()
 94        self.randomize_user_agent = randomize_user_agent
 95        self.user_agent_rotation_period = user_agent_rotation_period
 96        self.locator_method = locator_method
 97        self.turbo()
 98        self.keys = Keys
 99        self.move_window_by = move_window_by
100        self.download_dir = download_dir
101        self.driver_path = driver_path
102        if not self.driver_path:
103            self.search_for_driver()
104        if open_browser:
105            self.open_browser()
106        else:
107            self.browser = None
108        atexit.register(self.close_browser)
Parameters
  • headless: If True, browser window will not be visible.

  • browser_type: Which browser to use. Can be 'firefox' or 'chrome'.

  • implicit_wait: Number of seconds to look for a specified element before selenium considers it missing and throws an exception.

  • page_load_timeout: Time in seconds for selenium to wait for a page to load before throwing an exception.

  • open_browser: If True, opens a browser window when a User object is created. If False, a manual call to self.open_browser() must be made.

  • locator_method: The locator type User should expect to be given. Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'. Every member function with a 'locator' argument refers to a string matching the current locator_method.

  • randomize_user_agent: If True, a random useragent will be used whenever the browser is opened. If False, the native useragent will be used.

  • user_agent_rotation_period: If not None, the browser window will be closed and reopened with a new useragent every user_agent_rotation_period number of minutes. Rotation occurs on the first call to self.get() after the time period has elapsed. Ignored if randomize_user_agent is False.

  • move_window_by: The x and y amount of pixels to move the browser window by after opening.

  • download_dir: The download folder to use. If None, the default folder will be used.

  • driver_path: The path to the webdriver executable selenium should use. If None, the system PATH will be checked for the executable. If the executable isn't found, the parent directories and the immediate child directories of the current working directory will be searched.

def configure_firefox(self) -> selenium.webdriver.firefox.service.Service:
116    def configure_firefox(self) -> FirefoxService:
117        """Configure options and profile for firefox."""
118        self.options = FirefoxOptions()
119        self.options.headless = self.headless
120        self.options.set_preference(
121            "widget.windows.window_occlusion_tracking.enabled", False
122        )
123        self.options.set_preference("dom.webaudio.enabled", False)
124        if self.randomize_user_agent:
125            self.options.set_preference("general.useragent.override", get_agent())
126        if self.download_dir:
127            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
128            self.profile = FirefoxProfile()
129            self.profile.set_preference("browser.download.dir", str(self.download_dir))
130            self.profile.set_preference("browser.download.folderList", 2)
131        else:
132            self.profile = None
133        self.service = FirefoxService(
134            executable_path=str(self.driver_path), log_path=os.devnull
135        )

Configure options and profile for firefox.

def configure_chrome(self) -> selenium.webdriver.chrome.service.Service:
137    def configure_chrome(self) -> ChromeService:
138        """Configure options and profile for chrome."""
139        self.options = ChromeOptions()
140        self.options.headless = self.headless
141        self.options.add_argument("--disable-blink-features=AutomationControlled")
142        self.options.add_argument("--mute-audio")
143        self.options.add_argument("--disable-infobars")
144        self.options.add_argument("--disable-notifications")
145        self.options.add_argument("--log-level=3")
146        if self.randomize_user_agent:
147            self.options.add_argument(f"--user-agent={get_agent()}")
148        self.options.add_experimental_option("useAutomationExtension", False)
149        if self.download_dir:
150            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
151            self.options.add_experimental_option(
152                "prefs", {"download.default_directory": str(self.download_dir)}
153            )
154        self.service = ChromeService(
155            executable_path=str(self.driver_path), log_path=os.devnull
156        )

Configure options and profile for chrome.

def search_for_driver(self):
158    def search_for_driver(self):
159        """Searches for the webdriver executable."""
160        cwd = Path.cwd()
161        found = False
162        match self.browser_type:
163            case "firefox":
164                driver = "geckodriver.exe"
165            case "chrome":
166                driver = "chromedriver.exe"
167        # search PATH
168        env_path = os.environ["PATH"]
169        if sys.platform == "win32":
170            env_paths = env_path.split(";")
171        else:
172            env_paths = env_path.split(":")
173            driver = driver[: driver.find(".")]
174        for path in env_paths:
175            if (Path(path) / driver).exists():
176                self.driver_path = Path(path) / driver
177                found = True
178                break
179        # check current working directory and parent folders
180        if not found:
181            while cwd != cwd.parent:
182                if (cwd / driver).exists():
183                    self.driver_path = cwd / driver
184                    found = True
185                    break
186                cwd = cwd.parent
187            # check top most level
188            if not found and (cwd / driver).exists():
189                self.driver_path = cwd / driver
190                found = True
191        # check child folders (only 1 level down)
192        if not found:
193            for child in Path.cwd().iterdir():
194                if child.is_dir() and (child / driver).exists():
195                    self.driver_path = child / driver
196                    found = True
197        if not found:
198            warn(f"Could not find {driver}")

Searches for the webdriver executable.

def set_implicit_wait(self, wait_time: int = None):
200    def set_implicit_wait(self, wait_time: int = None):
201        """Sets to default time if no arg given."""
202        if not wait_time:
203            self.browser.implicitly_wait(self.implicit_wait)
204        else:
205            self.browser.implicitly_wait(wait_time)

Sets to default time if no arg given.

def open_browser(self):
207    def open_browser(self):
208        """Configures and opens selenium browser."""
209        if not self.browser_open:
210            match self.browser_type:
211                case "firefox":
212                    self.configure_firefox()
213                    self.browser = webdriver.Firefox(
214                        options=self.options,
215                        service=self.service,
216                        firefox_profile=self.profile,
217                    )
218                case "chrome":
219                    self.configure_chrome()
220                    self.browser = webdriver.Chrome(
221                        options=self.options, service=self.service
222                    )
223            self.set_implicit_wait()
224            self.browser.maximize_window()
225            self.browser.set_window_position(
226                self.move_window_by[0], self.move_window_by[1]
227            )
228            self.browser.maximize_window()
229            self.browser.set_page_load_timeout(self.page_load_timeout)
230            self.browser_open = True
231            self.tab_index = 0
232            self.rotation_timer.start()
233        else:
234            warn("Browser already open.")

Configures and opens selenium browser.

def close_browser(self):
236    def close_browser(self):
237        """Close browser window."""
238        if self.browser_open:
239            self.browser_open = False
240            self.browser.quit()

Close browser window.

def open_tab(self, url: str = '', switch_to_tab: bool = True):
242    def open_tab(self, url: str = "", switch_to_tab: bool = True):
243        """Opens new tab and, if provided, goes to url.
244
245        New tab is inserted after currently active tab."""
246        self.script("window.open(arguments[0]);", url)
247        if switch_to_tab:
248            self.switch_to_tab(self.tab_index + 1)

Opens new tab and, if provided, goes to url.

New tab is inserted after currently active tab.

def switch_to_tab(self, tab_index: int):
250    def switch_to_tab(self, tab_index: int):
251        """Switch to a tab in browser, zero indexed."""
252        self.browser.switch_to.window(self.browser.window_handles[tab_index])
253        self.tab_index = tab_index

Switch to a tab in browser, zero indexed.

def get_num_tabs(self) -> int:
255    def get_num_tabs(self) -> int:
256        """Returns number of tabs open."""
257        return len(self.browser.window_handles)

Returns number of tabs open.

def close_tab(self, tab_index: int = 1):
259    def close_tab(self, tab_index: int = 1):
260        """Close specified tab and
261        switches to tab index 0."""
262        self.switch_to_tab(tab_index)
263        self.browser.close()
264        self.switch_to_tab(0)

Close specified tab and switches to tab index 0.

def get(self, url: str):
266    def get(self, url: str):
267        """Requests webpage at given url and rotates userAgent if necessary."""
268        if not self.browser_open:
269            self.open_browser()
270        if (
271            self.randomize_user_agent
272            and self.user_agent_rotation_period is not None
273            and self.rotation_timer.check(format=False)
274            > (60 * self.user_agent_rotation_period)
275        ):
276            self.rotation_timer.stop()
277            self.close_browser()
278            self.open_browser()
279        self.browser.get(url)
280        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
281        self.chill(self.arrival_wait)

Requests webpage at given url and rotates userAgent if necessary.

def get_soup(self) -> bs4.BeautifulSoup:
283    def get_soup(self) -> BeautifulSoup:
284        """Returns a BeautifulSoup object
285        of the current page source."""
286        return BeautifulSoup(self.browser.page_source, "html.parser")

Returns a BeautifulSoup object of the current page source.

def current_url(self) -> str:
288    def current_url(self) -> str:
289        """Returns current url of active tab."""
290        return self.browser.current_url

Returns current url of active tab.

def delete_cookies(self):
292    def delete_cookies(self):
293        """Delete all cookies for
294        this browser instance."""
295        self.browser.delete_all_cookies()

Delete all cookies for this browser instance.

def turbo(self, engage: bool = True):
297    def turbo(self, engage: bool = True):
298        """When engaged, strings will be sent
299        to elements all at once and there will be
300        no waiting after actions.
301
302        When disengaged, strings will be sent to elements
303        'one key at a time' with randomized amounts of
304        time between successive keys and after actions."""
305        if engage:
306            self.after_key_wait = (0, 0)
307            self.after_field_wait = (0, 0)
308            self.after_click_wait = (0, 0)
309            self.arrival_wait = (1, 1)
310            self.one_key_at_a_time = False
311            self.turbo_engaged = True
312        else:
313            self.after_key_wait = (0.1, 0.5)
314            self.after_field_wait = (1, 2)
315            self.after_click_wait = (0.25, 1.5)
316            self.arrival_wait = (4, 10)
317            self.one_key_at_a_time = True
318            self.turbo_engaged = False

When engaged, strings will be sent to elements all at once and there will be no waiting after actions.

When disengaged, strings will be sent to elements 'one key at a time' with randomized amounts of time between successive keys and after actions.

def chill(self, min_max: tuple[float, float]):
320    def chill(self, min_max: tuple[float, float]):
321        """Sleeps a random amount
322        between min_max[0] and min_max[1]."""
323        time.sleep(random.uniform(min_max[0], min_max[1]))

Sleeps a random amount between min_max[0] and min_max[1].

def script(self, script: str, args: Any = None) -> Any:
325    def script(self, script: str, args: Any = None) -> Any:
326        """Execute javascript code and returns result."""
327        return self.browser.execute_script(script, args)

Execute javascript code and returns result.

def remove(self, locator: str):
329    def remove(self, locator: str):
330        """Removes element from DOM."""
331        self.script("arguments[0].remove();", self.find(locator))

Removes element from DOM.

def get_length(self, locator: str) -> int:
333    def get_length(self, locator: str) -> int:
334        """Returns number of child elements for a given element."""
335        return int(self.script("return arguments[0].length;", self.find(locator)))

Returns number of child elements for a given element.

def find(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
337    def find(self, locator: str) -> WebElement:
338        """Finds and returns a WebElement."""
339        match self.locator_method:
340            case "xpath":
341                return self.browser.find_element(By.XPATH, locator)
342            case "id":
343                return self.browser.find_element(By.ID, locator)
344            case "className":
345                return self.browser.find_element(By.CLASS_NAME, locator)
346            case "name":
347                return self.browser.find_element(By.NAME, locator)
348            case "cssSelector":
349                return self.browser.find_element(By.CSS_SELECTOR, locator)

Finds and returns a WebElement.

def find_children( self, locator: str) -> list[selenium.webdriver.remote.webelement.WebElement]:
351    def find_children(self, locator: str) -> list[WebElement]:
352        """Returns a list of child WebElements
353        for given locator arg."""
354        element = self.find(locator)
355        return element.find_elements("xpath", "./*")

Returns a list of child WebElements for given locator arg.

def scroll(self, amount: int = None, fraction: float = None):
357    def scroll(self, amount: int = None, fraction: float = None):
358        """Scroll web page.
359        :param amount: The number of lines to scroll if not None.
360
361        :param fraction: The amount between 0.0 and 1.0
362        of the page height to scroll.
363
364        If values are provided for both arguments,
365        amount will be used.
366
367        If values are provided for neither argument,
368        the entire page length will be scrolled.
369
370        Scrolls one line at a time if self.turbo is False."""
371        if amount:
372            amount_to_scroll = amount
373        elif fraction:
374            amount_to_scroll = int(
375                fraction
376                * (
377                    int(self.script("return document.body.scrollHeight;"))
378                    - int(self.script("return window.pageYOffset;"))
379                )
380            )
381        else:
382            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
383        if self.turbo_engaged:
384            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
385        else:
386            for _ in range(abs(amount_to_scroll)):
387                if amount_to_scroll >= 0:
388                    self.script("window.scrollBy(0,1);")
389                else:
390                    self.script("window.scrollBy(0,-1);")
391        self.chill(self.after_click_wait)

Scroll web page.

Parameters
  • amount: The number of lines to scroll if not None.

  • fraction: The amount between 0.0 and 1.0 of the page height to scroll.

If values are provided for both arguments, amount will be used.

If values are provided for neither argument, the entire page length will be scrolled.

Scrolls one pixel at a time if self.turbo_engaged is False.

def scroll_into_view(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
393    def scroll_into_view(self, locator: str) -> WebElement:
394        """Scrolls to a given element and returns the element."""
395        element = self.find(locator)
396        self.script("arguments[0].scroll_into_view();", element)
397        self.chill(self.after_click_wait)
398        return element

Scrolls to a given element and returns the element.

def text(self, locator: str) -> str:
400    def text(self, locator: str) -> str:
401        """Returns text of WebElement."""
402        return self.find(locator).text

Returns text of WebElement.

def click(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
404    def click(self, locator: str) -> WebElement:
405        """Clicks on and returns WebElement."""
406        element = self.find(locator)
407        element.click()
408        self.chill(self.after_click_wait)
409        return element

Clicks on and returns WebElement.

def clear(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
411    def clear(self, locator: str) -> WebElement:
412        """Clears content of WebElement if able
413        and then returns WebElement."""
414        element = self.find(locator)
415        element.clear()
416        self.chill(self.after_click_wait)
417        return element

Clears content of WebElement if able and then returns WebElement.

def switch_to_iframe(self, locator: str):
419    def switch_to_iframe(self, locator: str):
420        """Switch to an iframe from given locator."""
421        self.browser.switch_to.frame(self.find(locator))

Switch to an iframe from given locator.

def switch_to_parent_frame(self):
423    def switch_to_parent_frame(self):
424        """Move up a frame level from current frame."""
425        self.browser.switch_to.parent_frame()

Move up a frame level from current frame.

def select( self, locator: str, method: str, choice: str | int | tuple) -> selenium.webdriver.remote.webelement.WebElement:
427    def select(
428        self, locator: str, method: str, choice: str | int | tuple
429    ) -> WebElement:
430        """Select a choice from Select element.
431        Returns the Select element from the locator string,
432        not the option element that is selected.
433
434        :param method: Can be 'value' or 'index'
435
436        :param choice: The option to select.
437
438        If method is 'value', then choice should be
439        the html 'value' attribute of the desired option.
440
441        If method is 'index', choice can either be a single
442        int for the desired option or it can be a two-tuple.
443        If the tuple is provided, a random option between the
444        two indicies (inclusive) will be selected."""
445        element = self.click(locator)
446        match method:
447            case "value":
448                Select(element).select_by_value(choice)
449            case "index":
450                if type(choice) == tuple:
451                    choice = random.randint(choice[0], choice[1])
452                Select(element).select_by_index(choice)
453        self.chill(self.after_field_wait)
454        return element

Select a choice from Select element. Returns the Select element from the locator string, not the option element that is selected.

Parameters
  • method: Can be 'value' or 'index'

  • choice: The option to select.

If method is 'value', then choice should be the html 'value' attribute of the desired option.

If method is 'index', choice can either be a single int for the desired option or it can be a two-tuple. If the tuple is provided, a random option between the two indices (inclusive) will be selected.

def click_elements( self, locators: list[str], max_selections: int = None, min_selections: int = 1) -> selenium.webdriver.remote.webelement.WebElement:
456    def click_elements(
457        self, locators: list[str], max_selections: int = None, min_selections: int = 1
458    ) -> WebElement:
459        """Click a random number of WebElements
460        and return the last WebElement clicked.
461
462        :param locators: A list of element locators to choose from.
463
464        :param max_selections: The maximum number of elements to click.
465        If None, the maximum will be the length of the locators list.
466
467        :param min_selections: The minimum number of elements to click.
468
469        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
470        will click between 1 and 3 random elements from the list.
471        """
472        if not max_selections:
473            max_selections = len(locators)
474        for option in random.sample(
475            locators, k=random.randint(min_selections, max_selections)
476        ):
477            element = self.click(option)
478        return element

Click a random number of WebElements and return the last WebElement clicked.

Parameters
  • locators: A list of element locators to choose from.

  • max_selections: The maximum number of elements to click. If None, the maximum will be the length of the locators list.

  • min_selections: The minimum number of elements to click.

e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3) will click between 1 and 3 random elements from the list.

def get_click_list( self, num_options: int, max_choices: int = 1, min_choices: int = 1) -> list[str]:
480    def get_click_list(
481        self, num_options: int, max_choices: int = 1, min_choices: int = 1
482    ) -> list[str]:
483        """Similar to self.click_elements(), but for use with the self.fill_next() method.
484
485        Creates a list of length 'num_options' where every element is 'skip'.
486
487        A random number of elements in the list between 'min_choices' and 'max_choices' are
488        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
489        click_list = ["skip"] * num_options
490        selected_indexes = []
491        for i in range(random.randint(min_choices, max_choices)):
492            index = random.randint(0, num_options - 1)
493            while index in selected_indexes:
494                index = random.randint(0, num_options - 1)
495            selected_indexes.append(index)
496            click_list[index] = self.keys.SPACE
497        return click_list

Similar to self.click_elements(), but for use with the self.fill_next() method.

Creates a list of length 'num_options' where every element is 'skip'.

A random number of elements in the list between 'min_choices' and 'max_choices' are replaced with 'keys.SPACE' (interpreted as a click by almost all web forms).

def send_keys( self, locator: str, data: str, click_first: bool = True, clear_first: bool = False) -> selenium.webdriver.remote.webelement.WebElement:
499    def send_keys(
500        self,
501        locator: str,
502        data: str,
503        click_first: bool = True,
504        clear_first: bool = False,
505    ) -> WebElement:
506        """Types data into element and returns the element.
507
508        :param data: The string to send to the element.
509
510        :param click_first: If True, the element is clicked on
511        before the data is sent.
512
513        :param clear_first: If True, the current text of the element
514        is cleared before the data is sent."""
515        element = self.click(locator) if click_first else self.find(locator)
516        if clear_first:
517            element.clear()
518            self.chill(self.after_click_wait)
519        if self.one_key_at_a_time:
520            for ch in str(data):
521                element.send_keys(ch)
522                self.chill(self.after_key_wait)
523        else:
524            element.send_keys(str(data))
525        self.chill(self.after_field_wait)
526        return element

Types data into element and returns the element.

Parameters
  • data: The string to send to the element.

  • click_first: If True, the element is clicked on before the data is sent.

  • clear_first: If True, the current text of the element is cleared before the data is sent.

def fill_next( self, data: list[str | tuple], start_element: selenium.webdriver.remote.webelement.WebElement = None) -> selenium.webdriver.remote.webelement.WebElement:
528    def fill_next(
529        self, data: list[str | tuple], start_element: WebElement = None
530    ) -> WebElement:
531        """Fills a form by tabbing from the current WebElement
532        to the next one and using the corresponding item in data.
533        Returns the last WebElement.
534
535        :param data: A list of form data. If an item is a string (except for 'skip')
536        it will be typed into the current WebElement.
537
538        An item in data can be a two-tuple of the form
539        ('downArrow', numberOfPresses:int|tuple[int, int]).
540
541        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
542        that many times to the WebElement.
543
544        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
545        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
546        This is typically for use with Select elements.
547
548        An item in data can also be 'skip', which will perform no action on the current
549        WebElement and will continue to the next one.
550
551        An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100,
552        representing a percent chance an element will be clicked or skipped:
553        >>> user.fill_next(["click=70"])
554
555        has a 70% chance of being
556        >>> user.fill_next([user.keys.SPACE])
557
558        and a 30% chance of being
559        >>> user.fill_next(["skip"])
560
561
562        :param start_element: The WebElement to start tabbing from.
563        The currently active element will be used if start_element is None.
564
565        Note: The function tabs to the next element before sending data,
566        so the start_element should the WebElement before the one
567        that should receive data[0].
568        """
569        element = (
570            self.browser.switch_to.active_element
571            if not start_element
572            else start_element
573        )
574        for datum in data:
575            element.send_keys(Keys.TAB)
576            element = self.browser.switch_to.active_element
577            self.chill(self.after_key_wait)
578            if type(datum) == str and datum.strip().startswith("click="):
579                chance = int(datum.split("=")[1].strip())
580                if random.randint(0, 100) <= chance:
581                    datum = Keys.SPACE
582                else:
583                    datum = "skip"
584            if datum[0] == "downArrow":
585                if type(datum[1]) == tuple:
586                    times = random.randint(datum[1][0], datum[1][1])
587                else:
588                    times = datum[1]
589                for _ in range(times):
590                    element.send_keys(Keys.ARROW_DOWN)
591                    self.chill(self.after_key_wait)
592            elif datum == "skip":
593                self.chill(self.after_key_wait)
594            else:
595
596                if self.turbo_engaged:
597                    element.send_keys(str(datum))
598                else:
599                    for ch in str(datum):
600                        element.send_keys(ch)
601                        self.chill(self.after_key_wait)
602            self.chill(self.after_field_wait)
603        return element

Fills a form by tabbing from the current WebElement to the next one and using the corresponding item in data. Returns the last WebElement.

Parameters
  • data: A list of form data. If an item is a string (except for 'skip') it will be typed into the current WebElement.

An item in data can be a two-tuple of the form ('downArrow', numberOfPresses:int|tuple[int, int]).

If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent that many times to the WebElement.

If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random number of times between numberOfPresses[0] and numberOfPresses[1] inclusive. This is typically for use with Select elements.

An item in data can also be 'skip', which will perform no action on the current WebElement and will continue to the next one.

An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100, representing a percent chance an element will be clicked or skipped:

>>> user.fill_next(["click=70"])

has a 70% chance of being

>>> user.fill_next([user.keys.SPACE])

and a 30% chance of being

>>> user.fill_next(["skip"])
  • start_element: The WebElement to start tabbing from. The currently active element will be used if start_element is None.

Note: The function tabs to the next element before sending data, so the start_element should be the WebElement before the one that should receive data[0].

def wait_until( self, condition: function, max_wait: float = 10, polling_interval: float = 0.1):
605    def wait_until(
606        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
607    ):
608        """Checks condition repeatedly until either it is true,
609        or the max_wait is exceeded.
610
611        Raises a TimeoutError if the condition doesn't success within max_wait.
612
613        Useful for determing whether a form has been successfully submitted.
614
615        :param condition: The condition function to check.
616
617        :param max_wait: Number of seconds to continue checking condition
618        before throwing a TimeoutError.
619
620        :param polling_interval: The number of seconds to sleep before
621        checking the condition function again after it fails.
622
623        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
624        start_time = time.time()
625        while True:
626            try:
627                if condition():
628                    time.sleep(1)
629                    break
630                elif (time.time() - start_time) > max_wait:
631                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
632                else:
633                    time.sleep(polling_interval)
634            except:
635                if (time.time() - start_time) > max_wait:
636                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
637                else:
638                    time.sleep(polling_interval)

Checks condition repeatedly until either it is true, or the max_wait is exceeded.

Raises a TimeoutError if the condition doesn't succeed within max_wait.

Useful for determining whether a form has been successfully submitted.

Parameters
  • condition: The condition function to check.

  • max_wait: Number of seconds to continue checking condition before throwing a TimeoutError.

  • polling_interval: The number of seconds to sleep before checking the condition function again after it fails.

e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]'))

def dismiss_alert(self):
640    def dismiss_alert(self):
641        """Dismiss alert dialog."""
642        self.browser.switch_to.alert.dismiss()

Dismiss alert dialog.

def solve_recaptcha_v3( self, outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]', inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]'):
644    def solve_recaptcha_v3(
645        self,
646        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
647        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
648    ):
649        """Pass google recaptcha v3 by solving an audio puzzle.
650
651        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
652        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
653        pass None to this argument.
654
655        """
656        locator_method = self.locator_method
657        self.locator_method = "xpath"
658        try:
659            if outer_iframe_xpath:
660                self.switch_to_iframe(outer_iframe_xpath)
661                self.click('//*[@id="recaptcha-anchor"]')
662                self.switch_to_parent_frame()
663            self.switch_to_iframe(inner_iframe_xpath)
664            self.click('//*[@id="recaptcha-audio-button"]')
665            mp3_url = self.find(
666                '//a[@class="rc-audiochallenge-tdownload-link"]'
667            ).get_attribute("href")
668            text = get_text_from_url(mp3_url, ".mp3")
669            self.send_keys('//*[@id="audio-response"]', text)
670            self.click('//*[@id="recaptcha-verify-button"]')
671        except Exception as e:
672            print(e)
673            raise Exception("Could not solve captcha")
674        finally:
675            self.switch_to_parent_frame()
676            self.locator_method = locator_method

Pass google recaptcha v3 by solving an audio puzzle.

Parameters
  • outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox. If it's the recaptcha without the initial checkbox that just shows the image puzzle, pass None to this argument.