seleniumuser.seleniumuser

  1import atexit
  2import os
  3import random
  4import sys
  5import time
  6from pathlib import Path
  7from types import LambdaType
  8from typing import Any
  9from warnings import warn
 10
 11from bs4 import BeautifulSoup
 12from noiftimer import Timer
 13from selenium import webdriver
 14from selenium.webdriver.chrome.options import Options as ChromeOptions
 15from selenium.webdriver.chrome.service import Service as ChromeService
 16from selenium.webdriver.common.by import By
 17from selenium.webdriver.common.keys import Keys
 18from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
 19from selenium.webdriver.firefox.options import Options as FirefoxOptions
 20from selenium.webdriver.firefox.service import Service as FirefoxService
 21from selenium.webdriver.remote.webelement import WebElement
 22from selenium.webdriver.support.ui import Select
 23from voxscribe import get_text_from_url
 24from whosyouragent import get_agent
 25
 26
 27class User:
 28    """Sits on top of selenium to streamline
 29    automation and scraping tasks."""
 30
 31    def __init__(
 32        self,
 33        headless: bool = False,
 34        browser_type: str = "firefox",
 35        implicit_wait: int = 10,
 36        page_load_timeout: int = 60,
 37        open_browser: bool = True,
 38        locator_method: str = "xpath",
 39        randomize_user_agent: bool = True,
 40        user_agent_rotation_period: int = None,
 41        move_window_by: tuple[int, int] = (0, -1000),
 42        download_dir: str | Path = None,
 43        driver_path: str | Path = None,
 44    ):
 45        """
 46        :param headless: If True, browser window will not be visible.
 47
 48        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'.
 49
 50        :param implicit_wait: Number of seconds to look for a specified element before
 51        selenium considers it missing and throws an exception.
 52
 53        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
 54        before throwing an exception.
 55
 56        :param open_browser: If True, opens a browser window when a User object is created.
 57        If False, a manual call to self.open_browser() must be made.
 58
 59        :param locator_method: The locator type User should expect to be given.
 60        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
 61        Every member function with a 'locator' argument refers to a string matching
 62        the current locator_method.
 63
 64        :param randomize_user_agent: If True, a random useragent will be used whenever
 65        the browser is opened. If False, the native useragent will be used.
 66
 67        :param user_agent_rotation_period: If not None, the browser window will be closed
 68        and reopened with a new useragent every user_agent_rotation_period number of minutes.
 69        Rotation occurs on the first call to self.get() after the time period has elapsed.
 70        Ignored if randomize_user_agent is False.
 71
 72        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.
 73
 74        :param download_dir: The download folder to use. If None, the default folder will be used.
 75
 76        :param driver_path: The path to the webdriver executable selenium should use.
 77        If None, the system PATH will be checked for the executable.
 78        If the executable isn't found, the parent directories and the immediate child directories
 79        of the current working directory will be searched.
 80        """
 81        self.headless = headless
 82        browser_type = browser_type.lower()
 83        if browser_type in ["firefox", "chrome"]:
 84            self.browser_type = browser_type
 85        else:
 86            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
 87        self.browser_open = False
 88        self.implicit_wait = implicit_wait
 89        self.page_load_timeout = page_load_timeout
 90        self.rotation_timer = Timer()
 91        self.randomize_user_agent = randomize_user_agent
 92        self.user_agent_rotation_period = user_agent_rotation_period
 93        self.locator_method = locator_method
 94        self.turbo()
 95        self.keys = Keys
 96        self.move_window_by = move_window_by
 97        self.download_dir = download_dir
 98        self.driver_path = driver_path
 99        if not self.driver_path:
100            self.search_for_driver()
101        if open_browser:
102            self.open_browser()
103        else:
104            self.browser = None
105        atexit.register(self.close_browser)
106
107    def __enter__(self):
108        return self
109
110    def __exit__(self, *args):
111        self.close_browser()
112
113    def configure_firefox(self) -> FirefoxService:
114        """Configure options and profile for firefox."""
115        self.options = FirefoxOptions()
116        self.options.headless = self.headless
117        self.options.set_preference(
118            "widget.windows.window_occlusion_tracking.enabled", False
119        )
120        self.options.set_preference("dom.webaudio.enabled", False)
121        if self.randomize_user_agent:
122            self.options.set_preference("general.useragent.override", get_agent())
123        if self.download_dir:
124            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
125            self.profile = FirefoxProfile()
126            self.profile.set_preference("browser.download.dir", str(self.download_dir))
127            self.profile.set_preference("browser.download.folderList", 2)
128        else:
129            self.profile = None
130        self.service = FirefoxService(
131            executable_path=str(self.driver_path), log_path=os.devnull
132        )
133
134    def configure_chrome(self) -> ChromeService:
135        """Configure options and profile for chrome."""
136        self.options = ChromeOptions()
137        self.options.headless = self.headless
138        self.options.add_argument("--disable-blink-features=AutomationControlled")
139        self.options.add_argument("--mute-audio")
140        self.options.add_argument("--disable-infobars")
141        self.options.add_argument("--disable-notifications")
142        self.options.add_argument("--log-level=3")
143        if self.randomize_user_agent:
144            self.options.add_argument(f"--user-agent={get_agent()}")
145        self.options.add_experimental_option("useAutomationExtension", False)
146        if self.download_dir:
147            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
148            self.options.add_experimental_option(
149                "prefs", {"download.default_directory": str(self.download_dir)}
150            )
151        self.service = ChromeService(
152            executable_path=str(self.driver_path), log_path=os.devnull
153        )
154
155    def search_for_driver(self):
156        """Searches for the webdriver executable."""
157        cwd = Path.cwd()
158        found = False
159        match self.browser_type:
160            case "firefox":
161                driver = "geckodriver.exe"
162            case "chrome":
163                driver = "chromedriver.exe"
164        # search PATH
165        env_path = os.environ["PATH"]
166        if sys.platform == "win32":
167            env_paths = env_path.split(";")
168        else:
169            env_paths = env_path.split(":")
170            driver = driver[: driver.find(".")]
171        for path in env_paths:
172            if (Path(path) / driver).exists():
173                self.driver_path = Path(path) / driver
174                found = True
175                break
176        # check current working directory and parent folders
177        if not found:
178            while cwd != cwd.parent:
179                if (cwd / driver).exists():
180                    self.driver_path = cwd / driver
181                    found = True
182                    break
183                cwd = cwd.parent
184            # check top most level
185            if not found and (cwd / driver).exists():
186                self.driver_path = cwd / driver
187                found = True
188        # check child folders (only 1 level down)
189        if not found:
190            for child in Path.cwd().iterdir():
191                if child.is_dir() and (child / driver).exists():
192                    self.driver_path = child / driver
193                    found = True
194        if not found:
195            warn(f"Could not find {driver}")
196
197    def set_implicit_wait(self, wait_time: int = None):
198        """Sets to default time if no arg given."""
199        if not wait_time:
200            self.browser.implicitly_wait(self.implicit_wait)
201        else:
202            self.browser.implicitly_wait(wait_time)
203
204    def open_browser(self):
205        """Configures and opens selenium browser."""
206        if not self.browser_open:
207            match self.browser_type:
208                case "firefox":
209                    self.configure_firefox()
210                    self.browser = webdriver.Firefox(
211                        options=self.options,
212                        service=self.service,
213                        firefox_profile=self.profile,
214                    )
215                case "chrome":
216                    self.configure_chrome()
217                    self.browser = webdriver.Chrome(
218                        options=self.options, service=self.service
219                    )
220            self.set_implicit_wait()
221            self.browser.maximize_window()
222            self.browser.set_window_position(
223                self.move_window_by[0], self.move_window_by[1]
224            )
225            self.browser.maximize_window()
226            self.browser.set_page_load_timeout(self.page_load_timeout)
227            self.browser_open = True
228            self.tab_index = 0
229            self.rotation_timer.start()
230        else:
231            warn("Browser already open.")
232
233    def close_browser(self):
234        """Close browser window."""
235        if self.browser_open:
236            self.browser_open = False
237            self.browser.quit()
238
239    def open_tab(self, url: str = "", switch_to_tab: bool = True):
240        """Opens new tab and, if provided, goes to url.
241
242        New tab is inserted after currently active tab."""
243        self.script("window.open(arguments[0]);", url)
244        if switch_to_tab:
245            self.switch_to_tab(self.tab_index + 1)
246
247    def switch_to_tab(self, tab_index: int):
248        """Switch to a tab in browser, zero indexed."""
249        self.browser.switch_to.window(self.browser.window_handles[tab_index])
250        self.tab_index = tab_index
251
252    def get_num_tabs(self) -> int:
253        """Returns number of tabs open."""
254        return len(self.browser.window_handles)
255
256    def close_tab(self, tab_index: int = 1):
257        """Close specified tab and
258        switches to tab index 0."""
259        self.switch_to_tab(tab_index)
260        self.browser.close()
261        self.switch_to_tab(0)
262
263    def get(self, url: str):
264        """Requests webpage at given url and rotates userAgent if necessary."""
265        if not self.browser_open:
266            self.open_browser()
267        if (
268            self.randomize_user_agent
269            and self.user_agent_rotation_period is not None
270            and self.rotation_timer.elapsed > (60 * self.user_agent_rotation_period)
271        ):
272            self.rotation_timer.stop()
273            self.close_browser()
274            self.open_browser()
275        self.browser.get(url)
276        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
277        self.chill(self.arrival_wait)
278
279    def get_soup(self) -> BeautifulSoup:
280        """Returns a BeautifulSoup object
281        of the current page source."""
282        return BeautifulSoup(self.browser.page_source, "html.parser")
283
284    def current_url(self) -> str:
285        """Returns current url of active tab."""
286        return self.browser.current_url
287
288    def delete_cookies(self):
289        """Delete all cookies for
290        this browser instance."""
291        self.browser.delete_all_cookies()
292
293    def turbo(self, engage: bool = True):
294        """When engaged, strings will be sent
295        to elements all at once and there will be
296        no waiting after actions.
297
298        When disengaged, strings will be sent to elements
299        'one key at a time' with randomized amounts of
300        time between successive keys and after actions."""
301        if engage:
302            self.after_key_wait = (0, 0)
303            self.after_field_wait = (0, 0)
304            self.after_click_wait = (0, 0)
305            self.arrival_wait = (1, 1)
306            self.one_key_at_a_time = False
307            self.turbo_engaged = True
308        else:
309            self.after_key_wait = (0.1, 0.5)
310            self.after_field_wait = (1, 2)
311            self.after_click_wait = (0.25, 1.5)
312            self.arrival_wait = (4, 10)
313            self.one_key_at_a_time = True
314            self.turbo_engaged = False
315
316    def chill(self, min_max: tuple[float, float]):
317        """Sleeps a random amount
318        between min_max[0] and min_max[1]."""
319        time.sleep(random.uniform(min_max[0], min_max[1]))
320
321    def script(self, script: str, args: Any = None) -> Any:
322        """Execute javascript code and returns result."""
323        return self.browser.execute_script(script, args)
324
325    def remove(self, locator: str):
326        """Removes element from DOM."""
327        self.script("arguments[0].remove();", self.find(locator))
328
329    def get_length(self, locator: str) -> int:
330        """Returns number of child elements for a given element."""
331        return int(self.script("return arguments[0].length;", self.find(locator)))
332
333    def find(self, locator: str) -> WebElement:
334        """Finds and returns a WebElement."""
335        match self.locator_method:
336            case "xpath":
337                return self.browser.find_element(By.XPATH, locator)
338            case "id":
339                return self.browser.find_element(By.ID, locator)
340            case "className":
341                return self.browser.find_element(By.CLASS_NAME, locator)
342            case "name":
343                return self.browser.find_element(By.NAME, locator)
344            case "cssSelector":
345                return self.browser.find_element(By.CSS_SELECTOR, locator)
346
347    def find_children(self, locator: str) -> list[WebElement]:
348        """Returns a list of child WebElements
349        for given locator arg."""
350        element = self.find(locator)
351        return element.find_elements("xpath", "./*")
352
353    def scroll(self, amount: int = None, fraction: float = None):
354        """Scroll web page.
355        :param amount: The number of lines to scroll if not None.
356
357        :param fraction: The amount between 0.0 and 1.0
358        of the page height to scroll.
359
360        If values are provided for both arguments,
361        amount will be used.
362
363        If values are provided for neither argument,
364        the entire page length will be scrolled.
365
366        Scrolls one line at a time if self.turbo is False."""
367        if amount:
368            amount_to_scroll = amount
369        elif fraction:
370            amount_to_scroll = int(
371                fraction
372                * (
373                    int(self.script("return document.body.scrollHeight;"))
374                    - int(self.script("return window.pageYOffset;"))
375                )
376            )
377        else:
378            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
379        if self.turbo_engaged:
380            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
381        else:
382            for _ in range(abs(amount_to_scroll)):
383                if amount_to_scroll >= 0:
384                    self.script("window.scrollBy(0,1);")
385                else:
386                    self.script("window.scrollBy(0,-1);")
387        self.chill(self.after_click_wait)
388
389    def scroll_into_view(self, locator: str) -> WebElement:
390        """Scrolls to a given element and returns the element."""
391        element = self.find(locator)
392        self.script("arguments[0].scrollIntoView();", element)
393        self.chill(self.after_click_wait)
394        return element
395
396    def text(self, locator: str) -> str:
397        """Returns text of WebElement."""
398        return self.find(locator).text
399
400    def click(self, locator: str) -> WebElement:
401        """Clicks on and returns WebElement."""
402        element = self.find(locator)
403        element.click()
404        self.chill(self.after_click_wait)
405        return element
406
407    def clear(self, locator: str) -> WebElement:
408        """Clears content of WebElement if able
409        and then returns WebElement."""
410        element = self.find(locator)
411        element.clear()
412        self.chill(self.after_click_wait)
413        return element
414
415    def switch_to_iframe(self, locator: str):
416        """Switch to an iframe from given locator."""
417        self.browser.switch_to.frame(self.find(locator))
418
419    def switch_to_parent_frame(self):
420        """Move up a frame level from current frame."""
421        self.browser.switch_to.parent_frame()
422
423    def select(
424        self, locator: str, method: str, choice: str | int | tuple
425    ) -> WebElement:
426        """Select a choice from Select element.
427        Returns the Select element from the locator string,
428        not the option element that is selected.
429
430        :param method: Can be 'value' or 'index'
431
432        :param choice: The option to select.
433
434        If method is 'value', then choice should be
435        the html 'value' attribute of the desired option.
436
437        If method is 'index', choice can either be a single
438        int for the desired option or it can be a two-tuple.
439        If the tuple is provided, a random option between the
440        two indicies (inclusive) will be selected."""
441        element = self.click(locator)
442        match method:
443            case "value":
444                Select(element).select_by_value(choice)
445            case "index":
446                if type(choice) == tuple:
447                    choice = random.randint(choice[0], choice[1])
448                Select(element).select_by_index(choice)
449        self.chill(self.after_field_wait)
450        return element
451
452    def click_elements(
453        self, locators: list[str], max_selections: int = None, min_selections: int = 1
454    ) -> WebElement:
455        """Click a random number of WebElements
456        and return the last WebElement clicked.
457
458        :param locators: A list of element locators to choose from.
459
460        :param max_selections: The maximum number of elements to click.
461        If None, the maximum will be the length of the locators list.
462
463        :param min_selections: The minimum number of elements to click.
464
465        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
466        will click between 1 and 3 random elements from the list.
467        """
468        if not max_selections:
469            max_selections = len(locators)
470        for option in random.sample(
471            locators, k=random.randint(min_selections, max_selections)
472        ):
473            element = self.click(option)
474        return element
475
476    def get_click_list(
477        self, num_options: int, max_choices: int = 1, min_choices: int = 1
478    ) -> list[str]:
479        """Similar to self.click_elements(), but for use with the self.fill_next() method.
480
481        Creates a list of length 'num_options' where every element is 'skip'.
482
483        A random number of elements in the list between 'min_choices' and 'max_choices' are
484        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
485        click_list = ["skip"] * num_options
486        selected_indexes = []
487        for i in range(random.randint(min_choices, max_choices)):
488            index = random.randint(0, num_options - 1)
489            while index in selected_indexes:
490                index = random.randint(0, num_options - 1)
491            selected_indexes.append(index)
492            click_list[index] = self.keys.SPACE
493        return click_list
494
495    def send_keys(
496        self,
497        locator: str,
498        data: str,
499        click_first: bool = True,
500        clear_first: bool = False,
501    ) -> WebElement:
502        """Types data into element and returns the element.
503
504        :param data: The string to send to the element.
505
506        :param click_first: If True, the element is clicked on
507        before the data is sent.
508
509        :param clear_first: If True, the current text of the element
510        is cleared before the data is sent."""
511        element = self.click(locator) if click_first else self.find(locator)
512        if clear_first:
513            element.clear()
514            self.chill(self.after_click_wait)
515        if self.one_key_at_a_time:
516            for ch in str(data):
517                element.send_keys(ch)
518                self.chill(self.after_key_wait)
519        else:
520            element.send_keys(str(data))
521        self.chill(self.after_field_wait)
522        return element
523
524    def fill_next(
525        self, data: list[str | tuple], start_element: WebElement = None
526    ) -> WebElement:
527        """Fills a form by tabbing from the current WebElement
528        to the next one and using the corresponding item in data.
529        Returns the last WebElement.
530
531        :param data: A list of form data. If an item is a string (except for 'skip')
532        it will be typed into the current WebElement.
533
534        An item in data can be a two-tuple of the form
535        ('downArrow', numberOfPresses:int|tuple[int, int]).
536
537        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
538        that many times to the WebElement.
539
540        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
541        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
542        This is typically for use with Select elements.
543
544        An item in data can also be 'skip', which will perform no action on the current
545        WebElement and will continue to the next one.
546
547        An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100,
548        representing a percent chance an element will be clicked or skipped:
549        >>> user.fill_next(["click=70"])
550
551        has a 70% chance of being
552        >>> user.fill_next([user.keys.SPACE])
553
554        and a 30% chance of being
555        >>> user.fill_next(["skip"])
556
557
558        :param start_element: The WebElement to start tabbing from.
559        The currently active element will be used if start_element is None.
560
561        Note: The function tabs to the next element before sending data,
562        so the start_element should the WebElement before the one
563        that should receive data[0].
564        """
565        element = (
566            self.browser.switch_to.active_element
567            if not start_element
568            else start_element
569        )
570        for datum in data:
571            element.send_keys(Keys.TAB)
572            element = self.browser.switch_to.active_element
573            self.chill(self.after_key_wait)
574            if type(datum) == str and datum.strip().startswith("click="):
575                chance = int(datum.split("=")[1].strip())
576                if random.randint(0, 100) <= chance:
577                    datum = Keys.SPACE
578                else:
579                    datum = "skip"
580            if datum[0] == "downArrow":
581                if type(datum[1]) == tuple:
582                    times = random.randint(datum[1][0], datum[1][1])
583                else:
584                    times = datum[1]
585                for _ in range(times):
586                    element.send_keys(Keys.ARROW_DOWN)
587                    self.chill(self.after_key_wait)
588            elif datum == "skip":
589                self.chill(self.after_key_wait)
590            else:
591
592                if self.turbo_engaged:
593                    element.send_keys(str(datum))
594                else:
595                    for ch in str(datum):
596                        element.send_keys(ch)
597                        self.chill(self.after_key_wait)
598            self.chill(self.after_field_wait)
599        return element
600
601    def wait_until(
602        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
603    ):
604        """Checks condition repeatedly until either it is true,
605        or the max_wait is exceeded.
606
607        Raises a TimeoutError if the condition doesn't success within max_wait.
608
609        Useful for determing whether a form has been successfully submitted.
610
611        :param condition: The condition function to check.
612
613        :param max_wait: Number of seconds to continue checking condition
614        before throwing a TimeoutError.
615
616        :param polling_interval: The number of seconds to sleep before
617        checking the condition function again after it fails.
618
619        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
620        start_time = time.time()
621        while True:
622            try:
623                if condition():
624                    time.sleep(1)
625                    break
626                elif (time.time() - start_time) > max_wait:
627                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
628                else:
629                    time.sleep(polling_interval)
630            except:
631                if (time.time() - start_time) > max_wait:
632                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
633                else:
634                    time.sleep(polling_interval)
635
636    def dismiss_alert(self):
637        """Dismiss alert dialog."""
638        self.browser.switch_to.alert.dismiss()
639
640    def solve_recaptcha_v3(
641        self,
642        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
643        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
644    ):
645        """Pass google recaptcha v3 by solving an audio puzzle.
646
647        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
648        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
649        pass None to this argument.
650
651        """
652        locator_method = self.locator_method
653        self.locator_method = "xpath"
654        try:
655            if outer_iframe_xpath:
656                self.switch_to_iframe(outer_iframe_xpath)
657                self.click('//*[@id="recaptcha-anchor"]')
658                self.switch_to_parent_frame()
659            self.switch_to_iframe(inner_iframe_xpath)
660            self.click('//*[@id="recaptcha-audio-button"]')
661            mp3_url = self.find(
662                '//a[@class="rc-audiochallenge-tdownload-link"]'
663            ).get_attribute("href")
664            text = get_text_from_url(mp3_url, ".mp3")
665            self.send_keys('//*[@id="audio-response"]', text)
666            self.click('//*[@id="recaptcha-verify-button"]')
667        except Exception as e:
668            print(e)
669            raise Exception("Could not solve captcha")
670        finally:
671            self.switch_to_parent_frame()
672            self.locator_method = locator_method
class User:
 28class User:
 29    """Sits on top of selenium to streamline
 30    automation and scraping tasks."""
 31
    def __init__(
        self,
        headless: bool = False,
        browser_type: str = "firefox",
        implicit_wait: int = 10,
        page_load_timeout: int = 60,
        open_browser: bool = True,
        locator_method: str = "xpath",
        randomize_user_agent: bool = True,
        user_agent_rotation_period: int | None = None,
        move_window_by: tuple[int, int] = (0, -1000),
        download_dir: str | Path | None = None,
        driver_path: str | Path | None = None,
    ):
        """
        :param headless: If True, browser window will not be visible.

        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'
        (case-insensitive). Any other value raises a ValueError.

        :param implicit_wait: Number of seconds to look for a specified element before
        selenium considers it missing and throws an exception.

        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
        before throwing an exception.

        :param open_browser: If True, opens a browser window when a User object is created.
        If False, a manual call to self.open_browser() must be made.

        :param locator_method: The locator type User should expect to be given.
        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
        Every member function with a 'locator' argument refers to a string matching
        the current locator_method.

        :param randomize_user_agent: If True, a random useragent will be used whenever
        the browser is opened. If False, the native useragent will be used.

        :param user_agent_rotation_period: If not None, the browser window will be closed
        and reopened with a new useragent every user_agent_rotation_period number of minutes.
        Rotation occurs on the first call to self.get() after the time period has elapsed.
        Ignored if randomize_user_agent is False.

        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.

        :param download_dir: The download folder to use. If None, the default folder will be used.

        :param driver_path: The path to the webdriver executable selenium should use.
        If None, the system PATH will be checked for the executable.
        If the executable isn't found, the parent directories and the immediate child directories
        of the current working directory will be searched.
        """
        self.headless = headless
        browser_type = browser_type.lower()
        if browser_type in ["firefox", "chrome"]:
            self.browser_type = browser_type
        else:
            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
        self.browser_open = False
        self.implicit_wait = implicit_wait
        self.page_load_timeout = page_load_timeout
        self.rotation_timer = Timer()
        self.randomize_user_agent = randomize_user_agent
        self.user_agent_rotation_period = user_agent_rotation_period
        self.locator_method = locator_method
        # turbo() with its default argument sets the wait ranges and typing mode.
        self.turbo()
        # Exposed so special keys can be referenced through the instance (self.keys.SPACE etc.).
        self.keys = Keys
        self.move_window_by = move_window_by
        self.download_dir = download_dir
        self.driver_path = driver_path
        # The driver must be located before open_browser() builds the service.
        if not self.driver_path:
            self.search_for_driver()
        if open_browser:
            self.open_browser()
        else:
            self.browser = None
        # Close the browser when the interpreter exits.
        atexit.register(self.close_browser)
107
    def __enter__(self):
        """Return this User so it can be used in a 'with' statement."""
        return self
110
    def __exit__(self, *args):
        """Close the browser when the 'with' block is exited."""
        self.close_browser()
113
114    def configure_firefox(self) -> FirefoxService:
115        """Configure options and profile for firefox."""
116        self.options = FirefoxOptions()
117        self.options.headless = self.headless
118        self.options.set_preference(
119            "widget.windows.window_occlusion_tracking.enabled", False
120        )
121        self.options.set_preference("dom.webaudio.enabled", False)
122        if self.randomize_user_agent:
123            self.options.set_preference("general.useragent.override", get_agent())
124        if self.download_dir:
125            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
126            self.profile = FirefoxProfile()
127            self.profile.set_preference("browser.download.dir", str(self.download_dir))
128            self.profile.set_preference("browser.download.folderList", 2)
129        else:
130            self.profile = None
131        self.service = FirefoxService(
132            executable_path=str(self.driver_path), log_path=os.devnull
133        )
134
135    def configure_chrome(self) -> ChromeService:
136        """Configure options and profile for chrome."""
137        self.options = ChromeOptions()
138        self.options.headless = self.headless
139        self.options.add_argument("--disable-blink-features=AutomationControlled")
140        self.options.add_argument("--mute-audio")
141        self.options.add_argument("--disable-infobars")
142        self.options.add_argument("--disable-notifications")
143        self.options.add_argument("--log-level=3")
144        if self.randomize_user_agent:
145            self.options.add_argument(f"--user-agent={get_agent()}")
146        self.options.add_experimental_option("useAutomationExtension", False)
147        if self.download_dir:
148            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
149            self.options.add_experimental_option(
150                "prefs", {"download.default_directory": str(self.download_dir)}
151            )
152        self.service = ChromeService(
153            executable_path=str(self.driver_path), log_path=os.devnull
154        )
155
156    def search_for_driver(self):
157        """Searches for the webdriver executable."""
158        cwd = Path.cwd()
159        found = False
160        match self.browser_type:
161            case "firefox":
162                driver = "geckodriver.exe"
163            case "chrome":
164                driver = "chromedriver.exe"
165        # search PATH
166        env_path = os.environ["PATH"]
167        if sys.platform == "win32":
168            env_paths = env_path.split(";")
169        else:
170            env_paths = env_path.split(":")
171            driver = driver[: driver.find(".")]
172        for path in env_paths:
173            if (Path(path) / driver).exists():
174                self.driver_path = Path(path) / driver
175                found = True
176                break
177        # check current working directory and parent folders
178        if not found:
179            while cwd != cwd.parent:
180                if (cwd / driver).exists():
181                    self.driver_path = cwd / driver
182                    found = True
183                    break
184                cwd = cwd.parent
185            # check top most level
186            if not found and (cwd / driver).exists():
187                self.driver_path = cwd / driver
188                found = True
189        # check child folders (only 1 level down)
190        if not found:
191            for child in Path.cwd().iterdir():
192                if child.is_dir() and (child / driver).exists():
193                    self.driver_path = child / driver
194                    found = True
195        if not found:
196            warn(f"Could not find {driver}")
197
198    def set_implicit_wait(self, wait_time: int = None):
199        """Sets to default time if no arg given."""
200        if not wait_time:
201            self.browser.implicitly_wait(self.implicit_wait)
202        else:
203            self.browser.implicitly_wait(wait_time)
204
205    def open_browser(self):
206        """Configures and opens selenium browser."""
207        if not self.browser_open:
208            match self.browser_type:
209                case "firefox":
210                    self.configure_firefox()
211                    self.browser = webdriver.Firefox(
212                        options=self.options,
213                        service=self.service,
214                        firefox_profile=self.profile,
215                    )
216                case "chrome":
217                    self.configure_chrome()
218                    self.browser = webdriver.Chrome(
219                        options=self.options, service=self.service
220                    )
221            self.set_implicit_wait()
222            self.browser.maximize_window()
223            self.browser.set_window_position(
224                self.move_window_by[0], self.move_window_by[1]
225            )
226            self.browser.maximize_window()
227            self.browser.set_page_load_timeout(self.page_load_timeout)
228            self.browser_open = True
229            self.tab_index = 0
230            self.rotation_timer.start()
231        else:
232            warn("Browser already open.")
233
234    def close_browser(self):
235        """Close browser window."""
236        if self.browser_open:
237            self.browser_open = False
238            self.browser.quit()
239
240    def open_tab(self, url: str = "", switch_to_tab: bool = True):
241        """Opens new tab and, if provided, goes to url.
242
243        New tab is inserted after currently active tab."""
244        self.script("window.open(arguments[0]);", url)
245        if switch_to_tab:
246            self.switch_to_tab(self.tab_index + 1)
247
248    def switch_to_tab(self, tab_index: int):
249        """Switch to a tab in browser, zero indexed."""
250        self.browser.switch_to.window(self.browser.window_handles[tab_index])
251        self.tab_index = tab_index
252
    def get_num_tabs(self) -> int:
        """Returns number of tabs open
        (the number of window handles the browser reports)."""
        return len(self.browser.window_handles)
256
    def close_tab(self, tab_index: int = 1):
        """Close specified tab and
        switches to tab index 0.

        :param tab_index: Zero indexed position of the tab to close."""
        # The tab has to be the active one before browser.close() is called on it.
        self.switch_to_tab(tab_index)
        self.browser.close()
        self.switch_to_tab(0)
263
264    def get(self, url: str):
265        """Requests webpage at given url and rotates userAgent if necessary."""
266        if not self.browser_open:
267            self.open_browser()
268        if (
269            self.randomize_user_agent
270            and self.user_agent_rotation_period is not None
271            and self.rotation_timer.elapsed > (60 * self.user_agent_rotation_period)
272        ):
273            self.rotation_timer.stop()
274            self.close_browser()
275            self.open_browser()
276        self.browser.get(url)
277        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
278        self.chill(self.arrival_wait)
279
    def get_soup(self) -> BeautifulSoup:
        """Returns a BeautifulSoup object
        of the current page source,
        parsed with the 'html.parser' parser."""
        return BeautifulSoup(self.browser.page_source, "html.parser")
284
    def current_url(self) -> str:
        """Returns current url of active tab.

        Note this is a method, not a property: call it as user.current_url()."""
        return self.browser.current_url
288
    def delete_cookies(self):
        """Delete all cookies for
        this browser instance."""
        self.browser.delete_all_cookies()
293
294    def turbo(self, engage: bool = True):
295        """When engaged, strings will be sent
296        to elements all at once and there will be
297        no waiting after actions.
298
299        When disengaged, strings will be sent to elements
300        'one key at a time' with randomized amounts of
301        time between successive keys and after actions."""
302        if engage:
303            self.after_key_wait = (0, 0)
304            self.after_field_wait = (0, 0)
305            self.after_click_wait = (0, 0)
306            self.arrival_wait = (1, 1)
307            self.one_key_at_a_time = False
308            self.turbo_engaged = True
309        else:
310            self.after_key_wait = (0.1, 0.5)
311            self.after_field_wait = (1, 2)
312            self.after_click_wait = (0.25, 1.5)
313            self.arrival_wait = (4, 10)
314            self.one_key_at_a_time = True
315            self.turbo_engaged = False
316
317    def chill(self, min_max: tuple[float, float]):
318        """Sleeps a random amount
319        between min_max[0] and min_max[1]."""
320        time.sleep(random.uniform(min_max[0], min_max[1]))
321
322    def script(self, script: str, args: Any = None) -> Any:
323        """Execute javascript code and returns result."""
324        return self.browser.execute_script(script, args)
325
326    def remove(self, locator: str):
327        """Removes element from DOM."""
328        self.script("arguments[0].remove();", self.find(locator))
329
330    def get_length(self, locator: str) -> int:
331        """Returns number of child elements for a given element."""
332        return int(self.script("return arguments[0].length;", self.find(locator)))
333
334    def find(self, locator: str) -> WebElement:
335        """Finds and returns a WebElement."""
336        match self.locator_method:
337            case "xpath":
338                return self.browser.find_element(By.XPATH, locator)
339            case "id":
340                return self.browser.find_element(By.ID, locator)
341            case "className":
342                return self.browser.find_element(By.CLASS_NAME, locator)
343            case "name":
344                return self.browser.find_element(By.NAME, locator)
345            case "cssSelector":
346                return self.browser.find_element(By.CSS_SELECTOR, locator)
347
348    def find_children(self, locator: str) -> list[WebElement]:
349        """Returns a list of child WebElements
350        for given locator arg."""
351        element = self.find(locator)
352        return element.find_elements("xpath", "./*")
353
354    def scroll(self, amount: int = None, fraction: float = None):
355        """Scroll web page.
356        :param amount: The number of lines to scroll if not None.
357
358        :param fraction: The amount between 0.0 and 1.0
359        of the page height to scroll.
360
361        If values are provided for both arguments,
362        amount will be used.
363
364        If values are provided for neither argument,
365        the entire page length will be scrolled.
366
367        Scrolls one line at a time if self.turbo is False."""
368        if amount:
369            amount_to_scroll = amount
370        elif fraction:
371            amount_to_scroll = int(
372                fraction
373                * (
374                    int(self.script("return document.body.scrollHeight;"))
375                    - int(self.script("return window.pageYOffset;"))
376                )
377            )
378        else:
379            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
380        if self.turbo_engaged:
381            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
382        else:
383            for _ in range(abs(amount_to_scroll)):
384                if amount_to_scroll >= 0:
385                    self.script("window.scrollBy(0,1);")
386                else:
387                    self.script("window.scrollBy(0,-1);")
388        self.chill(self.after_click_wait)
389
390    def scroll_into_view(self, locator: str) -> WebElement:
391        """Scrolls to a given element and returns the element."""
392        element = self.find(locator)
393        self.script("arguments[0].scrollIntoView();", element)
394        self.chill(self.after_click_wait)
395        return element
396
    def text(self, locator: str) -> str:
        """Returns text of WebElement
        found by the given locator."""
        return self.find(locator).text
400
401    def click(self, locator: str) -> WebElement:
402        """Clicks on and returns WebElement."""
403        element = self.find(locator)
404        element.click()
405        self.chill(self.after_click_wait)
406        return element
407
408    def clear(self, locator: str) -> WebElement:
409        """Clears content of WebElement if able
410        and then returns WebElement."""
411        element = self.find(locator)
412        element.clear()
413        self.chill(self.after_click_wait)
414        return element
415
416    def switch_to_iframe(self, locator: str):
417        """Switch to an iframe from given locator."""
418        self.browser.switch_to.frame(self.find(locator))
419
    def switch_to_parent_frame(self):
        """Move up a frame level from current frame."""
        self.browser.switch_to.parent_frame()
423
424    def select(
425        self, locator: str, method: str, choice: str | int | tuple
426    ) -> WebElement:
427        """Select a choice from Select element.
428        Returns the Select element from the locator string,
429        not the option element that is selected.
430
431        :param method: Can be 'value' or 'index'
432
433        :param choice: The option to select.
434
435        If method is 'value', then choice should be
436        the html 'value' attribute of the desired option.
437
438        If method is 'index', choice can either be a single
439        int for the desired option or it can be a two-tuple.
440        If the tuple is provided, a random option between the
441        two indicies (inclusive) will be selected."""
442        element = self.click(locator)
443        match method:
444            case "value":
445                Select(element).select_by_value(choice)
446            case "index":
447                if type(choice) == tuple:
448                    choice = random.randint(choice[0], choice[1])
449                Select(element).select_by_index(choice)
450        self.chill(self.after_field_wait)
451        return element
452
453    def click_elements(
454        self, locators: list[str], max_selections: int = None, min_selections: int = 1
455    ) -> WebElement:
456        """Click a random number of WebElements
457        and return the last WebElement clicked.
458
459        :param locators: A list of element locators to choose from.
460
461        :param max_selections: The maximum number of elements to click.
462        If None, the maximum will be the length of the locators list.
463
464        :param min_selections: The minimum number of elements to click.
465
466        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
467        will click between 1 and 3 random elements from the list.
468        """
469        if not max_selections:
470            max_selections = len(locators)
471        for option in random.sample(
472            locators, k=random.randint(min_selections, max_selections)
473        ):
474            element = self.click(option)
475        return element
476
477    def get_click_list(
478        self, num_options: int, max_choices: int = 1, min_choices: int = 1
479    ) -> list[str]:
480        """Similar to self.click_elements(), but for use with the self.fill_next() method.
481
482        Creates a list of length 'num_options' where every element is 'skip'.
483
484        A random number of elements in the list between 'min_choices' and 'max_choices' are
485        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
486        click_list = ["skip"] * num_options
487        selected_indexes = []
488        for i in range(random.randint(min_choices, max_choices)):
489            index = random.randint(0, num_options - 1)
490            while index in selected_indexes:
491                index = random.randint(0, num_options - 1)
492            selected_indexes.append(index)
493            click_list[index] = self.keys.SPACE
494        return click_list
495
496    def send_keys(
497        self,
498        locator: str,
499        data: str,
500        click_first: bool = True,
501        clear_first: bool = False,
502    ) -> WebElement:
503        """Types data into element and returns the element.
504
505        :param data: The string to send to the element.
506
507        :param click_first: If True, the element is clicked on
508        before the data is sent.
509
510        :param clear_first: If True, the current text of the element
511        is cleared before the data is sent."""
512        element = self.click(locator) if click_first else self.find(locator)
513        if clear_first:
514            element.clear()
515            self.chill(self.after_click_wait)
516        if self.one_key_at_a_time:
517            for ch in str(data):
518                element.send_keys(ch)
519                self.chill(self.after_key_wait)
520        else:
521            element.send_keys(str(data))
522        self.chill(self.after_field_wait)
523        return element
524
525    def fill_next(
526        self, data: list[str | tuple], start_element: WebElement = None
527    ) -> WebElement:
528        """Fills a form by tabbing from the current WebElement
529        to the next one and using the corresponding item in data.
530        Returns the last WebElement.
531
532        :param data: A list of form data. If an item is a string (except for 'skip')
533        it will be typed into the current WebElement.
534
535        An item in data can be a two-tuple of the form
536        ('downArrow', numberOfPresses:int|tuple[int, int]).
537
538        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
539        that many times to the WebElement.
540
541        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
542        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
543        This is typically for use with Select elements.
544
545        An item in data can also be 'skip', which will perform no action on the current
546        WebElement and will continue to the next one.
547
548        An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100,
549        representing a percent chance an element will be clicked or skipped:
550        >>> user.fill_next(["click=70"])
551
552        has a 70% chance of being
553        >>> user.fill_next([user.keys.SPACE])
554
555        and a 30% chance of being
556        >>> user.fill_next(["skip"])
557
558
559        :param start_element: The WebElement to start tabbing from.
560        The currently active element will be used if start_element is None.
561
562        Note: The function tabs to the next element before sending data,
563        so the start_element should the WebElement before the one
564        that should receive data[0].
565        """
566        element = (
567            self.browser.switch_to.active_element
568            if not start_element
569            else start_element
570        )
571        for datum in data:
572            element.send_keys(Keys.TAB)
573            element = self.browser.switch_to.active_element
574            self.chill(self.after_key_wait)
575            if type(datum) == str and datum.strip().startswith("click="):
576                chance = int(datum.split("=")[1].strip())
577                if random.randint(0, 100) <= chance:
578                    datum = Keys.SPACE
579                else:
580                    datum = "skip"
581            if datum[0] == "downArrow":
582                if type(datum[1]) == tuple:
583                    times = random.randint(datum[1][0], datum[1][1])
584                else:
585                    times = datum[1]
586                for _ in range(times):
587                    element.send_keys(Keys.ARROW_DOWN)
588                    self.chill(self.after_key_wait)
589            elif datum == "skip":
590                self.chill(self.after_key_wait)
591            else:
592
593                if self.turbo_engaged:
594                    element.send_keys(str(datum))
595                else:
596                    for ch in str(datum):
597                        element.send_keys(ch)
598                        self.chill(self.after_key_wait)
599            self.chill(self.after_field_wait)
600        return element
601
602    def wait_until(
603        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
604    ):
605        """Checks condition repeatedly until either it is true,
606        or the max_wait is exceeded.
607
608        Raises a TimeoutError if the condition doesn't success within max_wait.
609
610        Useful for determing whether a form has been successfully submitted.
611
612        :param condition: The condition function to check.
613
614        :param max_wait: Number of seconds to continue checking condition
615        before throwing a TimeoutError.
616
617        :param polling_interval: The number of seconds to sleep before
618        checking the condition function again after it fails.
619
620        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
621        start_time = time.time()
622        while True:
623            try:
624                if condition():
625                    time.sleep(1)
626                    break
627                elif (time.time() - start_time) > max_wait:
628                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
629                else:
630                    time.sleep(polling_interval)
631            except:
632                if (time.time() - start_time) > max_wait:
633                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
634                else:
635                    time.sleep(polling_interval)
636
637    def dismiss_alert(self):
638        """Dismiss alert dialog."""
639        self.browser.switch_to.alert.dismiss()
640
641    def solve_recaptcha_v3(
642        self,
643        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
644        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
645    ):
646        """Pass google recaptcha v3 by solving an audio puzzle.
647
648        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
649        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
650        pass None to this argument.
651
652        """
653        locator_method = self.locator_method
654        self.locator_method = "xpath"
655        try:
656            if outer_iframe_xpath:
657                self.switch_to_iframe(outer_iframe_xpath)
658                self.click('//*[@id="recaptcha-anchor"]')
659                self.switch_to_parent_frame()
660            self.switch_to_iframe(inner_iframe_xpath)
661            self.click('//*[@id="recaptcha-audio-button"]')
662            mp3_url = self.find(
663                '//a[@class="rc-audiochallenge-tdownload-link"]'
664            ).get_attribute("href")
665            text = get_text_from_url(mp3_url, ".mp3")
666            self.send_keys('//*[@id="audio-response"]', text)
667            self.click('//*[@id="recaptcha-verify-button"]')
668        except Exception as e:
669            print(e)
670            raise Exception("Could not solve captcha")
671        finally:
672            self.switch_to_parent_frame()
673            self.locator_method = locator_method

Sits on top of selenium to streamline automation and scraping tasks.

User( headless: bool = False, browser_type: str = 'firefox', implicit_wait: int = 10, page_load_timeout: int = 60, open_browser: bool = True, locator_method: str = 'xpath', randomize_user_agent: bool = True, user_agent_rotation_period: int = None, move_window_by: tuple[int, int] = (0, -1000), download_dir: str | pathlib.Path = None, driver_path: str | pathlib.Path = None)
    def __init__(
        self,
        headless: bool = False,
        browser_type: str = "firefox",
        implicit_wait: int = 10,
        page_load_timeout: int = 60,
        open_browser: bool = True,
        locator_method: str = "xpath",
        randomize_user_agent: bool = True,
        user_agent_rotation_period: int | None = None,
        move_window_by: tuple[int, int] = (0, -1000),
        download_dir: str | Path | None = None,
        driver_path: str | Path | None = None,
    ):
        """
        :param headless: If True, browser window will not be visible.

        :param browser_type: Which browser to use. Can be 'firefox' or 'chrome'
        (case-insensitive). Any other value raises a ValueError.

        :param implicit_wait: Number of seconds to look for a specified element before
        selenium considers it missing and throws an exception.

        :param page_load_timeout: Time in seconds for selenium to wait for a page to load
        before throwing an exception.

        :param open_browser: If True, opens a browser window when a User object is created.
        If False, a manual call to self.open_browser() must be made.

        :param locator_method: The locator type User should expect to be given.
        Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'.
        Every member function with a 'locator' argument refers to a string matching
        the current locator_method.

        :param randomize_user_agent: If True, a random useragent will be used whenever
        the browser is opened. If False, the native useragent will be used.

        :param user_agent_rotation_period: If not None, the browser window will be closed
        and reopened with a new useragent every user_agent_rotation_period number of minutes.
        Rotation occurs on the first call to self.get() after the time period has elapsed.
        Ignored if randomize_user_agent is False.

        :param move_window_by: The x and y amount of pixels to move the browser window by after opening.

        :param download_dir: The download folder to use. If None, the default folder will be used.

        :param driver_path: The path to the webdriver executable selenium should use.
        If None, the system PATH will be checked for the executable.
        If the executable isn't found, the parent directories and the immediate child directories
        of the current working directory will be searched.
        """
        self.headless = headless
        browser_type = browser_type.lower()
        if browser_type in ["firefox", "chrome"]:
            self.browser_type = browser_type
        else:
            raise ValueError("'browser_type' parameter must be 'firefox' or 'chrome'")
        self.browser_open = False
        self.implicit_wait = implicit_wait
        self.page_load_timeout = page_load_timeout
        self.rotation_timer = Timer()
        self.randomize_user_agent = randomize_user_agent
        self.user_agent_rotation_period = user_agent_rotation_period
        self.locator_method = locator_method
        # turbo() with its default argument sets the wait ranges and typing mode.
        self.turbo()
        # Exposed so special keys can be referenced through the instance (self.keys.SPACE etc.).
        self.keys = Keys
        self.move_window_by = move_window_by
        self.download_dir = download_dir
        self.driver_path = driver_path
        # The driver must be located before open_browser() builds the service.
        if not self.driver_path:
            self.search_for_driver()
        if open_browser:
            self.open_browser()
        else:
            self.browser = None
        # Close the browser when the interpreter exits.
        atexit.register(self.close_browser)
Parameters
  • headless: If True, browser window will not be visible.

  • browser_type: Which browser to use. Can be 'firefox' or 'chrome'.

  • implicit_wait: Number of seconds to look for a specified element before selenium considers it missing and throws an exception.

  • page_load_timeout: Time in seconds for selenium to wait for a page to load before throwing an exception.

  • open_browser: If True, opens a browser window when a User object is created. If False, a manual call to self.open_browser() must be made.

  • locator_method: The locator type User should expect to be given. Can be 'xpath', 'id', 'className', 'name', or 'cssSelector'. Every member function with a 'locator' argument refers to a string matching the current locator_method.

  • randomize_user_agent: If True, a random useragent will be used whenever the browser is opened. If False, the native useragent will be used.

  • user_agent_rotation_period: If not None, the browser window will be closed and reopened with a new useragent every user_agent_rotation_period number of minutes. Rotation occurs on the first call to self.get() after the time period has elapsed. Ignored if randomize_user_agent is False.

  • move_window_by: The x and y amount of pixels to move the browser window by after opening.

  • download_dir: The download folder to use. If None, the default folder will be used.

  • driver_path: The path to the webdriver executable selenium should use. If None, the system PATH will be checked for the executable. If the executable isn't found, the parent directories and the immediate child directories of the current working directory will be searched.

def configure_firefox(self) -> selenium.webdriver.firefox.service.Service:
114    def configure_firefox(self) -> FirefoxService:
115        """Configure options and profile for firefox."""
116        self.options = FirefoxOptions()
117        self.options.headless = self.headless
118        self.options.set_preference(
119            "widget.windows.window_occlusion_tracking.enabled", False
120        )
121        self.options.set_preference("dom.webaudio.enabled", False)
122        if self.randomize_user_agent:
123            self.options.set_preference("general.useragent.override", get_agent())
124        if self.download_dir:
125            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
126            self.profile = FirefoxProfile()
127            self.profile.set_preference("browser.download.dir", str(self.download_dir))
128            self.profile.set_preference("browser.download.folderList", 2)
129        else:
130            self.profile = None
131        self.service = FirefoxService(
132            executable_path=str(self.driver_path), log_path=os.devnull
133        )

Configure options and profile for firefox.

def configure_chrome(self) -> selenium.webdriver.chrome.service.Service:
135    def configure_chrome(self) -> ChromeService:
136        """Configure options and profile for chrome."""
137        self.options = ChromeOptions()
138        self.options.headless = self.headless
139        self.options.add_argument("--disable-blink-features=AutomationControlled")
140        self.options.add_argument("--mute-audio")
141        self.options.add_argument("--disable-infobars")
142        self.options.add_argument("--disable-notifications")
143        self.options.add_argument("--log-level=3")
144        if self.randomize_user_agent:
145            self.options.add_argument(f"--user-agent={get_agent()}")
146        self.options.add_experimental_option("useAutomationExtension", False)
147        if self.download_dir:
148            Path(self.download_dir).mkdir(parents=True, exist_ok=True)
149            self.options.add_experimental_option(
150                "prefs", {"download.default_directory": str(self.download_dir)}
151            )
152        self.service = ChromeService(
153            executable_path=str(self.driver_path), log_path=os.devnull
154        )

Configure options and profile for chrome.

def search_for_driver(self):
156    def search_for_driver(self):
157        """Searches for the webdriver executable."""
158        cwd = Path.cwd()
159        found = False
160        match self.browser_type:
161            case "firefox":
162                driver = "geckodriver.exe"
163            case "chrome":
164                driver = "chromedriver.exe"
165        # search PATH
166        env_path = os.environ["PATH"]
167        if sys.platform == "win32":
168            env_paths = env_path.split(";")
169        else:
170            env_paths = env_path.split(":")
171            driver = driver[: driver.find(".")]
172        for path in env_paths:
173            if (Path(path) / driver).exists():
174                self.driver_path = Path(path) / driver
175                found = True
176                break
177        # check current working directory and parent folders
178        if not found:
179            while cwd != cwd.parent:
180                if (cwd / driver).exists():
181                    self.driver_path = cwd / driver
182                    found = True
183                    break
184                cwd = cwd.parent
185            # check top most level
186            if not found and (cwd / driver).exists():
187                self.driver_path = cwd / driver
188                found = True
189        # check child folders (only 1 level down)
190        if not found:
191            for child in Path.cwd().iterdir():
192                if child.is_dir() and (child / driver).exists():
193                    self.driver_path = child / driver
194                    found = True
195        if not found:
196            warn(f"Could not find {driver}")

Searches for the webdriver executable.

def set_implicit_wait(self, wait_time: int = None):
198    def set_implicit_wait(self, wait_time: int = None):
199        """Sets to default time if no arg given."""
200        if not wait_time:
201            self.browser.implicitly_wait(self.implicit_wait)
202        else:
203            self.browser.implicitly_wait(wait_time)

Sets to default time if no arg given.

def open_browser(self):
205    def open_browser(self):
206        """Configures and opens selenium browser."""
207        if not self.browser_open:
208            match self.browser_type:
209                case "firefox":
210                    self.configure_firefox()
211                    self.browser = webdriver.Firefox(
212                        options=self.options,
213                        service=self.service,
214                        firefox_profile=self.profile,
215                    )
216                case "chrome":
217                    self.configure_chrome()
218                    self.browser = webdriver.Chrome(
219                        options=self.options, service=self.service
220                    )
221            self.set_implicit_wait()
222            self.browser.maximize_window()
223            self.browser.set_window_position(
224                self.move_window_by[0], self.move_window_by[1]
225            )
226            self.browser.maximize_window()
227            self.browser.set_page_load_timeout(self.page_load_timeout)
228            self.browser_open = True
229            self.tab_index = 0
230            self.rotation_timer.start()
231        else:
232            warn("Browser already open.")

Configures and opens selenium browser.

def close_browser(self):
234    def close_browser(self):
235        """Close browser window."""
236        if self.browser_open:
237            self.browser_open = False
238            self.browser.quit()

Close browser window.

def open_tab(self, url: str = '', switch_to_tab: bool = True):
240    def open_tab(self, url: str = "", switch_to_tab: bool = True):
241        """Opens new tab and, if provided, goes to url.
242
243        New tab is inserted after currently active tab."""
244        self.script("window.open(arguments[0]);", url)
245        if switch_to_tab:
246            self.switch_to_tab(self.tab_index + 1)

Opens new tab and, if provided, goes to url.

New tab is inserted after currently active tab.

def switch_to_tab(self, tab_index: int):
248    def switch_to_tab(self, tab_index: int):
249        """Switch to a tab in browser, zero indexed."""
250        self.browser.switch_to.window(self.browser.window_handles[tab_index])
251        self.tab_index = tab_index

Switch to a tab in browser, zero indexed.

def get_num_tabs(self) -> int:
253    def get_num_tabs(self) -> int:
254        """Returns number of tabs open."""
255        return len(self.browser.window_handles)

Returns number of tabs open.

def close_tab(self, tab_index: int = 1):
257    def close_tab(self, tab_index: int = 1):
258        """Close specified tab and
259        switches to tab index 0."""
260        self.switch_to_tab(tab_index)
261        self.browser.close()
262        self.switch_to_tab(0)

Close specified tab and switches to tab index 0.

def get(self, url: str):
264    def get(self, url: str):
265        """Requests webpage at given url and rotates userAgent if necessary."""
266        if not self.browser_open:
267            self.open_browser()
268        if (
269            self.randomize_user_agent
270            and self.user_agent_rotation_period is not None
271            and self.rotation_timer.elapsed > (60 * self.user_agent_rotation_period)
272        ):
273            self.rotation_timer.stop()
274            self.close_browser()
275            self.open_browser()
276        self.browser.get(url)
277        self.script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
278        self.chill(self.arrival_wait)

Requests webpage at given url and rotates userAgent if necessary.

def get_soup(self) -> bs4.BeautifulSoup:
280    def get_soup(self) -> BeautifulSoup:
281        """Returns a BeautifulSoup object
282        of the current page source."""
283        return BeautifulSoup(self.browser.page_source, "html.parser")

Returns a BeautifulSoup object of the current page source.

def current_url(self) -> str:
285    def current_url(self) -> str:
286        """Returns current url of active tab."""
287        return self.browser.current_url

Returns current url of active tab.

def delete_cookies(self):
289    def delete_cookies(self):
290        """Delete all cookies for
291        this browser instance."""
292        self.browser.delete_all_cookies()

Delete all cookies for this browser instance.

def turbo(self, engage: bool = True):
294    def turbo(self, engage: bool = True):
295        """When engaged, strings will be sent
296        to elements all at once and there will be
297        no waiting after actions.
298
299        When disengaged, strings will be sent to elements
300        'one key at a time' with randomized amounts of
301        time between successive keys and after actions."""
302        if engage:
303            self.after_key_wait = (0, 0)
304            self.after_field_wait = (0, 0)
305            self.after_click_wait = (0, 0)
306            self.arrival_wait = (1, 1)
307            self.one_key_at_a_time = False
308            self.turbo_engaged = True
309        else:
310            self.after_key_wait = (0.1, 0.5)
311            self.after_field_wait = (1, 2)
312            self.after_click_wait = (0.25, 1.5)
313            self.arrival_wait = (4, 10)
314            self.one_key_at_a_time = True
315            self.turbo_engaged = False

When engaged, strings will be sent to elements all at once and there will be no waiting after actions.

When disengaged, strings will be sent to elements 'one key at a time' with randomized amounts of time between successive keys and after actions.

def chill(self, min_max: tuple[float, float]):
317    def chill(self, min_max: tuple[float, float]):
318        """Sleeps a random amount
319        between min_max[0] and min_max[1]."""
320        time.sleep(random.uniform(min_max[0], min_max[1]))

Sleeps a random amount between min_max[0] and min_max[1].

def script(self, script: str, args: Any = None) -> Any:
322    def script(self, script: str, args: Any = None) -> Any:
323        """Execute javascript code and returns result."""
324        return self.browser.execute_script(script, args)

Execute javascript code and returns result.

def remove(self, locator: str):
326    def remove(self, locator: str):
327        """Removes element from DOM."""
328        self.script("arguments[0].remove();", self.find(locator))

Removes element from DOM.

def get_length(self, locator: str) -> int:
330    def get_length(self, locator: str) -> int:
331        """Returns number of child elements for a given element."""
332        return int(self.script("return arguments[0].length;", self.find(locator)))

Returns number of child elements for a given element.

def find(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
334    def find(self, locator: str) -> WebElement:
335        """Finds and returns a WebElement."""
336        match self.locator_method:
337            case "xpath":
338                return self.browser.find_element(By.XPATH, locator)
339            case "id":
340                return self.browser.find_element(By.ID, locator)
341            case "className":
342                return self.browser.find_element(By.CLASS_NAME, locator)
343            case "name":
344                return self.browser.find_element(By.NAME, locator)
345            case "cssSelector":
346                return self.browser.find_element(By.CSS_SELECTOR, locator)

Finds and returns a WebElement.

def find_children( self, locator: str) -> list[selenium.webdriver.remote.webelement.WebElement]:
348    def find_children(self, locator: str) -> list[WebElement]:
349        """Returns a list of child WebElements
350        for given locator arg."""
351        element = self.find(locator)
352        return element.find_elements("xpath", "./*")

Returns a list of child WebElements for given locator arg.

def scroll(self, amount: int = None, fraction: float = None):
354    def scroll(self, amount: int = None, fraction: float = None):
355        """Scroll web page.
356        :param amount: The number of lines to scroll if not None.
357
358        :param fraction: The amount between 0.0 and 1.0
359        of the page height to scroll.
360
361        If values are provided for both arguments,
362        amount will be used.
363
364        If values are provided for neither argument,
365        the entire page length will be scrolled.
366
367        Scrolls one line at a time if self.turbo is False."""
368        if amount:
369            amount_to_scroll = amount
370        elif fraction:
371            amount_to_scroll = int(
372                fraction
373                * (
374                    int(self.script("return document.body.scrollHeight;"))
375                    - int(self.script("return window.pageYOffset;"))
376                )
377            )
378        else:
379            amount_to_scroll = int(self.script("return document.body.scrollHeight;"))
380        if self.turbo_engaged:
381            self.script("window.scrollBy(0,arguments[0]);", amount_to_scroll)
382        else:
383            for _ in range(abs(amount_to_scroll)):
384                if amount_to_scroll >= 0:
385                    self.script("window.scrollBy(0,1);")
386                else:
387                    self.script("window.scrollBy(0,-1);")
388        self.chill(self.after_click_wait)

Scroll web page.

Parameters
  • amount: The number of lines to scroll if not None.

  • fraction: The amount between 0.0 and 1.0 of the page height to scroll.

If values are provided for both arguments, amount will be used.

If values are provided for neither argument, the entire page length will be scrolled.

Scrolls one pixel at a time if self.turbo_engaged is False.

def scroll_into_view(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
390    def scroll_into_view(self, locator: str) -> WebElement:
391        """Scrolls to a given element and returns the element."""
392        element = self.find(locator)
393        self.script("arguments[0].scrollIntoView();", element)
394        self.chill(self.after_click_wait)
395        return element

Scrolls to a given element and returns the element.

def text(self, locator: str) -> str:
397    def text(self, locator: str) -> str:
398        """Returns text of WebElement."""
399        return self.find(locator).text

Returns text of WebElement.

def click(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
401    def click(self, locator: str) -> WebElement:
402        """Clicks on and returns WebElement."""
403        element = self.find(locator)
404        element.click()
405        self.chill(self.after_click_wait)
406        return element

Clicks on and returns WebElement.

def clear(self, locator: str) -> selenium.webdriver.remote.webelement.WebElement:
408    def clear(self, locator: str) -> WebElement:
409        """Clears content of WebElement if able
410        and then returns WebElement."""
411        element = self.find(locator)
412        element.clear()
413        self.chill(self.after_click_wait)
414        return element

Clears content of WebElement if able and then returns WebElement.

def switch_to_iframe(self, locator: str):
416    def switch_to_iframe(self, locator: str):
417        """Switch to an iframe from given locator."""
418        self.browser.switch_to.frame(self.find(locator))

Switch to an iframe from given locator.

def switch_to_parent_frame(self):
420    def switch_to_parent_frame(self):
421        """Move up a frame level from current frame."""
422        self.browser.switch_to.parent_frame()

Move up a frame level from current frame.

def select( self, locator: str, method: str, choice: str | int | tuple) -> selenium.webdriver.remote.webelement.WebElement:
424    def select(
425        self, locator: str, method: str, choice: str | int | tuple
426    ) -> WebElement:
427        """Select a choice from Select element.
428        Returns the Select element from the locator string,
429        not the option element that is selected.
430
431        :param method: Can be 'value' or 'index'
432
433        :param choice: The option to select.
434
435        If method is 'value', then choice should be
436        the html 'value' attribute of the desired option.
437
438        If method is 'index', choice can either be a single
439        int for the desired option or it can be a two-tuple.
440        If the tuple is provided, a random option between the
441        two indicies (inclusive) will be selected."""
442        element = self.click(locator)
443        match method:
444            case "value":
445                Select(element).select_by_value(choice)
446            case "index":
447                if type(choice) == tuple:
448                    choice = random.randint(choice[0], choice[1])
449                Select(element).select_by_index(choice)
450        self.chill(self.after_field_wait)
451        return element

Select a choice from Select element. Returns the Select element from the locator string, not the option element that is selected.

Parameters
  • method: Can be 'value' or 'index'

  • choice: The option to select.

If method is 'value', then choice should be the html 'value' attribute of the desired option.

If method is 'index', choice can either be a single int for the desired option or it can be a two-tuple. If the tuple is provided, a random option between the two indices (inclusive) will be selected.

def click_elements( self, locators: list[str], max_selections: int = None, min_selections: int = 1) -> selenium.webdriver.remote.webelement.WebElement:
453    def click_elements(
454        self, locators: list[str], max_selections: int = None, min_selections: int = 1
455    ) -> WebElement:
456        """Click a random number of WebElements
457        and return the last WebElement clicked.
458
459        :param locators: A list of element locators to choose from.
460
461        :param max_selections: The maximum number of elements to click.
462        If None, the maximum will be the length of the locators list.
463
464        :param min_selections: The minimum number of elements to click.
465
466        e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3)
467        will click between 1 and 3 random elements from the list.
468        """
469        if not max_selections:
470            max_selections = len(locators)
471        for option in random.sample(
472            locators, k=random.randint(min_selections, max_selections)
473        ):
474            element = self.click(option)
475        return element

Click a random number of WebElements and return the last WebElement clicked.

Parameters
  • locators: A list of element locators to choose from.

  • max_selections: The maximum number of elements to click. If None, the maximum will be the length of the locators list.

  • min_selections: The minimum number of elements to click.

e.g. self.click_elements([xpath1, xpath2, xpath3, xpath4], max_selections=3) will click between 1 and 3 random elements from the list.

def get_click_list( self, num_options: int, max_choices: int = 1, min_choices: int = 1) -> list[str]:
477    def get_click_list(
478        self, num_options: int, max_choices: int = 1, min_choices: int = 1
479    ) -> list[str]:
480        """Similar to self.click_elements(), but for use with the self.fill_next() method.
481
482        Creates a list of length 'num_options' where every element is 'skip'.
483
484        A random number of elements in the list between 'min_choices' and 'max_choices' are
485        replaced with 'keys.SPACE' (interpreted as a click by almost all web forms)."""
486        click_list = ["skip"] * num_options
487        selected_indexes = []
488        for i in range(random.randint(min_choices, max_choices)):
489            index = random.randint(0, num_options - 1)
490            while index in selected_indexes:
491                index = random.randint(0, num_options - 1)
492            selected_indexes.append(index)
493            click_list[index] = self.keys.SPACE
494        return click_list

Similar to self.click_elements(), but for use with the self.fill_next() method.

Creates a list of length 'num_options' where every element is 'skip'.

A random number of elements in the list between 'min_choices' and 'max_choices' are replaced with 'keys.SPACE' (interpreted as a click by almost all web forms).

def send_keys( self, locator: str, data: str, click_first: bool = True, clear_first: bool = False) -> selenium.webdriver.remote.webelement.WebElement:
496    def send_keys(
497        self,
498        locator: str,
499        data: str,
500        click_first: bool = True,
501        clear_first: bool = False,
502    ) -> WebElement:
503        """Types data into element and returns the element.
504
505        :param data: The string to send to the element.
506
507        :param click_first: If True, the element is clicked on
508        before the data is sent.
509
510        :param clear_first: If True, the current text of the element
511        is cleared before the data is sent."""
512        element = self.click(locator) if click_first else self.find(locator)
513        if clear_first:
514            element.clear()
515            self.chill(self.after_click_wait)
516        if self.one_key_at_a_time:
517            for ch in str(data):
518                element.send_keys(ch)
519                self.chill(self.after_key_wait)
520        else:
521            element.send_keys(str(data))
522        self.chill(self.after_field_wait)
523        return element

Types data into element and returns the element.

Parameters
  • data: The string to send to the element.

  • click_first: If True, the element is clicked on before the data is sent.

  • clear_first: If True, the current text of the element is cleared before the data is sent.

def fill_next( self, data: list[str | tuple], start_element: selenium.webdriver.remote.webelement.WebElement = None) -> selenium.webdriver.remote.webelement.WebElement:
525    def fill_next(
526        self, data: list[str | tuple], start_element: WebElement = None
527    ) -> WebElement:
528        """Fills a form by tabbing from the current WebElement
529        to the next one and using the corresponding item in data.
530        Returns the last WebElement.
531
532        :param data: A list of form data. If an item is a string (except for 'skip')
533        it will be typed into the current WebElement.
534
535        An item in data can be a two-tuple of the form
536        ('downArrow', numberOfPresses:int|tuple[int, int]).
537
538        If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent
539        that many times to the WebElement.
540
541        If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random
542        number of times between numberOfPresses[0] and numberOfPresses[1] inclusive.
543        This is typically for use with Select elements.
544
545        An item in data can also be 'skip', which will perform no action on the current
546        WebElement and will continue to the next one.
547
548        An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100,
549        representing a percent chance an element will be clicked or skipped:
550        >>> user.fill_next(["click=70"])
551
552        has a 70% chance of being
553        >>> user.fill_next([user.keys.SPACE])
554
555        and a 30% chance of being
556        >>> user.fill_next(["skip"])
557
558
559        :param start_element: The WebElement to start tabbing from.
560        The currently active element will be used if start_element is None.
561
562        Note: The function tabs to the next element before sending data,
563        so the start_element should the WebElement before the one
564        that should receive data[0].
565        """
566        element = (
567            self.browser.switch_to.active_element
568            if not start_element
569            else start_element
570        )
571        for datum in data:
572            element.send_keys(Keys.TAB)
573            element = self.browser.switch_to.active_element
574            self.chill(self.after_key_wait)
575            if type(datum) == str and datum.strip().startswith("click="):
576                chance = int(datum.split("=")[1].strip())
577                if random.randint(0, 100) <= chance:
578                    datum = Keys.SPACE
579                else:
580                    datum = "skip"
581            if datum[0] == "downArrow":
582                if type(datum[1]) == tuple:
583                    times = random.randint(datum[1][0], datum[1][1])
584                else:
585                    times = datum[1]
586                for _ in range(times):
587                    element.send_keys(Keys.ARROW_DOWN)
588                    self.chill(self.after_key_wait)
589            elif datum == "skip":
590                self.chill(self.after_key_wait)
591            else:
592
593                if self.turbo_engaged:
594                    element.send_keys(str(datum))
595                else:
596                    for ch in str(datum):
597                        element.send_keys(ch)
598                        self.chill(self.after_key_wait)
599            self.chill(self.after_field_wait)
600        return element

Fills a form by tabbing from the current WebElement to the next one and using the corresponding item in data. Returns the last WebElement.

Parameters
  • data: A list of form data. If an item is a string (except for 'skip') it will be typed into the current WebElement.

An item in data can be a two-tuple of the form ('downArrow', numberOfPresses:int|tuple[int, int]).

If numberOfPresses is a single int, Keys.ARROW_DOWN will be sent that many times to the WebElement.

If numberOfPresses is a tuple, Keys.ARROW_DOWN will be sent a random number of times between numberOfPresses[0] and numberOfPresses[1] inclusive. This is typically for use with Select elements.

An item in data can also be 'skip', which will perform no action on the current WebElement and will continue to the next one.

An item in data can also be 'click=n', where 'n' is an integer b/t 0 and 100, representing a percent chance an element will be clicked or skipped:

>>> user.fill_next(["click=70"])

has a 70% chance of being

>>> user.fill_next([user.keys.SPACE])

and a 30% chance of being

>>> user.fill_next(["skip"])
  • start_element: The WebElement to start tabbing from. The currently active element will be used if start_element is None.

Note: The function tabs to the next element before sending data, so the start_element should be the WebElement before the one that should receive data[0].

def wait_until( self, condition: function, max_wait: float = 10, polling_interval: float = 0.1):
602    def wait_until(
603        self, condition: LambdaType, max_wait: float = 10, polling_interval: float = 0.1
604    ):
605        """Checks condition repeatedly until either it is true,
606        or the max_wait is exceeded.
607
608        Raises a TimeoutError if the condition doesn't success within max_wait.
609
610        Useful for determing whether a form has been successfully submitted.
611
612        :param condition: The condition function to check.
613
614        :param max_wait: Number of seconds to continue checking condition
615        before throwing a TimeoutError.
616
617        :param polling_interval: The number of seconds to sleep before
618        checking the condition function again after it fails.
619
620        e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]))"""
621        start_time = time.time()
622        while True:
623            try:
624                if condition():
625                    time.sleep(1)
626                    break
627                elif (time.time() - start_time) > max_wait:
628                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
629                else:
630                    time.sleep(polling_interval)
631            except:
632                if (time.time() - start_time) > max_wait:
633                    raise TimeoutError(f"max_wait exceeded in wait_until({condition})")
634                else:
635                    time.sleep(polling_interval)

Checks condition repeatedly until either it is true, or the max_wait is exceeded.

Raises a TimeoutError if the condition doesn't succeed within max_wait.

Useful for determining whether a form has been successfully submitted.

Parameters
  • condition: The condition function to check.

  • max_wait: Number of seconds to continue checking condition before throwing a TimeoutError.

  • polling_interval: The number of seconds to sleep before checking the condition function again after it fails.

e.g. self.wait_until(lambda: 'Successfully Submitted' in self.text('//p[@id="form-output"]'))

def dismiss_alert(self):
637    def dismiss_alert(self):
638        """Dismiss alert dialog."""
639        self.browser.switch_to.alert.dismiss()

Dismiss alert dialog.

def solve_recaptcha_v3( self, outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]', inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]'):
641    def solve_recaptcha_v3(
642        self,
643        outer_iframe_xpath: str = '//iframe[@title="reCAPTCHA"]',
644        inner_iframe_xpath: str = '//iframe[@title="recaptcha challenge expires in two minutes"]',
645    ):
646        """Pass google recaptcha v3 by solving an audio puzzle.
647
648        :param outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox.
649        If it's the recaptcha without the initial checkbox that just shows the image puzzle,
650        pass None to this argument.
651
652        """
653        locator_method = self.locator_method
654        self.locator_method = "xpath"
655        try:
656            if outer_iframe_xpath:
657                self.switch_to_iframe(outer_iframe_xpath)
658                self.click('//*[@id="recaptcha-anchor"]')
659                self.switch_to_parent_frame()
660            self.switch_to_iframe(inner_iframe_xpath)
661            self.click('//*[@id="recaptcha-audio-button"]')
662            mp3_url = self.find(
663                '//a[@class="rc-audiochallenge-tdownload-link"]'
664            ).get_attribute("href")
665            text = get_text_from_url(mp3_url, ".mp3")
666            self.send_keys('//*[@id="audio-response"]', text)
667            self.click('//*[@id="recaptcha-verify-button"]')
668        except Exception as e:
669            print(e)
670            raise Exception("Could not solve captcha")
671        finally:
672            self.switch_to_parent_frame()
673            self.locator_method = locator_method

Pass google recaptcha v3 by solving an audio puzzle.

Parameters
  • outer_iframe_xpath: Xpath to the iframe containing the recaptcha checkbox. If it's the recaptcha without the initial checkbox that just shows the image puzzle, pass None to this argument.