#!/usr/bin/env python3
# Timestamp: "2025-08-07 20:04:42 (ywatanabe)"
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/browser/local/_BrowserMixin.py
# ----------------------------------------
from __future__ import annotations
import os
__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)
# ----------------------------------------
# Optional dependencies - browser module requires these
try:
import aiohttp
except ImportError:
aiohttp = None
try:
from playwright.async_api import Browser, async_playwright
except ImportError:
Browser = None
async_playwright = None
try:
from scitex_browser.automation import CookieAutoAcceptor
except ImportError:
CookieAutoAcceptor = None
class BrowserMixin:
    """Mixin for local browser-based strategies with common functionality.

    Browser modes:
        - interactive: for human interaction (authentication, debugging)
        - stealth: for automated operations (scraping, downloading)

    Note:
        ``get_browser_async`` always launches a visible (non-headless)
        Chromium regardless of the mode, whereas
        ``create_browser_context_async`` launches headless when the mode is
        ``"stealth"``. No mode-specific viewport size is applied by this
        class.

    Attributes:
        cookie_acceptor: ``CookieAutoAcceptor`` whose init script is injected
            into every context created by this mixin.
        mode: Current mode, ``"interactive"`` or ``"stealth"``.
        contexts: Browser contexts opened through ``new_page``.
        pages: Pages opened through ``new_page``; ``pages[i]`` belongs to
            ``contexts[i]``.
    """

    # Class-level handles used by the deprecated shared-browser API. Instance
    # methods assign through ``self``, which creates per-instance attributes
    # that shadow these.
    _shared_browser = None
    _shared_playwright = None

    _VALID_MODES = ("interactive", "stealth")

    # Chromium flags used by ``get_browser_async``.
    # NOTE(review): "--disable-web-security" turns off the same-origin policy;
    # confirm this is really wanted for every page opened through this mixin.
    _STEALTH_LAUNCH_ARGS = (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
        "--disable-web-security",
        "--disable-features=VizDisplayCompositor",
        "--disable-background-networking",
        "--disable-sync",
        "--disable-translate",
        "--disable-default-apps",
        "--enable-extensions",  # Enable extensions support
        "--no-first-run",
        "--no-default-browser-check",
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-renderer-backgrounding",
        "--disable-field-trial-config",
        "--disable-client-side-phishing-detection",
        "--disable-component-update",
        "--disable-plugins-discovery",
        "--disable-hang-monitor",
        "--disable-prompt-on-repost",
        "--disable-domain-reliability",
        "--disable-infobars",
        "--disable-notifications",
        "--disable-popup-blocking",
        "--window-size=1920,1080",
    )

    def __init__(self, mode):
        """Initialize browser mixin.

        Args:
            mode: Browser mode - 'interactive' or 'stealth'

        Raises:
            ValueError: If ``mode`` is not one of the supported modes.
            ImportError: If the optional ``scitex_browser`` dependency that
                provides ``CookieAutoAcceptor`` is not installed.
        """
        # Explicit raise instead of ``assert`` so the check survives ``-O``.
        if mode not in self._VALID_MODES:
            raise ValueError(
                f"mode must be one of {self._VALID_MODES}, got {mode!r}"
            )
        acceptor_cls = self._require_dependency(
            CookieAutoAcceptor, "scitex_browser"
        )
        self.cookie_acceptor = acceptor_cls()
        self.mode = mode
        self.contexts = []
        self.pages = []

    @staticmethod
    def _require_dependency(dependency, package):
        """Return ``dependency`` or raise ImportError if the optional import failed."""
        if dependency is None:
            raise ImportError(
                f"BrowserMixin requires the optional package '{package}', "
                "which is not installed"
            )
        return dependency

    @classmethod
    async def get_shared_browser_async(cls) -> Browser:
        """Get or create shared browser instance (deprecated - use get_browser_async).

        Launches a headless Chromium and stores it on the class when no
        connected browser is available yet.

        Returns:
            The class-level browser.

        Raises:
            ImportError: If playwright is not installed.
        """
        if cls._shared_browser is None or not cls._shared_browser.is_connected():
            if cls._shared_playwright is None:
                start_playwright = cls._require_dependency(
                    async_playwright, "playwright"
                )
                cls._shared_playwright = await start_playwright().start()
            cls._shared_browser = await cls._shared_playwright.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-dev-shm-usage"],
            )
        return cls._shared_browser

    @classmethod
    async def cleanup_shared_browser_async(cls):
        """Clean up shared browser instance (call on app shutdown).

        Playwright is stopped even when closing the browser raises.
        """
        browser = cls._shared_browser
        playwright = cls._shared_playwright
        try:
            if browser:
                cls._shared_browser = None
                await browser.close()
        finally:
            if playwright:
                cls._shared_playwright = None
                await playwright.stop()

    async def get_browser_async(self) -> Browser:
        """Get or create a local browser instance.

        When no connected browser is reachable through ``self``, a visible
        (``headless=False``) Chromium is launched with
        ``_STEALTH_LAUNCH_ARGS`` and stored on this instance. ``self.mode``
        is not consulted here.

        Returns:
            The connected browser.

        Raises:
            ImportError: If playwright is not installed.
        """
        if self._shared_browser is None or not self._shared_browser.is_connected():
            if self._shared_playwright is None:
                start_playwright = self._require_dependency(
                    async_playwright, "playwright"
                )
                self._shared_playwright = await start_playwright().start()
            # Always run in visible mode (never headless) on this code path.
            self._shared_browser = await self._shared_playwright.chromium.launch(
                headless=False,
                args=list(self._STEALTH_LAUNCH_ARGS),
            )
        return self._shared_browser

    async def new_page(self, url=None):
        """Create new page/tab and optionally navigate to URL.

        A fresh browser context is created for each page and the cookie
        auto-acceptor script is registered on it. Context and page are
        tracked in ``self.contexts`` / ``self.pages`` before navigation, so
        they can still be closed if navigation fails.

        Args:
            url: Optional URL to open; navigation waits for
                ``domcontentloaded`` with ``timeout=30000``.

        Returns:
            The newly created page.
        """
        browser = await self.get_browser_async()
        context = await browser.new_context()
        await context.add_init_script(
            self.cookie_acceptor.get_auto_acceptor_script()
        )
        page = await context.new_page()
        self.contexts.append(context)
        self.pages.append(page)
        if url:
            await page.goto(url, wait_until="domcontentloaded", timeout=30000)
        return page

    async def close_page(self, page_index):
        """Close specific page/tab by index.

        Closes the context that owns the page and drops both from the
        tracking lists. Out-of-range indices are ignored.

        Args:
            page_index: Zero-based index into ``self.pages``.
        """
        if 0 <= page_index < len(self.pages):
            await self.contexts[page_index].close()
            self.contexts.pop(page_index)
            self.pages.pop(page_index)

    async def close_all_pages(self):
        """Close all pages/tabs.

        Every tracked context is closed even if closing one of them fails;
        the tracking lists are emptied in all cases.

        Raises:
            Exception: The first error raised while closing a context,
                re-raised after the remaining contexts were attempted.
        """
        pending = list(self.contexts)
        # Clear first so the bookkeeping never points at half-closed contexts.
        self.contexts.clear()
        self.pages.clear()
        first_error = None
        for context in pending:
            try:
                await context.close()
            except Exception as err:
                if first_error is None:
                    first_error = err
        if first_error is not None:
            raise first_error

    async def create_browser_context_async(
        self, playwright_instance, **context_options
    ):
        """Create browser context with cookie auto-acceptance.

        Launches a new Chromium from ``playwright_instance`` (headless when
        the mode is ``"stealth"``, visible otherwise). The result is not
        tracked by this mixin; the caller owns and must close it.

        Args:
            playwright_instance: A started Playwright object.
            **context_options: Forwarded unchanged to ``browser.new_context``.

        Returns:
            Tuple ``(browser, context)``.
        """
        # Use headless mode for stealth, visible for interactive
        is_headless = self.mode == "stealth"
        browser = await playwright_instance.chromium.launch(headless=is_headless)
        try:
            context = await browser.new_context(**context_options)
            await context.add_init_script(
                self.cookie_acceptor.get_auto_acceptor_script()
            )
        except BaseException:
            # Do not leave an orphaned browser process behind on failure.
            await browser.close()
            raise
        return browser, context

    async def get_session_async(self, timeout: int = 30) -> aiohttp.ClientSession:
        """Get or create basic aiohttp session.

        Args:
            timeout: Total timeout passed to ``aiohttp.ClientTimeout``; only
                used when a new session has to be created.

        Returns:
            The open session cached on this instance.

        Raises:
            ImportError: If aiohttp is not installed.
        """
        session = getattr(self, "_session", None)
        if session is None or session.closed:
            http = self._require_dependency(aiohttp, "aiohttp")
            self._session = http.ClientSession(
                connector=http.TCPConnector(),
                timeout=http.ClientTimeout(total=timeout),
            )
        return self._session

    async def close_session(self):
        """Close the aiohttp session if one is open."""
        session = getattr(self, "_session", None)
        if session and not session.closed:
            await session.close()
            self._session = None

    async def accept_cookies_async(self, page_index=0, wait_seconds=2):
        """Manually accept cookies on specific page.

        Args:
            page_index: Zero-based index into ``self.pages``.
            wait_seconds: Forwarded to the cookie acceptor.

        Returns:
            The result of ``cookie_acceptor.accept_cookies_async``, or
            ``False`` when ``page_index`` is out of range.
        """
        if 0 <= page_index < len(self.pages):
            return await self.cookie_acceptor.accept_cookies_async(
                self.pages[page_index], wait_seconds
            )
        return False

    def interactive(self):
        """Set browser to interactive mode.

        If the mode changes, the instance's browser reference is dropped so
        the next ``get_browser_async`` call launches a new one. This method
        is synchronous and cannot close the previous browser; prefer
        ``show_async`` when pages are already open.

        Returns:
            ``self``, to allow chaining.
        """
        if self.mode == "interactive":
            return self
        self.mode = "interactive"
        self._shared_browser = None
        return self

    def stealth(self):
        """Set browser to stealth mode.

        If the mode changes, the instance's browser reference is dropped so
        the next ``get_browser_async`` call launches a new one. This method
        is synchronous and cannot close the previous browser; prefer
        ``hide_async`` when pages are already open.

        Returns:
            ``self``, to allow chaining.
        """
        if self.mode == "stealth":
            return self
        self.mode = "stealth"
        self._shared_browser = None
        return self

    async def show_async(self):
        """Switch browser to interactive mode and recreate all existing pages at current URLs.

        Returns:
            ``self``, to allow chaining.
        """
        if self.mode == "interactive":
            return self
        self.mode = "interactive"
        await self._restart_contexts_async()
        return self

    async def hide_async(self):
        """Switch browser to stealth mode and recreate all existing pages at current URLs.

        Returns:
            ``self``, to allow chaining.
        """
        if self.mode == "stealth":
            return self
        self.mode = "stealth"
        await self._restart_contexts_async()
        return self

    async def _close_owned_browser_async(self):
        """Close and forget the browser launched by this instance, if any.

        Only a browser stored on the instance itself is closed; a browser
        that lives on the class (shared API) is left running and merely
        shadowed with ``None`` on this instance.
        """
        owned_browser = vars(self).get("_shared_browser")
        self._shared_browser = None
        if owned_browser is not None and owned_browser.is_connected():
            await owned_browser.close()

    async def _restart_contexts_async(self):
        """Reopen every tracked page at its current URL in a fresh browser."""
        page_urls = [page.url for page in self.pages]
        await self.close_all_pages()
        # Close the old browser instead of just dropping the reference;
        # otherwise every mode switch leaves a Chromium process running.
        await self._close_owned_browser_async()
        for url in page_urls:
            await self.new_page(url)

    async def __aenter__(self):
        """Enter the async context; returns ``self`` without opening anything."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close all tracked pages and the aiohttp session.

        The session is closed even when closing the pages raises. The
        browser itself is left running.
        """
        try:
            await self.close_all_pages()
        finally:
            await self.close_session()
def main(args):
    """Demonstrate BrowserMixin functionality.

    Opens https://example.com in an interactive browser, prints the length of
    the fetched page content and the number of open tabs, then closes all
    pages.

    Args:
        args: Parsed command-line arguments (not used by the demo).

    Returns:
        int: Exit status, ``0`` when the demo finished.
    """
    import asyncio

    from scitex_browser.core import BrowserMixin

    class DemoBrowser(BrowserMixin):
        async def scrape_async(self, url):
            page = await self.new_page(url)
            return await page.content()

    async def demo():
        browser = DemoBrowser(mode="interactive")
        try:
            # Scrape a page
            content = await browser.scrape_async("https://example.com")
            print(f"✓ Fetched {len(content)} bytes")
            print(f"✓ Open tabs: {len(browser.pages)}")
        finally:
            # Close the tabs even when scraping fails part-way.
            await browser.close_all_pages()
        print("✓ Demo complete")

    asyncio.run(demo())
    return 0
def parse_args():
    """Parse the command line of the demo script.

    The parser defines no options of its own, so the returned namespace is
    empty; argparse still provides ``-h/--help``.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser(description="BrowserMixin demo")
    namespace = cli.parse_args()
    return namespace
def run_main() -> None:
    """Initialize scitex framework, run main function, and cleanup.

    Starts a scitex session, publishes the objects it returns as module
    globals (``CONFIG``, ``CC``, ``plt``, ``rng``), installs the returned
    streams as ``sys.stdout`` / ``sys.stderr``, runs ``main`` and finally
    closes the session with ``main``'s exit status.
    """
    global CONFIG, CC, sys, plt, rng

    import sys

    import matplotlib.pyplot as plt
    import scitex as stx

    cli_args = parse_args()

    session_state = stx.session.start(
        sys,
        plt,
        args=cli_args,
        file=__FILE__,
        sdir_suffix=None,
        verbose=False,
        agg=True,
    )
    # Unpacking order matters: the second and third items replace the
    # process-wide standard streams.
    CONFIG, sys.stdout, sys.stderr, plt, CC, rng = session_state

    status = main(cli_args)

    stx.session.close(
        CONFIG,
        verbose=False,
        notify=False,
        message="",
        exit_status=status,
    )
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    run_main()
# python -m scitex_browser.core.BrowserMixin
# EOF