CHANGELOG.md
LICENSE
MANIFEST.in
README.md
pyproject.toml
requirements.txt
setup.py
docs/changelog.md
docs/index.md
docs/advanced/distributed-crawling.md
docs/advanced/hooks-auth.md
docs/advanced/proxy-security.md
docs/advanced/session-management.md
docs/api/API.md
docs/architecture/ARCHITECTURE_OVERVIEW.md
docs/architecture/COMMUNICATION_CHANNELS.md
docs/architecture/COMPONENT_CATALOG.md
docs/architecture/EXTENSION_POINTS.md
docs/architecture/FACTORY_LIFECYCLE.md
docs/architecture/LAYER_SPECIFICATION.md
docs/architecture/PLUGIN_SYSTEM.md
docs/architecture/STRUCTURED_DATA_PLUGIN.md
docs/core/cache-modes.md
docs/core/deep-crawling.md
docs/core/graph-operations.md
docs/core/simple-crawling.md
docs/core/url-rules.md
docs/extraction/custom-extractors.md
docs/extraction/plugins.md
docs/extraction/structured-data.md
docs/getting-started/examples.md
docs/getting-started/installation.md
docs/getting-started/quickstart.md
graph_crawler/__init__.py
graph_crawler/__version__.py
graph_crawler/py.typed
graph_crawler.egg-info/PKG-INFO
graph_crawler.egg-info/SOURCES.txt
graph_crawler.egg-info/dependency_links.txt
graph_crawler.egg-info/entry_points.txt
graph_crawler.egg-info/not-zip-safe
graph_crawler.egg-info/requires.txt
graph_crawler.egg-info/top_level.txt
graph_crawler/api/__init__.py
graph_crawler/api/_core.py
graph_crawler/api/_distributed.py
graph_crawler/api/_shared.py
graph_crawler/api/_sitemap_distributed.py
graph_crawler/api/async_.py
graph_crawler/api/cli.py
graph_crawler/api/crawl_monitor.py
graph_crawler/api/dashboard.py
graph_crawler/api/history_manager.py
graph_crawler/api/project_init.py
graph_crawler/api/rest_api.py
graph_crawler/api/stats_collector.py
graph_crawler/api/sync.py
graph_crawler/api/webhooks.py
graph_crawler/api/websocket_manager.py
graph_crawler/api/client/__init__.py
graph_crawler/api/client/client.py
graph_crawler/application/__init__.py
graph_crawler/application/context/__init__.py
graph_crawler/application/context/dependency_registry.py
graph_crawler/application/context/graph_context.py
graph_crawler/application/context/merge_context.py
graph_crawler/application/dto/__init__.py
graph_crawler/application/dto/edge_dto.py
graph_crawler/application/dto/graph_dto.py
graph_crawler/application/dto/node_dto.py
graph_crawler/application/dto/utils.py
graph_crawler/application/dto/mappers/__init__.py
graph_crawler/application/dto/mappers/edge_mapper.py
graph_crawler/application/dto/mappers/graph_mapper.py
graph_crawler/application/dto/mappers/node_mapper.py
graph_crawler/application/services/__init__.py
graph_crawler/application/services/application_container.py
graph_crawler/application/services/driver_factory.py
graph_crawler/application/services/storage_factory.py
graph_crawler/application/services/exporters/__init__.py
graph_crawler/application/services/exporters/base_exporter.py
graph_crawler/application/services/exporters/csv_exporter.py
graph_crawler/application/services/exporters/edge_exporter.py
graph_crawler/application/services/exporters/excel_exporter.py
graph_crawler/application/services/exporters/parquet_exporter.py
graph_crawler/application/services/exporters/sql_exporter.py
graph_crawler/application/use_cases/__init__.py
graph_crawler/application/use_cases/crawling/__init__.py
graph_crawler/application/use_cases/crawling/adaptive_throttler.py
graph_crawler/application/use_cases/crawling/base_spider.py
graph_crawler/application/use_cases/crawling/celery_batch_spider.py
graph_crawler/application/use_cases/crawling/celery_spider.py
graph_crawler/application/use_cases/crawling/checkpoint.py
graph_crawler/application/use_cases/crawling/crawl_coordinator.py
graph_crawler/application/use_cases/crawling/dead_letter_queue.py
graph_crawler/application/use_cases/crawling/domain_rate_limiter.py
graph_crawler/application/use_cases/crawling/incremental_strategy.py
graph_crawler/application/use_cases/crawling/link_processor.py
graph_crawler/application/use_cases/crawling/multiprocess_spider.py
graph_crawler/application/use_cases/crawling/node_scanner.py
graph_crawler/application/use_cases/crawling/progress_tracker.py
graph_crawler/application/use_cases/crawling/scheduler.py
graph_crawler/application/use_cases/crawling/serialization_mixin.py
graph_crawler/application/use_cases/crawling/sitemap_parser.py
graph_crawler/application/use_cases/crawling/sitemap_processor.py
graph_crawler/application/use_cases/crawling/sitemap_spider.py
graph_crawler/application/use_cases/crawling/spider.py
graph_crawler/application/use_cases/crawling/spider_lifecycle.py
graph_crawler/application/use_cases/crawling/spider_refactored.py
graph_crawler/application/use_cases/crawling/filters/__init__.py
graph_crawler/application/use_cases/crawling/filters/base.py
graph_crawler/application/use_cases/crawling/filters/domain_filter.py
graph_crawler/application/use_cases/crawling/filters/domain_patterns.py
graph_crawler/application/use_cases/crawling/filters/path_filter.py
graph_crawler/application/use_cases/crawling/parsers/__init__.py
graph_crawler/application/use_cases/crawling/parsers/base.py
graph_crawler/application/use_cases/crawling/parsers/html_parser.py
graph_crawler/domain/__init__.py
graph_crawler/domain/entities/__init__.py
graph_crawler/domain/entities/edge.py
graph_crawler/domain/entities/edge_analysis.py
graph_crawler/domain/entities/graph.py
graph_crawler/domain/entities/graph_operations.py
graph_crawler/domain/entities/graph_statistics.py
graph_crawler/domain/entities/merge_strategies.py
graph_crawler/domain/entities/node.py
graph_crawler/domain/entities/registries.py
graph_crawler/domain/entities/sitemap_node.py
graph_crawler/domain/events/__init__.py
graph_crawler/domain/events/event_bus.py
graph_crawler/domain/events/events.py
graph_crawler/domain/interfaces/__init__.py
graph_crawler/domain/interfaces/adapter.py
graph_crawler/domain/interfaces/driver.py
graph_crawler/domain/interfaces/event_bus.py
graph_crawler/domain/interfaces/filter.py
graph_crawler/domain/interfaces/node_interfaces.py
graph_crawler/domain/interfaces/plugin_manager.py
graph_crawler/domain/interfaces/processor.py
graph_crawler/domain/interfaces/scanner.py
graph_crawler/domain/interfaces/scheduler.py
graph_crawler/domain/interfaces/spider.py
graph_crawler/domain/interfaces/storage.py
graph_crawler/domain/interfaces/unified_storage.py
graph_crawler/domain/value_objects/__init__.py
graph_crawler/domain/value_objects/configs.py
graph_crawler/domain/value_objects/lifecycle.py
graph_crawler/domain/value_objects/models.py
graph_crawler/domain/value_objects/settings.py
graph_crawler/extensions/__init__.py
graph_crawler/extensions/middleware/__init__.py
graph_crawler/extensions/middleware/base.py
graph_crawler/extensions/middleware/cache_middleware.py
graph_crawler/extensions/middleware/chain.py
graph_crawler/extensions/middleware/error_recovery_middleware.py
graph_crawler/extensions/middleware/logging_middleware.py
graph_crawler/extensions/middleware/proxy_health.py
graph_crawler/extensions/middleware/proxy_middleware.py
graph_crawler/extensions/middleware/proxy_models.py
graph_crawler/extensions/middleware/proxy_selection.py
graph_crawler/extensions/middleware/rate_limit_middleware.py
graph_crawler/extensions/middleware/request_response_middleware.py
graph_crawler/extensions/middleware/retry_middleware.py
graph_crawler/extensions/middleware/robots_cache.py
graph_crawler/extensions/middleware/robots_middleware.py
graph_crawler/extensions/middleware/robots_validator.py
graph_crawler/extensions/middleware/user_agent_middleware.py
graph_crawler/extensions/plugins/__init__.py
graph_crawler/extensions/plugins/base.py
graph_crawler/extensions/plugins/builtin/__init__.py
graph_crawler/extensions/plugins/builtin/stats_export_plugin.py
graph_crawler/extensions/plugins/crawl_engine/__init__.py
graph_crawler/extensions/plugins/crawl_engine/base.py
graph_crawler/extensions/plugins/crawl_engine/priority_provider.py
graph_crawler/extensions/plugins/crawl_engine/smart_crawl.py
graph_crawler/extensions/plugins/crawl_engine/vector_crawl.py
graph_crawler/extensions/plugins/engine/__init__.py
graph_crawler/extensions/plugins/engine/anti_bot_detection.py
graph_crawler/extensions/plugins/engine/anti_bot_playwright.py
graph_crawler/extensions/plugins/engine/anti_bot_scripts.py
graph_crawler/extensions/plugins/engine/anti_bot_stealth.py
graph_crawler/extensions/plugins/engine/captcha/__init__.py
graph_crawler/extensions/plugins/engine/captcha/detector.py
graph_crawler/extensions/plugins/engine/captcha/models.py
graph_crawler/extensions/plugins/engine/captcha/plugin.py
graph_crawler/extensions/plugins/engine/captcha/services.py
graph_crawler/extensions/plugins/node/__init__.py
graph_crawler/extensions/plugins/node/base.py
graph_crawler/extensions/plugins/node/defaults.py
graph_crawler/extensions/plugins/node/links.py
graph_crawler/extensions/plugins/node/metadata.py
graph_crawler/extensions/plugins/node/smart_page_finder.py
graph_crawler/extensions/plugins/node/text.py
graph_crawler/extensions/plugins/node/content_extractors/__init__.py
graph_crawler/extensions/plugins/node/content_extractors/base.py
graph_crawler/extensions/plugins/node/content_extractors/goose3_extractor.py
graph_crawler/extensions/plugins/node/content_extractors/newspaper_extractor.py
graph_crawler/extensions/plugins/node/content_extractors/plugin.py
graph_crawler/extensions/plugins/node/content_extractors/readability_extractor.py
graph_crawler/extensions/plugins/node/extractors/__init__.py
graph_crawler/extensions/plugins/node/extractors/email_extractor.py
graph_crawler/extensions/plugins/node/extractors/phone_extractor.py
graph_crawler/extensions/plugins/node/extractors/price_extractor.py
graph_crawler/extensions/plugins/node/structured_data/__init__.py
graph_crawler/extensions/plugins/node/structured_data/constants.py
graph_crawler/extensions/plugins/node/structured_data/exceptions.py
graph_crawler/extensions/plugins/node/structured_data/extractor.py
graph_crawler/extensions/plugins/node/structured_data/options.py
graph_crawler/extensions/plugins/node/structured_data/plugin.py
graph_crawler/extensions/plugins/node/structured_data/result.py
graph_crawler/extensions/plugins/node/structured_data/parsers/__init__.py
graph_crawler/extensions/plugins/node/structured_data/parsers/base.py
graph_crawler/extensions/plugins/node/structured_data/parsers/jsonld.py
graph_crawler/extensions/plugins/node/structured_data/parsers/microdata.py
graph_crawler/extensions/plugins/node/structured_data/parsers/opengraph.py
graph_crawler/extensions/plugins/node/structured_data/parsers/rdfa.py
graph_crawler/extensions/plugins/node/structured_data/parsers/twitter.py
graph_crawler/extensions/plugins/node/vectorization/__init__.py
graph_crawler/extensions/plugins/node/vectorization/batch_vectorizer.py
graph_crawler/extensions/plugins/node/vectorization/realtime_vectorizer.py
graph_crawler/extensions/plugins/node/vectorization/utils.py
graph_crawler/infrastructure/__init__.py
graph_crawler/infrastructure/adapters/__init__.py
graph_crawler/infrastructure/adapters/base.py
graph_crawler/infrastructure/adapters/beautifulsoup_adapter.py
graph_crawler/infrastructure/adapters/lxml_adapter.py
graph_crawler/infrastructure/adapters/scrapy_adapter.py
graph_crawler/infrastructure/adapters/selectolax_adapter.py
graph_crawler/infrastructure/messaging/__init__.py
graph_crawler/infrastructure/messaging/celery_app.py
graph_crawler/infrastructure/messaging/celery_batch.py
graph_crawler/infrastructure/messaging/celery_job_task.py
graph_crawler/infrastructure/messaging/celery_unified.py
graph_crawler/infrastructure/messaging/config.py
graph_crawler/infrastructure/messaging/easy_crawler.py
graph_crawler/infrastructure/messaging/worker_api.py
graph_crawler/infrastructure/persistence/__init__.py
graph_crawler/infrastructure/persistence/auto_storage.py
graph_crawler/infrastructure/persistence/base.py
graph_crawler/infrastructure/persistence/graph_repository.py
graph_crawler/infrastructure/persistence/json_storage.py
graph_crawler/infrastructure/persistence/memory_storage.py
graph_crawler/infrastructure/persistence/mongodb_storage.py
graph_crawler/infrastructure/persistence/mongodb_storage_OLD.py
graph_crawler/infrastructure/persistence/naming_strategy.py
graph_crawler/infrastructure/persistence/postgresql_storage.py
graph_crawler/infrastructure/persistence/repository.py
graph_crawler/infrastructure/persistence/sqlite_storage.py
graph_crawler/infrastructure/persistence/json/__init__.py
graph_crawler/infrastructure/persistence/memory/__init__.py
graph_crawler/infrastructure/persistence/protocols/__init__.py
graph_crawler/infrastructure/persistence/sqlite/__init__.py
graph_crawler/infrastructure/persistence/unified/__init__.py
graph_crawler/infrastructure/persistence/unified/file_job_storage.py
graph_crawler/infrastructure/persistence/unified/file_queue_storage.py
graph_crawler/infrastructure/persistence/unified/memory_job_storage.py
graph_crawler/infrastructure/persistence/unified/memory_queue_storage.py
graph_crawler/infrastructure/persistence/unified/postgresql_job_storage.py
graph_crawler/infrastructure/persistence/unified/postgresql_queue_storage.py
graph_crawler/infrastructure/persistence/unified/unified_storage.py
graph_crawler/infrastructure/transport/__init__.py
graph_crawler/infrastructure/transport/base.py
graph_crawler/infrastructure/transport/base_plugin.py
graph_crawler/infrastructure/transport/connection_pool.py
graph_crawler/infrastructure/transport/context.py
graph_crawler/infrastructure/transport/factory.py
graph_crawler/infrastructure/transport/plugin_manager.py
graph_crawler/infrastructure/transport/protocols.py
graph_crawler/infrastructure/transport/session_adapters.py
graph_crawler/infrastructure/transport/session_manager.py
graph_crawler/infrastructure/transport/session_protocol.py
graph_crawler/infrastructure/transport/async_http/__init__.py
graph_crawler/infrastructure/transport/async_http/config.py
graph_crawler/infrastructure/transport/async_http/context.py
graph_crawler/infrastructure/transport/async_http/driver.py
graph_crawler/infrastructure/transport/async_http/driver_v4.py
graph_crawler/infrastructure/transport/async_http/stages.py
graph_crawler/infrastructure/transport/async_http/plugins/__init__.py
graph_crawler/infrastructure/transport/async_http/plugins/headers.py
graph_crawler/infrastructure/transport/async_http/plugins/rate_limiter.py
graph_crawler/infrastructure/transport/async_http/plugins/retry.py
graph_crawler/infrastructure/transport/async_http/plugins/stealth_driver.py
graph_crawler/infrastructure/transport/browser/__init__.py
graph_crawler/infrastructure/transport/core/__init__.py
graph_crawler/infrastructure/transport/core/base_async.py
graph_crawler/infrastructure/transport/core/base_sync.py
graph_crawler/infrastructure/transport/core/mixins.py
graph_crawler/infrastructure/transport/http/__init__.py
graph_crawler/infrastructure/transport/playwright/__init__.py
graph_crawler/infrastructure/transport/playwright/config.py
graph_crawler/infrastructure/transport/playwright/context.py
graph_crawler/infrastructure/transport/playwright/driver.py
graph_crawler/infrastructure/transport/playwright/pooled_driver.py
graph_crawler/infrastructure/transport/playwright/stages.py
graph_crawler/infrastructure/transport/playwright/plugins/__init__.py
graph_crawler/infrastructure/transport/playwright/plugins/captcha_detector.py
graph_crawler/infrastructure/transport/playwright/plugins/captcha_solver.py
graph_crawler/infrastructure/transport/playwright/plugins/cloudflare.py
graph_crawler/infrastructure/transport/playwright/plugins/enhanced_cloudflare.py
graph_crawler/infrastructure/transport/playwright/plugins/enhanced_stealth.py
graph_crawler/infrastructure/transport/playwright/plugins/form_filler.py
graph_crawler/infrastructure/transport/playwright/plugins/human_behavior.py
graph_crawler/infrastructure/transport/playwright/plugins/screenshot.py
graph_crawler/infrastructure/transport/playwright/plugins/stealth.py
graph_crawler/infrastructure/transport/sync/__init__.py
graph_crawler/infrastructure/transport/sync/requests_driver.py
graph_crawler/native/README.md
graph_crawler/native/__init__.py
graph_crawler/native/bloom_filter.pyx
graph_crawler/native/html_parser.pyx
graph_crawler/native/setup.py
graph_crawler/native/url_utils.pyx
graph_crawler/observability/__init__.py
graph_crawler/observability/error_tracing.py
graph_crawler/observability/metrics_core.py
graph_crawler/observability/structured_logging.py
graph_crawler/observability/decorators/__init__.py
graph_crawler/observability/decorators/cache.py
graph_crawler/observability/decorators/log.py
graph_crawler/observability/decorators/retry.py
graph_crawler/observability/decorators/timing.py
graph_crawler/observability/listeners/__init__.py
graph_crawler/observability/listeners/base.py
graph_crawler/observability/listeners/base_metrics_listener.py
graph_crawler/observability/listeners/crawl_listener.py
graph_crawler/observability/listeners/dlq_listener.py
graph_crawler/observability/listeners/error_listener.py
graph_crawler/observability/listeners/logging_listener.py
graph_crawler/observability/listeners/metrics_listener.py
graph_crawler/observability/listeners/node_listener.py
graph_crawler/observability/listeners/plugin_listener.py
graph_crawler/observability/listeners/storage_listener.py
graph_crawler/observability/listeners/url_listener.py
graph_crawler/observability/metrics/__init__.py
graph_crawler/observability/metrics/memory_profiler.py
graph_crawler/observability/metrics/metrics_collector.py
graph_crawler/observability/metrics/prometheus_metrics.py
graph_crawler/optimizations/__init__.py
graph_crawler/optimizations/simhash_numba.py
graph_crawler/shared/__init__.py
graph_crawler/shared/constants.py
graph_crawler/shared/exceptions.py
graph_crawler/shared/error_handling/__init__.py
graph_crawler/shared/error_handling/error_handler.py
graph_crawler/shared/lifecycle/__init__.py
graph_crawler/shared/lifecycle/lifecycle.py
graph_crawler/shared/security/__init__.py
graph_crawler/shared/security/url_sanitizer.py
graph_crawler/shared/security/url_validator.py
graph_crawler/shared/utils/__init__.py
graph_crawler/shared/utils/bloom_filter.py
graph_crawler/shared/utils/captcha_bypass.py
graph_crawler/shared/utils/celery_config.py
graph_crawler/shared/utils/celery_helpers.py
graph_crawler/shared/utils/distributed_rate_limiter.py
graph_crawler/shared/utils/distributed_rate_limiter_backends.py
graph_crawler/shared/utils/dns_cache.py
graph_crawler/shared/utils/event_publisher_mixin.py
graph_crawler/shared/utils/fast_json.py
graph_crawler/shared/utils/fingerprint.py
graph_crawler/shared/utils/fingerprint_data.py
graph_crawler/shared/utils/fingerprint_generators.py
graph_crawler/shared/utils/fingerprint_profile.py
graph_crawler/shared/utils/fingerprint_stealth.py
graph_crawler/shared/utils/html_utils.py
graph_crawler/shared/utils/memory_optimizer.py
graph_crawler/shared/utils/proxy_manager.py
graph_crawler/shared/utils/rate_limiter.py
graph_crawler/shared/utils/url_utils.py
graph_crawler/shared/utils/user_agent_rotator.py
graph_crawler/shared/utils/validation_helpers.py
graph_crawler/shared/utils/visualization.py
graph_crawler/shared/utils/visualization_core.py
graph_crawler/shared/utils/captcha/__init__.py
graph_crawler/shared/utils/captcha/base.py
graph_crawler/shared/utils/captcha/manager.py
graph_crawler/shared/utils/markdown/__init__.py
graph_crawler/shared/utils/markdown/generator.py
graph_crawler/shared/utils/markdown/options.py
graph_crawler/shared/utils/markdown/result.py