LICENSE
README.md
pyproject.toml
src/docpull/__init__.py
src/docpull/__main__.py
src/docpull/accounting.py
src/docpull/auth_cli.py
src/docpull/benchmark.py
src/docpull/cli.py
src/docpull/discovery_cli.py
src/docpull/doctor.py
src/docpull/evidence_pack.py
src/docpull/exports.py
src/docpull/graph.py
src/docpull/judge.py
src/docpull/local_workflows.py
src/docpull/metadata_extractor.py
src/docpull/monitor.py
src/docpull/pack_reader.py
src/docpull/pack_tools.py
src/docpull/parallel_workflows.py
src/docpull/parity.py
src/docpull/parity_cli.py
src/docpull/passk.py
src/docpull/policy.py
src/docpull/policy_cli.py
src/docpull/provider_adapters.py
src/docpull/provider_capabilities.py
src/docpull/provider_cli.py
src/docpull/provider_keys.py
src/docpull/provider_probes.py
src/docpull/py.typed
src/docpull/rendering.py
src/docpull/scraper.py
src/docpull/server.py
src/docpull/share.py
src/docpull/skill_export.py
src/docpull/source_scoring.py
src/docpull/time_utils.py
src/docpull.egg-info/PKG-INFO
src/docpull.egg-info/SOURCES.txt
src/docpull.egg-info/dependency_links.txt
src/docpull.egg-info/entry_points.txt
src/docpull.egg-info/requires.txt
src/docpull.egg-info/top_level.txt
src/docpull/cache/__init__.py
src/docpull/cache/frontier.py
src/docpull/cache/manager.py
src/docpull/cache/streaming_dedup.py
src/docpull/conversion/__init__.py
src/docpull/conversion/chunking.py
src/docpull/conversion/extractor.py
src/docpull/conversion/filings.py
src/docpull/conversion/markdown.py
src/docpull/conversion/protocols.py
src/docpull/conversion/special_cases.py
src/docpull/conversion/trafilatura_extractor.py
src/docpull/core/__init__.py
src/docpull/core/fetcher.py
src/docpull/discovery/__init__.py
src/docpull/discovery/_fetch.py
src/docpull/discovery/composite.py
src/docpull/discovery/contracts.py
src/docpull/discovery/crawler.py
src/docpull/discovery/filters.py
src/docpull/discovery/protocols.py
src/docpull/discovery/sitemap.py
src/docpull/discovery/link_extractors/__init__.py
src/docpull/discovery/link_extractors/enhanced.py
src/docpull/discovery/link_extractors/protocols.py
src/docpull/discovery/link_extractors/static.py
src/docpull/fixtures/__init__.py
src/docpull/fixtures/parallel-search-extract.json
src/docpull/http/__init__.py
src/docpull/http/client.py
src/docpull/http/protocols.py
src/docpull/http/rate_limiter.py
src/docpull/mcp/__init__.py
src/docpull/mcp/server.py
src/docpull/mcp/sources.py
src/docpull/mcp/tools.py
src/docpull/models/__init__.py
src/docpull/models/config.py
src/docpull/models/document.py
src/docpull/models/events.py
src/docpull/models/profiles.py
src/docpull/models/run.py
src/docpull/pipeline/__init__.py
src/docpull/pipeline/base.py
src/docpull/pipeline/manifest.py
src/docpull/pipeline/steps/__init__.py
src/docpull/pipeline/steps/chunk.py
src/docpull/pipeline/steps/convert.py
src/docpull/pipeline/steps/dedup.py
src/docpull/pipeline/steps/fetch.py
src/docpull/pipeline/steps/metadata.py
src/docpull/pipeline/steps/render.py
src/docpull/pipeline/steps/save.py
src/docpull/pipeline/steps/save_json.py
src/docpull/pipeline/steps/save_ndjson.py
src/docpull/pipeline/steps/save_okf.py
src/docpull/pipeline/steps/save_sqlite.py
src/docpull/pipeline/steps/validate.py
src/docpull/security/__init__.py
src/docpull/security/download_policy.py
src/docpull/security/robots.py
src/docpull/security/url_validator.py
tests/test_accounting.py
tests/test_auth_cli.py
tests/test_benchmark.py
tests/test_cache_conditional_get.py
tests/test_chunking.py
tests/test_ci_policy.py
tests/test_cli.py
tests/test_conversion.py
tests/test_convert_step_new.py
tests/test_discovery.py
tests/test_discovery_contracts.py
tests/test_doctor.py
tests/test_document_record.py
tests/test_evidence_pack.py
tests/test_exports.py
tests/test_frontier_resume.py
tests/test_graph.py
tests/test_integration.py
tests/test_judge.py
tests/test_link_extractors.py
tests/test_local_workflows.py
tests/test_mcp_server.py
tests/test_mcp_tools.py
tests/test_metadata_extractor.py
tests/test_monitor.py
tests/test_naming.py
tests/test_outputs_e2e.py
tests/test_pack_server.py
tests/test_pack_tools.py
tests/test_parallel_workflows.py
tests/test_parity_workflows.py
tests/test_passk.py
tests/test_pipeline.py
tests/test_policy.py
tests/test_policy_cli.py
tests/test_provider_adapters.py
tests/test_provider_cli.py
tests/test_provider_keys.py
tests/test_provider_probes.py
tests/test_rate_limiter.py
tests/test_real_site_regressions.py
tests/test_rendering.py
tests/test_save_ndjson.py
tests/test_save_sqlite.py
tests/test_security_hardening.py
tests/test_share.py
tests/test_special_cases.py
tests/test_surface_contract.py
tests/test_time_utils.py
tests/test_trafilatura_extractor.py