LICENSE
MANIFEST.in
README.md
THIRD_PARTY_NOTICES.md
pyproject.toml
datasets/wikipedia_mini.json
docs/ARCHITECTURE.md
docs/BACKENDS.md
docs/CORPUS.md
docs/CORPUS_DESIGN.md
docs/DEMOS.md
docs/EXTRACTION.md
docs/FEATURE_INDEX.md
docs/ROADMAP.md
docs/TESTING.md
docs/USER_CONFIGURATION.md
docs/api.rst
docs/conf.py
docs/index.rst
features/backend_validation.feature
features/biblicus_corpus.feature
features/cli_entrypoint.feature
features/cli_parsing.feature
features/content_sniffing.feature
features/corpus_edge_cases.feature
features/corpus_identity.feature
features/corpus_purge.feature
features/environment.py
features/error_cases.feature
features/evaluation.feature
features/extraction_error_handling.feature
features/extraction_selection.feature
features/extraction_selection_longest.feature
features/extractor_pipeline.feature
features/extractor_validation.feature
features/frontmatter.feature
features/hook_config_validation.feature
features/hook_error_handling.feature
features/import_tree.feature
features/ingest_sources.feature
features/integration_audio_samples.feature
features/integration_image_samples.feature
features/integration_mixed_corpus.feature
features/integration_mixed_extraction.feature
features/integration_ocr_image_extraction.feature
features/integration_pdf_retrieval.feature
features/integration_pdf_samples.feature
features/integration_unstructured_extraction.feature
features/integration_wikipedia.feature
features/lifecycle_hooks.feature
features/model_validation.feature
features/ocr_extractor.feature
features/pdf_text_extraction.feature
features/python_api.feature
features/python_hook_logging.feature
features/retrieval_budget.feature
features/retrieval_scan.feature
features/retrieval_sqlite_full_text_search.feature
features/retrieval_uses_extraction_run.feature
features/retrieval_utilities.feature
features/source_loading.feature
features/streaming_ingest.feature
features/stt_extractor.feature
features/text_extraction_runs.feature
features/unstructured_extractor.feature
features/user_config.feature
features/steps/backend_steps.py
features/steps/cli_parsing_steps.py
features/steps/cli_steps.py
features/steps/extraction_steps.py
features/steps/extractor_steps.py
features/steps/frontmatter_steps.py
features/steps/model_steps.py
features/steps/openai_steps.py
features/steps/pdf_steps.py
features/steps/python_api_steps.py
features/steps/rapidocr_steps.py
features/steps/retrieval_steps.py
features/steps/stt_steps.py
features/steps/unstructured_steps.py
features/steps/user_config_steps.py
scripts/download_audio_samples.py
scripts/download_image_samples.py
scripts/download_mixed_samples.py
scripts/download_pdf_samples.py
scripts/download_wikipedia.py
scripts/test.py
src/biblicus/__init__.py
src/biblicus/__main__.py
src/biblicus/cli.py
src/biblicus/constants.py
src/biblicus/corpus.py
src/biblicus/errors.py
src/biblicus/evaluation.py
src/biblicus/extraction.py
src/biblicus/frontmatter.py
src/biblicus/hook_logging.py
src/biblicus/hook_manager.py
src/biblicus/hooks.py
src/biblicus/ignore.py
src/biblicus/models.py
src/biblicus/retrieval.py
src/biblicus/sources.py
src/biblicus/time.py
src/biblicus/uris.py
src/biblicus/user_config.py
src/biblicus.egg-info/PKG-INFO
src/biblicus.egg-info/SOURCES.txt
src/biblicus.egg-info/dependency_links.txt
src/biblicus.egg-info/entry_points.txt
src/biblicus.egg-info/requires.txt
src/biblicus.egg-info/top_level.txt
src/biblicus/_vendor/dotyaml/__init__.py
src/biblicus/_vendor/dotyaml/interpolation.py
src/biblicus/_vendor/dotyaml/loader.py
src/biblicus/_vendor/dotyaml/transformer.py
src/biblicus/backends/__init__.py
src/biblicus/backends/base.py
src/biblicus/backends/scan.py
src/biblicus/backends/sqlite_full_text_search.py
src/biblicus/extractors/__init__.py
src/biblicus/extractors/base.py
src/biblicus/extractors/metadata_text.py
src/biblicus/extractors/openai_stt.py
src/biblicus/extractors/pass_through_text.py
src/biblicus/extractors/pdf_text.py
src/biblicus/extractors/pipeline.py
src/biblicus/extractors/rapidocr_text.py
src/biblicus/extractors/select_longest_text.py
src/biblicus/extractors/select_text.py
src/biblicus/extractors/unstructured_text.py