LICENSE
MANIFEST.in
README.md
README_zh.md
pyproject.toml
requirements.txt
setup.py
docs/LLM_WEBKIT.md
docs/TEDS.md
docs/assets/arch.png
examples/basic_usage.py
examples/demo.py
examples/llm_webkit_usage.py
examples/magic_html_extract_demo.py
examples/main_html_eval.py
examples/multi_extractor_compare.py
examples/resiliparse_extract_demo.py
examples/teds_usage.py
examples/test_model.py
examples/test_table_extract.py
examples/trafilatura_extract_demo.py
scripts/DOMAIN_STATS_README.md
scripts/README.md
scripts/diff_jsonl.py
scripts/domain_stats.py
scripts/filter_normal_jsonl.py
scripts/language_classify.py
scripts/merge_jsonl.py
scripts/process_dataset.py
scripts/process_dataset.sh
scripts/simplify_meta.py
scripts/statics.py
scripts/style_classify.py
tools/label_tool.py
webmainbench/__init__.py
webmainbench/config.py
webmainbench/data/__init__.py
webmainbench/data/dataset.py
webmainbench/data/loader.py
webmainbench/data/saver.py
webmainbench/evaluator/__init__.py
webmainbench/evaluator/evaluator.py
webmainbench/evaluator/main_html_evaluator.py
webmainbench/extractors/__init__.py
webmainbench/extractors/base.py
webmainbench/extractors/dripper_extractor.py
webmainbench/extractors/factory.py
webmainbench/extractors/jina_extractor.py
webmainbench/extractors/llm_webkit_extractor.py
webmainbench/extractors/magic_html_extractor.py
webmainbench/extractors/resiliparse_extractor.py
webmainbench/extractors/test_model_extractor.py
webmainbench/extractors/trafilatura_extractor.py
webmainbench/extractors/trafilatura_txt_extractor.py
webmainbench/metrics/__init__.py
webmainbench/metrics/base.py
webmainbench/metrics/base_content_splitter.py
webmainbench/metrics/calculator.py
webmainbench/metrics/code_extractor.py
webmainbench/metrics/formula_extractor.py
webmainbench/metrics/formula_metrics.py
webmainbench/metrics/mainhtml_calculator.py
webmainbench/metrics/table_extractor.py
webmainbench/metrics/table_metrics.py
webmainbench/metrics/teds_metrics.py
webmainbench/metrics/text_metrics.py
webmainbench/utils/__init__.py
webmainbench/utils/helpers.py
webmainbench/utils/main_html.py