Metadata-Version: 2.1
Name: mineru-html
Version: 1.0.0
Summary: HTML main content extractor based on large language models
License: Apache License 2.0
Requires-Python: >=3.10
License-File: LICENCE
License-File: NOTICE
Requires-Dist: beautifulsoup4
Requires-Dist: fastapi
Requires-Dist: html2text
Requires-Dist: html-text
Requires-Dist: jieba
Requires-Dist: lxml
Requires-Dist: pandas
Requires-Dist: pydantic
Requires-Dist: ray
Requires-Dist: rouge-score
Requires-Dist: trafilatura
Requires-Dist: vllm (==0.11.1)
Provides-Extra: baselines
Requires-Dist: readabilipy (==0.3.0) ; extra == 'baselines'
Requires-Dist: readability-lxml (==0.8.4.1) ; extra == 'baselines'
Requires-Dist: resiliparse (==0.15.2) ; extra == 'baselines'
Requires-Dist: justext (==3.0.2) ; extra == 'baselines'
Requires-Dist: gne (==0.3.1) ; extra == 'baselines'
Requires-Dist: goose3 (==3.1.20) ; extra == 'baselines'
Requires-Dist: boilerpy3 (==1.0.7) ; extra == 'baselines'
Requires-Dist: crawl4ai (==0.7.7) ; extra == 'baselines'

