# Python package requirements, in pip requirements-file format.
# Every entry below sets only a minimum version (>=); no upper bounds or exact
# pins are given, so a fresh install resolves to the newest compatible releases.
# Install with: pip install -r <this file>

# HTML fetching, parsing, and article extraction libraries.
beautifulsoup4>=4.12.0
html2text>=2020.1.16
newspaper3k>=0.2.8
requests>=2.31.0
# Keyword matching and natural-language processing libraries.
flashtext>=2.7
# NOTE(review): NLTK 3.9 reportedly contains a security fix for unsafe loading
# of downloaded data packages; consider raising this floor - confirm against
# the NLTK changelog before changing.
nltk>=3.8
# Tabular data, JSON/XML handling, and document-to-Markdown conversion libraries.
pandas>=2.0.0
jsonschema>=4.24.0
jsonpath-ng>=1.6.0
markitdown>=0.1.2
genson>=1.2.0
xmltodict>=0.13.0
# Google authentication and API client libraries.
google-auth>=2.20.0
google-api-python-client>=2.0.0
# Domain-name parsing and XML/HTML processing libraries.
tldextract>=3.4.4
lxml>=4.9.0
# lxml.html.clean was split out of lxml into this separate distribution
# (as of lxml 5.2). Presumably listed because another dependency here still
# imports lxml.html.clean - TODO confirm which one before removing.
lxml_html_clean>=0.2.0
# General utilities: date parsing, progress bars, YAML.
python-dateutil>=2.8.2
tqdm>=4.64.0
PyYAML>=6.0
