charset-normalizer
filetype
python-magic
lxml
nltk
requests
beautifulsoup4
emoji
dataclasses-json
python-iso639
langdetect
numpy
rapidfuzz
backoff
typing-extensions
unstructured-client
wrapt
tqdm
psutil
python-oxmsg
html5lib
numba

[all-docs]
onnxruntime>=1.19.0
pikepdf
python-pptx>=1.0.1
python-docx>=1.1.2
pypdf
openpyxl
pandas
pdf2image
markdown
pdfminer.six
unstructured-inference>=1.1.1
pypandoc
networkx
pi_heif
xlrd
msoffcrypto-tool
effdet
google-cloud-vision
onnx>=1.17.0
unstructured.pytesseract>=0.3.12

[chunking-tokens]
tiktoken

[csv]
pandas

[doc]
python-docx>=1.1.2

[docx]
python-docx>=1.1.2

[epub]
pypandoc

[huggingface]
langdetect
sacremoses
sentencepiece
torch
transformers

[image]
onnx>=1.17.0
onnxruntime>=1.19.0
pdf2image
pdfminer.six
pikepdf
pi_heif
pypdf
google-cloud-vision
effdet
unstructured-inference>=1.1.1
unstructured.pytesseract>=0.3.12

[local-inference]
onnxruntime>=1.19.0
pikepdf
python-pptx>=1.0.1
python-docx>=1.1.2
pypdf
openpyxl
pandas
pdf2image
markdown
pdfminer.six
unstructured-inference>=1.1.1
pypandoc
networkx
pi_heif
xlrd
msoffcrypto-tool
effdet
google-cloud-vision
onnx>=1.17.0
unstructured.pytesseract>=0.3.12

[md]
markdown

[odt]
python-docx>=1.1.2
pypandoc

[org]
pypandoc

[paddleocr]
paddlepaddle>=3.0.0b1
unstructured.paddleocr==2.10.0

[pdf]
onnx>=1.17.0
onnxruntime>=1.19.0
pdf2image
pdfminer.six
pikepdf
pi_heif
pypdf
google-cloud-vision
effdet
unstructured-inference>=1.1.1
unstructured.pytesseract>=0.3.12

[ppt]
python-pptx>=1.0.1

[pptx]
python-pptx>=1.0.1

[rst]
pypandoc

[rtf]
pypandoc

[tsv]
pandas

[xlsx]
openpyxl
pandas
xlrd
networkx
msoffcrypto-tool
