Metadata-Version: 2.2
Name: unstructured-ingest
Version: 0.5.6
Summary: A library that prepares raw documents for downstream ML tasks.
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
Author: Unstructured Technologies
Author-email: devops@unstructuredai.io
License: Apache-2.0
Keywords: NLP PDF HTML CV XML parsing preprocessing
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Education
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.9.0,<3.14
Description-Content-Type: text/markdown
License-File: LICENSE.md
Requires-Dist: dataclasses_json
Requires-Dist: click
Requires-Dist: python-dateutil
Requires-Dist: tqdm
Requires-Dist: pydantic>=2.7
Requires-Dist: opentelemetry-sdk
Requires-Dist: pandas
Provides-Extra: remote
Requires-Dist: unstructured-client>=0.26.1; extra == "remote"
Provides-Extra: csv
Requires-Dist: unstructured[tsv]; extra == "csv"
Provides-Extra: doc
Requires-Dist: unstructured[docx]; extra == "doc"
Provides-Extra: docx
Requires-Dist: unstructured[docx]; extra == "docx"
Provides-Extra: epub
Requires-Dist: unstructured[epub]; extra == "epub"
Provides-Extra: md
Requires-Dist: unstructured[md]; extra == "md"
Provides-Extra: msg
Requires-Dist: unstructured[msg]; extra == "msg"
Provides-Extra: odt
Requires-Dist: unstructured[odt]; extra == "odt"
Provides-Extra: org
Requires-Dist: unstructured[org]; extra == "org"
Provides-Extra: pdf
Requires-Dist: unstructured[pdf]; extra == "pdf"
Provides-Extra: ppt
Requires-Dist: unstructured[pptx]; extra == "ppt"
Provides-Extra: pptx
Requires-Dist: unstructured[pptx]; extra == "pptx"
Provides-Extra: rtf
Requires-Dist: unstructured[rtf]; extra == "rtf"
Provides-Extra: rst
Requires-Dist: unstructured[rst]; extra == "rst"
Provides-Extra: tsv
Requires-Dist: unstructured[tsv]; extra == "tsv"
Provides-Extra: xlsx
Requires-Dist: unstructured[xlsx]; extra == "xlsx"
Provides-Extra: airtable
Requires-Dist: pyairtable; extra == "airtable"
Provides-Extra: astradb
Requires-Dist: astrapy; extra == "astradb"
Provides-Extra: azure
Requires-Dist: adlfs; extra == "azure"
Requires-Dist: fsspec; extra == "azure"
Provides-Extra: azure-ai-search
Requires-Dist: azure-search-documents; extra == "azure-ai-search"
Provides-Extra: biomed
Requires-Dist: bs4; extra == "biomed"
Requires-Dist: requests; extra == "biomed"
Provides-Extra: box
Requires-Dist: fsspec; extra == "box"
Requires-Dist: boxfs; extra == "box"
Provides-Extra: chroma
Requires-Dist: chromadb; extra == "chroma"
Provides-Extra: clarifai
Requires-Dist: clarifai; extra == "clarifai"
Provides-Extra: confluence
Requires-Dist: atlassian-python-api; extra == "confluence"
Requires-Dist: requests; extra == "confluence"
Provides-Extra: couchbase
Requires-Dist: couchbase; extra == "couchbase"
Provides-Extra: delta-table
Requires-Dist: deltalake; extra == "delta-table"
Requires-Dist: boto3; extra == "delta-table"
Provides-Extra: discord
Requires-Dist: discord.py; extra == "discord"
Provides-Extra: dropbox
Requires-Dist: fsspec; extra == "dropbox"
Requires-Dist: dropboxdrivefs; extra == "dropbox"
Provides-Extra: duckdb
Requires-Dist: duckdb; extra == "duckdb"
Provides-Extra: elasticsearch
Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
Provides-Extra: gcs
Requires-Dist: bs4; extra == "gcs"
Requires-Dist: fsspec; extra == "gcs"
Requires-Dist: gcsfs; extra == "gcs"
Provides-Extra: github
Requires-Dist: pygithub>1.58.0; extra == "github"
Requires-Dist: requests; extra == "github"
Provides-Extra: gitlab
Requires-Dist: python-gitlab; extra == "gitlab"
Provides-Extra: google-drive
Requires-Dist: google-api-python-client; extra == "google-drive"
Provides-Extra: hubspot
Requires-Dist: hubspot-api-client; extra == "hubspot"
Requires-Dist: urllib3; extra == "hubspot"
Provides-Extra: jira
Requires-Dist: atlassian-python-api; extra == "jira"
Provides-Extra: kafka
Requires-Dist: confluent-kafka; extra == "kafka"
Provides-Extra: kdbai
Requires-Dist: kdbai-client>=1.4.0; extra == "kdbai"
Provides-Extra: lancedb
Requires-Dist: lancedb; extra == "lancedb"
Provides-Extra: milvus
Requires-Dist: pymilvus; extra == "milvus"
Provides-Extra: mongodb
Requires-Dist: pymongo; extra == "mongodb"
Provides-Extra: neo4j
Requires-Dist: cymple; extra == "neo4j"
Requires-Dist: neo4j-rust-ext; extra == "neo4j"
Requires-Dist: networkx; extra == "neo4j"
Provides-Extra: notion
Requires-Dist: backoff; extra == "notion"
Requires-Dist: httpx; extra == "notion"
Requires-Dist: notion-client; extra == "notion"
Requires-Dist: htmlBuilder; extra == "notion"
Provides-Extra: onedrive
Requires-Dist: bs4; extra == "onedrive"
Requires-Dist: msal; extra == "onedrive"
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
Provides-Extra: opensearch
Requires-Dist: opensearch-py; extra == "opensearch"
Provides-Extra: outlook
Requires-Dist: msal; extra == "outlook"
Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
Provides-Extra: pinecone
Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
Provides-Extra: postgres
Requires-Dist: psycopg2-binary; extra == "postgres"
Provides-Extra: qdrant
Requires-Dist: qdrant-client; extra == "qdrant"
Provides-Extra: reddit
Requires-Dist: praw; extra == "reddit"
Provides-Extra: redis
Requires-Dist: redis; extra == "redis"
Provides-Extra: s3
Requires-Dist: fsspec; extra == "s3"
Requires-Dist: s3fs; extra == "s3"
Provides-Extra: sharepoint
Requires-Dist: msal; extra == "sharepoint"
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
Provides-Extra: salesforce
Requires-Dist: simple-salesforce; extra == "salesforce"
Provides-Extra: sftp
Requires-Dist: paramiko; extra == "sftp"
Requires-Dist: fsspec; extra == "sftp"
Provides-Extra: slack
Requires-Dist: slack_sdk[optional]; extra == "slack"
Provides-Extra: snowflake
Requires-Dist: snowflake-connector-python; extra == "snowflake"
Requires-Dist: psycopg2-binary; extra == "snowflake"
Provides-Extra: wikipedia
Requires-Dist: wikipedia; extra == "wikipedia"
Provides-Extra: weaviate
Requires-Dist: weaviate-client; extra == "weaviate"
Provides-Extra: databricks-volumes
Requires-Dist: databricks-sdk; extra == "databricks-volumes"
Provides-Extra: databricks-delta-tables
Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
Provides-Extra: singlestore
Requires-Dist: singlestoredb; extra == "singlestore"
Provides-Extra: vectara
Requires-Dist: httpx; extra == "vectara"
Requires-Dist: requests; extra == "vectara"
Requires-Dist: aiofiles; extra == "vectara"
Provides-Extra: vastdb
Requires-Dist: vastdb; extra == "vastdb"
Requires-Dist: pyarrow; extra == "vastdb"
Requires-Dist: ibis; extra == "vastdb"
Provides-Extra: embed-huggingface
Requires-Dist: sentence-transformers; extra == "embed-huggingface"
Provides-Extra: embed-octoai
Requires-Dist: openai; extra == "embed-octoai"
Requires-Dist: tiktoken; extra == "embed-octoai"
Provides-Extra: embed-vertexai
Requires-Dist: vertexai; extra == "embed-vertexai"
Provides-Extra: embed-voyageai
Requires-Dist: voyageai; extra == "embed-voyageai"
Provides-Extra: embed-mixedbreadai
Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
Provides-Extra: openai
Requires-Dist: openai; extra == "openai"
Requires-Dist: tiktoken; extra == "openai"
Provides-Extra: bedrock
Requires-Dist: boto3; extra == "bedrock"
Requires-Dist: aioboto3; extra == "bedrock"
Provides-Extra: togetherai
Requires-Dist: together; extra == "togetherai"
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: description-content-type
Dynamic: home-page
Dynamic: keywords
Dynamic: license
Dynamic: provides-extra
Dynamic: requires-dist
Dynamic: requires-python
Dynamic: summary

# Unstructured Ingest

For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.
