Metadata-Version: 2.4
Name: daita-agents
Version: 0.19.0
Summary: Daita Agents - Data-focused AI agent framework with free local use and premium hosted enterprise features
Author-email: Daita <support@daita-tech.io>
License: Apache-2.0
Project-URL: Homepage, https://daita-tech.io
Project-URL: Documentation, https://docs.daita-tech.io
Keywords: ai,agents,llm,automation,workflows,sdk
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.11
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: pydantic>=2.0.0
Requires-Dist: aiohttp>=3.8.0
Requires-Dist: python-dotenv>=1.0.0
Requires-Dist: daita-cli>=0.1.0
Requires-Dist: PyYAML>=6.0.0
Requires-Dist: croniter>=1.0.0
Requires-Dist: aiofiles>=24.1.0
Requires-Dist: openai>=1.0.0
Requires-Dist: certifi>=2025.4.0
Requires-Dist: PyJWT>=2.10.0
Requires-Dist: numpy>=1.20.0
Requires-Dist: opentelemetry-api>=1.20.0
Requires-Dist: opentelemetry-sdk>=1.20.0
Provides-Extra: cli
Requires-Dist: watchdog>=3.0.0; extra == "cli"
Provides-Extra: recommended
Requires-Dist: anthropic>=0.5.0; extra == "recommended"
Requires-Dist: pandas>=1.5.0; extra == "recommended"
Requires-Dist: numpy>=1.20.0; extra == "recommended"
Requires-Dist: beautifulsoup4>=4.13.0; extra == "recommended"
Requires-Dist: lxml>=5.4.0; extra == "recommended"
Provides-Extra: memory
Requires-Dist: openai>=1.0.0; extra == "memory"
Requires-Dist: networkx>=3.0; extra == "memory"
Provides-Extra: voyage
Requires-Dist: voyageai>=0.3.0; extra == "voyage"
Provides-Extra: sentence-transformers
Requires-Dist: sentence-transformers>=2.2.0; extra == "sentence-transformers"
Provides-Extra: data
Requires-Dist: pandas>=1.5.0; extra == "data"
Requires-Dist: numpy>=1.20.0; extra == "data"
Requires-Dist: openpyxl>=3.1.0; extra == "data"
Requires-Dist: beautifulsoup4>=4.13.0; extra == "data"
Requires-Dist: lxml>=5.4.0; extra == "data"
Requires-Dist: jsonpath-ng>=1.7.0; extra == "data"
Requires-Dist: thefuzz[speedup]>=0.22.0; extra == "data"
Requires-Dist: psycopg2-binary>=2.9.0; extra == "data"
Requires-Dist: asyncpg>=0.27.0; extra == "data"
Requires-Dist: sqlglot>=25.0.0; extra == "data"
Provides-Extra: production
Requires-Dist: boto3>=1.38.0; extra == "production"
Requires-Dist: fastapi>=0.115.0; extra == "production"
Requires-Dist: uvicorn>=0.34.0; extra == "production"
Provides-Extra: anthropic
Requires-Dist: anthropic>=0.5.0; extra == "anthropic"
Provides-Extra: google
Requires-Dist: google-genai>=0.3.0; extra == "google"
Provides-Extra: otlp
Requires-Dist: opentelemetry-exporter-otlp>=1.20.0; extra == "otlp"
Provides-Extra: data-quality
Requires-Dist: scipy>=1.10.0; extra == "data-quality"
Provides-Extra: transformers
Requires-Dist: transformers>=4.51.0; extra == "transformers"
Provides-Extra: llm-all
Requires-Dist: anthropic>=0.5.0; extra == "llm-all"
Requires-Dist: google-genai>=0.3.0; extra == "llm-all"
Provides-Extra: postgresql
Requires-Dist: asyncpg>=0.27.0; extra == "postgresql"
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgresql"
Requires-Dist: sqlglot>=25.0.0; extra == "postgresql"
Provides-Extra: mysql
Requires-Dist: aiomysql>=0.2.0; extra == "mysql"
Requires-Dist: SQLAlchemy>=2.0.0; extra == "mysql"
Requires-Dist: sqlglot>=25.0.0; extra == "mysql"
Provides-Extra: mongodb
Requires-Dist: motor>=3.0.0; extra == "mongodb"
Provides-Extra: sqlite
Requires-Dist: aiosqlite>=0.21.0; extra == "sqlite"
Requires-Dist: SQLAlchemy>=2.0.0; extra == "sqlite"
Requires-Dist: sqlglot>=25.0.0; extra == "sqlite"
Provides-Extra: snowflake
Requires-Dist: snowflake-connector-python>=3.0.0; extra == "snowflake"
Requires-Dist: cryptography>=3.0.0; extra == "snowflake"
Requires-Dist: sqlglot>=25.0.0; extra == "snowflake"
Provides-Extra: elasticsearch
Requires-Dist: elasticsearch>=8.0.0; extra == "elasticsearch"
Provides-Extra: opensearch
Requires-Dist: opensearch-py>=2.0.0; extra == "opensearch"
Provides-Extra: github
Requires-Dist: httpx>=0.24.0; extra == "github"
Provides-Extra: bigquery
Requires-Dist: google-cloud-bigquery>=3.0.0; extra == "bigquery"
Requires-Dist: sqlglot>=25.0.0; extra == "bigquery"
Provides-Extra: databases
Requires-Dist: asyncpg>=0.27.0; extra == "databases"
Requires-Dist: psycopg2-binary>=2.9.0; extra == "databases"
Requires-Dist: aiomysql>=0.2.0; extra == "databases"
Requires-Dist: SQLAlchemy>=2.0.0; extra == "databases"
Requires-Dist: motor>=3.0.0; extra == "databases"
Requires-Dist: aiosqlite>=0.21.0; extra == "databases"
Requires-Dist: snowflake-connector-python>=3.0.0; extra == "databases"
Requires-Dist: elasticsearch>=8.0.0; extra == "databases"
Requires-Dist: google-cloud-bigquery>=3.0.0; extra == "databases"
Requires-Dist: sqlglot>=25.0.0; extra == "databases"
Provides-Extra: chromadb
Requires-Dist: chromadb>=1.3.0; extra == "chromadb"
Provides-Extra: pinecone
Requires-Dist: pinecone>=5.0.0; extra == "pinecone"
Provides-Extra: qdrant
Requires-Dist: qdrant-client>=1.16.0; extra == "qdrant"
Provides-Extra: vectordb
Requires-Dist: chromadb>=1.3.0; extra == "vectordb"
Requires-Dist: pinecone>=5.0.0; extra == "vectordb"
Requires-Dist: qdrant-client>=1.16.0; extra == "vectordb"
Provides-Extra: aws
Requires-Dist: boto3>=1.38.0; extra == "aws"
Provides-Extra: azure
Requires-Dist: azure-identity>=1.16.0; extra == "azure"
Requires-Dist: azure-mgmt-apimanagement>=4.0.0; extra == "azure"
Requires-Dist: azure-mgmt-cosmosdb>=9.0.0; extra == "azure"
Requires-Dist: azure-mgmt-eventhub>=11.0.0; extra == "azure"
Requires-Dist: azure-mgmt-rdbms>=10.0.0; extra == "azure"
Requires-Dist: azure-mgmt-redis>=14.0.0; extra == "azure"
Requires-Dist: azure-mgmt-servicebus>=8.0.0; extra == "azure"
Requires-Dist: azure-mgmt-sql>=3.0.0; extra == "azure"
Requires-Dist: azure-mgmt-storage>=21.0.0; extra == "azure"
Requires-Dist: azure-mgmt-subscription>=3.0.0; extra == "azure"
Requires-Dist: azure-storage-blob>=12.0.0; extra == "azure"
Provides-Extra: gcp
Requires-Dist: google-auth>=2.0.0; extra == "gcp"
Requires-Dist: google-cloud-resource-manager>=1.0.0; extra == "gcp"
Requires-Dist: google-cloud-storage>=2.0.0; extra == "gcp"
Requires-Dist: google-api-python-client>=2.0.0; extra == "gcp"
Requires-Dist: google-cloud-firestore>=2.0.0; extra == "gcp"
Requires-Dist: google-cloud-bigtable>=2.0.0; extra == "gcp"
Requires-Dist: google-cloud-pubsub>=2.0.0; extra == "gcp"
Requires-Dist: google-cloud-redis>=2.0.0; extra == "gcp"
Requires-Dist: google-cloud-api-gateway>=1.0.0; extra == "gcp"
Requires-Dist: google-cloud-bigquery>=3.0.0; extra == "gcp"
Provides-Extra: google-drive
Requires-Dist: google-api-python-client>=2.0.0; extra == "google-drive"
Requires-Dist: google-auth>=2.0.0; extra == "google-drive"
Requires-Dist: google-auth-oauthlib>=1.0.0; extra == "google-drive"
Requires-Dist: pypdf>=3.0.0; extra == "google-drive"
Requires-Dist: python-docx>=1.0.0; extra == "google-drive"
Provides-Extra: slack
Requires-Dist: slack-sdk>=3.31.0; extra == "slack"
Provides-Extra: mcp
Requires-Dist: mcp>=1.0.0; extra == "mcp"
Provides-Extra: neo4j
Requires-Dist: neo4j>=5.0.0; extra == "neo4j"
Provides-Extra: redis
Requires-Dist: redis>=4.0.0; extra == "redis"
Provides-Extra: lineage
Requires-Dist: networkx>=3.0; extra == "lineage"
Provides-Extra: cloud
Requires-Dist: boto3>=1.38.0; extra == "cloud"
Requires-Dist: azure-identity>=1.16.0; extra == "cloud"
Requires-Dist: azure-mgmt-apimanagement>=4.0.0; extra == "cloud"
Requires-Dist: azure-mgmt-cosmosdb>=9.0.0; extra == "cloud"
Requires-Dist: azure-mgmt-eventhub>=11.0.0; extra == "cloud"
Requires-Dist: azure-mgmt-rdbms>=10.0.0; extra == "cloud"
Requires-Dist: azure-mgmt-redis>=14.0.0; extra == "cloud"
Requires-Dist: azure-mgmt-servicebus>=8.0.0; extra == "cloud"
Requires-Dist: azure-mgmt-sql>=3.0.0; extra == "cloud"
Requires-Dist: azure-mgmt-storage>=21.0.0; extra == "cloud"
Requires-Dist: azure-mgmt-subscription>=3.0.0; extra == "cloud"
Requires-Dist: azure-storage-blob>=12.0.0; extra == "cloud"
Requires-Dist: google-api-python-client>=2.0.0; extra == "cloud"
Requires-Dist: google-auth>=2.0.0; extra == "cloud"
Requires-Dist: google-auth-oauthlib>=1.0.0; extra == "cloud"
Requires-Dist: google-cloud-resource-manager>=1.0.0; extra == "cloud"
Requires-Dist: google-cloud-storage>=2.0.0; extra == "cloud"
Requires-Dist: google-cloud-firestore>=2.0.0; extra == "cloud"
Requires-Dist: google-cloud-bigtable>=2.0.0; extra == "cloud"
Requires-Dist: google-cloud-pubsub>=2.0.0; extra == "cloud"
Requires-Dist: google-cloud-redis>=2.0.0; extra == "cloud"
Requires-Dist: google-cloud-api-gateway>=1.0.0; extra == "cloud"
Requires-Dist: google-cloud-bigquery>=3.0.0; extra == "cloud"
Requires-Dist: pypdf>=3.0.0; extra == "cloud"
Requires-Dist: python-docx>=1.0.0; extra == "cloud"
Requires-Dist: slack-sdk>=3.31.0; extra == "cloud"
Provides-Extra: web
Requires-Dist: beautifulsoup4>=4.13.0; extra == "web"
Requires-Dist: lxml>=5.4.0; extra == "web"
Provides-Extra: api-server
Requires-Dist: fastapi>=0.115.0; extra == "api-server"
Requires-Dist: uvicorn>=0.34.0; extra == "api-server"
Requires-Dist: python-multipart>=0.0.20; extra == "api-server"
Provides-Extra: websearch
Requires-Dist: tavily-python>=0.3.0; extra == "websearch"
Provides-Extra: exa
Requires-Dist: exa-py>=2.0.0; extra == "exa"
Provides-Extra: complete
Requires-Dist: anthropic>=0.5.0; extra == "complete"
Requires-Dist: google-genai>=0.3.0; extra == "complete"
Requires-Dist: asyncpg>=0.27.0; extra == "complete"
Requires-Dist: psycopg2-binary>=2.9.0; extra == "complete"
Requires-Dist: SQLAlchemy>=2.0.0; extra == "complete"
Requires-Dist: pandas>=1.5.0; extra == "complete"
Requires-Dist: numpy>=1.20.0; extra == "complete"
Requires-Dist: beautifulsoup4>=4.13.0; extra == "complete"
Requires-Dist: lxml>=5.4.0; extra == "complete"
Requires-Dist: jsonpath-ng>=1.7.0; extra == "complete"
Requires-Dist: tavily-python>=0.3.0; extra == "complete"
Requires-Dist: exa-py>=2.0.0; extra == "complete"
Requires-Dist: boto3>=1.38.0; extra == "complete"
Requires-Dist: google-api-python-client>=2.0.0; extra == "complete"
Requires-Dist: google-auth>=2.0.0; extra == "complete"
Requires-Dist: google-auth-oauthlib>=1.0.0; extra == "complete"
Requires-Dist: pypdf>=3.0.0; extra == "complete"
Requires-Dist: python-docx>=1.0.0; extra == "complete"
Requires-Dist: slack-sdk>=3.31.0; extra == "complete"
Provides-Extra: all
Requires-Dist: anthropic>=0.5.0; extra == "all"
Requires-Dist: google-genai>=0.3.0; extra == "all"
Requires-Dist: transformers>=4.51.0; extra == "all"
Requires-Dist: asyncpg>=0.27.0; extra == "all"
Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
Requires-Dist: aiomysql>=0.2.0; extra == "all"
Requires-Dist: SQLAlchemy>=2.0.0; extra == "all"
Requires-Dist: motor>=3.0.0; extra == "all"
Requires-Dist: aiosqlite>=0.21.0; extra == "all"
Requires-Dist: snowflake-connector-python>=3.0.0; extra == "all"
Requires-Dist: cryptography>=3.0.0; extra == "all"
Requires-Dist: elasticsearch>=8.0.0; extra == "all"
Requires-Dist: google-cloud-bigquery>=3.0.0; extra == "all"
Requires-Dist: opensearch-py>=2.0.0; extra == "all"
Requires-Dist: chromadb>=1.3.0; extra == "all"
Requires-Dist: pinecone>=5.0.0; extra == "all"
Requires-Dist: qdrant-client>=1.16.0; extra == "all"
Requires-Dist: boto3>=1.38.0; extra == "all"
Requires-Dist: httpx>=0.24.0; extra == "all"
Requires-Dist: google-api-python-client>=2.0.0; extra == "all"
Requires-Dist: google-auth>=2.0.0; extra == "all"
Requires-Dist: google-auth-oauthlib>=1.0.0; extra == "all"
Requires-Dist: google-cloud-resource-manager>=1.0.0; extra == "all"
Requires-Dist: google-cloud-storage>=2.0.0; extra == "all"
Requires-Dist: google-cloud-firestore>=2.0.0; extra == "all"
Requires-Dist: google-cloud-bigtable>=2.0.0; extra == "all"
Requires-Dist: google-cloud-pubsub>=2.0.0; extra == "all"
Requires-Dist: google-cloud-redis>=2.0.0; extra == "all"
Requires-Dist: google-cloud-api-gateway>=1.0.0; extra == "all"
Requires-Dist: pypdf>=3.0.0; extra == "all"
Requires-Dist: python-docx>=1.0.0; extra == "all"
Requires-Dist: slack-sdk>=3.31.0; extra == "all"
Requires-Dist: mcp>=1.0.0; extra == "all"
Requires-Dist: neo4j>=5.0.0; extra == "all"
Requires-Dist: redis>=4.0.0; extra == "all"
Requires-Dist: networkx>=3.0; extra == "all"
Requires-Dist: pandas>=1.5.0; extra == "all"
Requires-Dist: numpy>=1.20.0; extra == "all"
Requires-Dist: openpyxl>=3.1.0; extra == "all"
Requires-Dist: beautifulsoup4>=4.13.0; extra == "all"
Requires-Dist: lxml>=5.4.0; extra == "all"
Requires-Dist: jsonpath-ng>=1.7.0; extra == "all"
Requires-Dist: thefuzz[speedup]>=0.22.0; extra == "all"
Requires-Dist: scipy>=1.10.0; extra == "all"
Requires-Dist: tavily-python>=0.3.0; extra == "all"
Requires-Dist: exa-py>=2.0.0; extra == "all"
Requires-Dist: fastapi>=0.115.0; extra == "all"
Requires-Dist: uvicorn>=0.34.0; extra == "all"
Requires-Dist: python-multipart>=0.0.20; extra == "all"
Requires-Dist: opentelemetry-exporter-otlp>=1.20.0; extra == "all"
Provides-Extra: dev
Requires-Dist: pytest>=8.0.0; extra == "dev"
Requires-Dist: pytest-asyncio>=0.26.0; extra == "dev"
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
Requires-Dist: black>=23.0.0; extra == "dev"
Requires-Dist: mypy>=1.4.0; extra == "dev"
Requires-Dist: pre-commit>=3.0.0; extra == "dev"
Requires-Dist: aiosqlite>=0.21.0; extra == "dev"
Requires-Dist: sqlglot>=25.0.0; extra == "dev"
Requires-Dist: pandas>=1.5.0; extra == "dev"
Dynamic: license-file

# Daita Agents

**Open-source Python SDK for building production AI agents.**

Daita Agents gives you a clean, minimal API for autonomous tool-calling agents that work with any LLM provider — OpenAI, Anthropic, Gemini, Grok, and more. It includes zero-configuration tracing, pluggable data sources, composable skills, and a workflow system for multi-agent pipelines.

[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
[![Python](https://img.shields.io/badge/python-3.11%2B-blue)](https://www.python.org)
[![PyPI](https://img.shields.io/badge/pypi-daita--agents-orange)](https://pypi.org/project/daita-agents/)
[![Version](https://img.shields.io/pypi/v/daita-agents.svg)](https://pypi.org/project/daita-agents/)

---

## Quickstart

```bash
pip install daita-agents
```

Point an agent at a database and start asking questions:

```python
import asyncio
from daita import Agent

async def main():
    agent = await Agent.from_db(
        "sqlite:///sales.db",
        model="gpt-4o",
    )

    result = await agent.run("What were the top 5 products by revenue last quarter?")
    print(result)

asyncio.run(main())
```

`Agent.from_db()` inspects the schema, generates tool wrappers, and composes a system prompt — no manual configuration needed.

---

## Features

- **Multi-provider LLM support** — OpenAI, Anthropic, Gemini, Grok (or bring your own)
- **Autonomous tool calling** — agents plan and execute multi-step tool chains without manual orchestration
- **`@tool` decorator** — turn any sync or async Python function into an LLM-callable tool in one line
- **`Agent.from_db()`** — point at a database connection string and get a fully configured data agent in one call
- **Skills** — reusable, composable units of agent capability that bundle instructions + tools (subclass `BaseSkill` or use the `Skill` helper)
- **Streaming** — real-time event-based output via `agent.stream()` or `on_event` callback
- **Conversation history** — stateful multi-turn sessions with local persistence
- **Plugin ecosystem** — PostgreSQL, MySQL, MongoDB, SQLite, BigQuery, Snowflake, S3, Slack, Elasticsearch, Pinecone, ChromaDB, Qdrant, Neo4j, Redis, MCP, and more
- **Embeddings** — pluggable providers (OpenAI, Gemini, Voyage, sentence-transformers) via `BaseEmbeddingProvider`
- **Memory** — persistent semantic memory with working memory, memory graph, and automatic local/cloud detection
- **Watch system** — monitor databases and APIs continuously; trigger agent actions when thresholds are crossed
- **Workflows** — connect multiple agents into pipelines via relay channels
- **Data quality enforcement** — `ItemAssertion` + `query_checked()` validate every row and fail fast with structured violations
- **Agent evals** — developer-preview eval suites for checking answers, tools, SQL, data operations, skills, plugins, budgets, baselines, and optional LLM judges
- **Agent graph** — built-in graph backend powering lineage & catalog; expose traversal tools to agents with `register_graph_tools()`
- **Zero-config tracing** — every LLM call and tool execution is automatically traced (tokens, latency, cost); optional OTLP export to Datadog, Jaeger, Honeycomb, etc.
- **Retry & reliability** — configurable exponential backoff with permanent-error detection
- **Focus DSL** — pre-filter tool results before the LLM sees them, reducing token usage

---

## Examples

### Custom tools with `@tool`

```python
import asyncio
from daita import Agent, tool

@tool
def search_products(query: str, max_results: int = 5) -> list:
    """Search the product catalog.

    Args:
        query: Search terms
        max_results: Maximum number of results to return
    """
    return [{"name": "Widget A", "price": 9.99}]

@tool
def calculate_discount(price: float, pct: float) -> float:
    """Calculate a discounted price.

    Args:
        price: Original price
        pct: Discount percentage (0-100)
    """
    return round(price * (1 - pct / 100), 2)

async def main():
    agent = Agent(
        name="Shopping Assistant",
        llm_provider="openai",
        model="gpt-4o",
        tools=[search_products, calculate_discount],
    )

    result = await agent.run("Find me a widget and apply a 15% discount.")
    print(result)

asyncio.run(main())
```

Both sync and async functions work with `@tool`. Parameter types and descriptions are auto-extracted from type hints and docstrings.

---

### Database agent with `Agent.from_db()`

The fastest way to build a data agent. Pass a connection string (or plugin instance) and get a fully configured agent with schema-aware tools, an auto-generated system prompt, and optional lineage/memory:

```python
import asyncio
from daita import Agent

async def main():
    agent = await Agent.from_db(
        "postgresql://user:pass@localhost/sales_db",
        model="gpt-4o",
        lineage=True,   # track data lineage automatically
        memory=True,    # remember business context across sessions
    )

    result = await agent.run("What were our top 5 products by revenue last quarter?")
    print(result)

asyncio.run(main())
```

You can also add a database plugin manually for more control:

```python
import asyncio
from daita import Agent
from daita.plugins import postgresql

async def main():
    agent = Agent(name="Sales Analyst", llm_provider="openai", model="gpt-4o")
    agent.add_plugin(postgresql(host="localhost", database="sales_db", user="analyst", password="secret"))

    result = await agent.run("What were the top 5 products by revenue last quarter?")
    print(result)

asyncio.run(main())
```

---

### Skills — reusable units of capability

Skills bundle domain instructions with a set of tools. Use the `Skill` helper for simple cases, or subclass `BaseSkill` when you need dynamic instructions or plugin dependencies.

```python
import asyncio
from daita import Agent, Skill, tool

@tool
def format_report(data: list, title: str) -> str:
    """Render a markdown report."""
    rows = "\n".join(f"- {r}" for r in data)
    return f"# {title}\n\n{rows}"

@tool
def generate_chart(series: list, kind: str = "bar") -> str:
    """Generate a chart description."""
    return f"{kind} chart with {len(series)} series"

report_skill = Skill(
    name="report_gen",
    description="Produces polished analytical reports",
    instructions="Always render results as markdown with a title and bulleted rows.",
    tools=[format_report, generate_chart],
)

async def main():
    agent = Agent(name="Analyst", llm_provider="openai", model="gpt-4o")
    agent.add_skill(report_skill)

    result = await agent.run("Summarize Q3 revenue with a chart.")
    print(result)

asyncio.run(main())
```

For skills that need plugin access, subclass `BaseSkill` and declare `requires()`:

```python
from daita import BaseSkill
from daita.plugins.base_db import BaseDatabasePlugin

class MigrationsSkill(BaseSkill):
    name = "migrations"
    instructions = "Follow forward-only migration policy."

    def requires(self):
        return {"db": BaseDatabasePlugin}
```

---

### Data quality enforcement with `ItemAssertion`

Validate every row returned by a database query; violations raise `DataQualityError` (permanent, non-retried) with the full list attached.

```python
import asyncio
from daita import ItemAssertion, DataQualityError
from daita.plugins import postgresql

async def main():
    async with postgresql(host="localhost", database="sales_db") as db:
        try:
            rows = await db.query_checked(
                "SELECT id, amount, customer_id FROM transactions WHERE day = CURRENT_DATE",
                assertions=[
                    ItemAssertion(lambda r: r["amount"] > 0, "All amounts must be positive"),
                    ItemAssertion(lambda r: r["customer_id"] is not None, "Every row needs a customer_id"),
                ],
            )
            print(f"{len(rows)} clean rows")
        except DataQualityError as exc:
            print(f"Data quality failure: {exc}")

asyncio.run(main())
```

---

### Agent evals

Agent evals are a developer-preview system for testing runnable Daita agents locally or in CI. An eval suite loads an agent through a Python factory, runs one or more prompts, and writes structured artifacts that show what passed, failed, and changed.

Eval suites can check:

- final-answer text and numeric values
- required or forbidden tools
- SQL safety and query shape
- non-SQL data operations across files, APIs, storage, and vector search
- skill and plugin usage, latency, and errors
- cost, latency, token, and iteration budgets
- repeat-run stability
- baselines and optional structured LLM judges

```yaml
name: sales-agent-evals
version: 1

agent:
  factory: "myapp.agents:create_sales_agent"
  kwargs:
    model: gpt-4o-mini

defaults:
  runs: 2
  max_iterations: 8

cases:
  - id: top-products
    prompt: What were the top 5 products by revenue?
    expectations:
      answer:
        contains: ["Widget A"]
        numeric:
          - label: revenue
            expected: 12840.50
            tolerance: 0.01
      tools:
        required: ["sqlite_query"]
        max_calls: 4
      sql:
        read_only: true
        require_limit: true
        must_include: ["SUM", "GROUP BY"]
        must_not_include: ["DELETE", "DROP"]
      skills:
        required: ["schema_discovery"]
        max_errors: 0
      plugins:
        required: ["sqlite"]
        max_latency_ms: 3000
      budgets:
        max_tokens: 8000
        max_latency_ms: 15000
      stability:
        require_same_tools: true
        max_answer_variants: 1
```

Run a suite from Python:

```python
import asyncio
from daita.evals import EvalSuite
from daita.evals.reporters import render_pretty

async def main():
    report = await EvalSuite.from_file("evals/sales-agent.yaml").run()
    print(render_pretty(report))

asyncio.run(main())
```

Eval runs write `report.json`, `summary.md`, JUnit XML, per-case artifacts, per-run artifacts, repeat-run diffs, judge artifacts, and baseline comparisons. The CLI command (`daita eval`) is planned; use the Python API while evals are in developer preview.

---

### Streaming with `agent.stream()`

Use `agent.stream()` to receive real-time events as an async generator:

```python
import asyncio
from daita import Agent
from daita.core.streaming import EventType

async def main():
    agent = Agent(name="assistant", llm_provider="openai", model="gpt-4o")

    async for event in agent.stream("Explain transformer attention mechanisms"):
        if event.type == EventType.THINKING:
            print(event.content, end="", flush=True)
        elif event.type == EventType.TOOL_CALL:
            print(f"\n[calling {event.tool_name}]")
        elif event.type == EventType.COMPLETE:
            print(f"\n\nDone. Tokens used: {event.token_usage}")

asyncio.run(main())
```

Alternatively, pass an `on_event` callback to `run()`:

```python
await agent.run("...", on_event=lambda e: print(e))
```

---

### Multi-turn conversations with `ConversationHistory`

```python
import asyncio
from daita import Agent, ConversationHistory

async def main():
    agent = Agent(name="Support Bot", llm_provider="anthropic", model="claude-sonnet-4-6")
    history = ConversationHistory(session_id="alice-session")

    await agent.run("My name is Alice and I prefer concise answers.", history=history)
    result = await agent.run("What's my name and preference?", history=history)
    print(result)  # "Your name is Alice and you prefer concise answers."

asyncio.run(main())
```

Sessions are persisted to `.daita/sessions/` and survive across process restarts.

---

### Monitor data sources with `@agent.watch()`

Continuously poll a data source and trigger the agent when a threshold is crossed:

```python
import asyncio
from daita import Agent, WatchEvent
from daita.plugins import postgresql

db = postgresql(host="localhost", database="ops_db")
agent = Agent(name="Ops Monitor", llm_provider="openai", model="gpt-4o")
agent.add_plugin(db)

@agent.watch(
    source=db,
    condition="SELECT COUNT(*) FROM failed_jobs WHERE created_at > NOW() - INTERVAL '5m'",
    threshold=lambda v: v > 10,
    interval="1m",
)
async def on_job_failures(event: WatchEvent):
    await agent.run(f"There are {event.value} failed jobs in the last 5 minutes. Diagnose and suggest fixes.")

asyncio.run(agent.start())
```

Watches start lazily on the first `run()` call, or explicitly with `await agent.start()`.

---

### Multi-agent workflow

```python
import asyncio
from daita import Agent, Workflow

async def main():
    fetcher  = Agent(name="Data Fetcher",  llm_provider="openai", model="gpt-4o")
    analyzer = Agent(name="Analyzer",      llm_provider="openai", model="gpt-4o")

    workflow = Workflow("Sales Pipeline")
    workflow.add_agent("fetcher",  fetcher)
    workflow.add_agent("analyzer", analyzer)
    workflow.connect("fetcher", "raw_data", "analyzer")

    await workflow.start()
    await workflow.inject_data("fetcher", {"query": "Q3 sales"}, task="fetch")
    await workflow.stop()

asyncio.run(main())
```

---

### Memory-enabled agent

```python
import asyncio
from daita import Agent
from daita.plugins import memory

async def main():
    agent = Agent(name="Assistant", llm_provider="anthropic", model="claude-sonnet-4-6")
    agent.add_plugin(memory())

    await agent.run("My name is Alex and I prefer concise answers.")
    result = await agent.run("What's my preference?")
    print(result)

asyncio.run(main())
```

Memory auto-detects local or cloud backend and includes working memory, fact extraction, contradiction handling, and a memory graph for association.

---

### Custom embedding providers

```python
import asyncio
from daita import BaseEmbeddingProvider
from daita.embeddings import create_embedding_provider

async def main():
    # Built-in: "openai", "gemini", "voyage", "sentence_transformers", "mock"
    embedder = create_embedding_provider("voyage", model="voyage-3")
    vectors = await embedder.embed(["hello world", "another doc"])

asyncio.run(main())
```

Subclass `BaseEmbeddingProvider` to plug in any embedding model you want.

---

### Vector database search

```python
import asyncio
from daita import Agent
from daita.plugins import chroma

async def main():
    agent = Agent(name="Knowledge Assistant", llm_provider="openai", model="gpt-4o")
    agent.add_plugin(chroma(path="./vectors", collection="docs"))

    result = await agent.run("What do our docs say about authentication?")
    print(result)

asyncio.run(main())
```

---

### Expose graph traversal to agents

Lineage and catalog plugins populate a shared agent graph automatically. Call `register_graph_tools()` to let the agent traverse it directly:

```python
import asyncio
from daita import Agent
from daita.plugins import lineage
from daita.core.graph import register_graph_tools

async def main():
    agent = Agent(name="Impact Analyst", llm_provider="openai", model="gpt-4o")
    agent.add_plugin(lineage())
    register_graph_tools(agent)   # adds graph_subgraph, graph_shortest_path, impact_analysis

    await agent.run("What downstream tables break if we drop customers.email?")

asyncio.run(main())
```

---

### OTLP tracing export

```python
from daita import configure_tracing

configure_tracing(
    exporter="otlp",
    endpoint="https://otel.example.com",
    service_name="my-daita-agent",
)
```

Install with `pip install "daita-agents[otlp]"` to enable the OTLP exporter. Spans cover LLM calls, tool invocations, retries, and plugin operations.

---

### MCP (Model Context Protocol) integration

```python
import asyncio
from daita import Agent
from daita.plugins import mcp

async def main():
    agent = Agent(
        name="File Analyst",
        llm_provider="openai",
        model="gpt-4o",
        mcp=mcp.server(command="uvx", args=["mcp-server-filesystem", "/data"]),
    )

    result = await agent.run("Read report.csv and summarize the totals.")
    print(result)

asyncio.run(main())
```

---

## Plugins

### Databases

| Plugin          | Description                           | Extra             |
| --------------- | ------------------------------------- | ----------------- |
| `postgresql`    | Query and write PostgreSQL (pgvector) | `[postgresql]`    |
| `mysql`         | Query and write MySQL                 | `[mysql]`         |
| `mongodb`       | Query MongoDB collections             | `[mongodb]`       |
| `sqlite`        | Query and write SQLite                | `[sqlite]`        |
| `snowflake`     | Query Snowflake data warehouse        | `[snowflake]`     |
| `bigquery`      | Query Google BigQuery                 | `[bigquery]`      |
| `elasticsearch` | Search Elasticsearch indices          | `[elasticsearch]` |

### Vector Databases

| Plugin     | Description                  | Extra        |
| ---------- | ---------------------------- | ------------ |
| `chroma`   | Local/embedded vector search | `[chromadb]` |
| `pinecone` | Managed cloud vector search  | `[pinecone]` |
| `qdrant`   | Self-hosted vector search    | `[qdrant]`   |

### Integrations & Cloud

| Plugin            | Description                      | Extra            |
| ----------------- | -------------------------------- | ---------------- |
| `rest`            | Call REST APIs                   | _(included)_     |
| `s3`              | Read/write S3 objects            | `[aws]`          |
| `slack`           | Send Slack messages              | `[slack]`        |
| `email`           | Send/receive email (SMTP/IMAP)   | _(included)_     |
| `google_drive`    | Read files from Google Drive     | `[google-drive]` |
| `websearch`       | AI-optimized web search (Tavily) | `[websearch]`    |
| `exa_search`      | AI-powered semantic search (Exa) | `[exa]`          |
| `mcp`             | Model Context Protocol servers   | `[mcp]`          |
| `redis_messaging` | Redis pub/sub messaging          | `[redis]`        |
| `redis`           | Redis data store operations      | `[redis]`        |
| `neo4j`           | Graph database (Cypher queries)  | `[neo4j]`        |

### Knowledge & Orchestration

| Plugin         | Description                                 |
| -------------- | ------------------------------------------- |
| `memory`       | Persistent semantic agent memory            |
| `catalog`      | Schema discovery and metadata management    |
| `lineage`      | Data lineage tracking and impact analysis   |
| `orchestrator` | Multi-agent coordination and task routing   |
| `data_quality` | Data profiling and quality checks           |
| `transformer`  | SQL transformation management and execution |

---

## Installation

### Core (OpenAI included)

```bash
pip install daita-agents
```

### LLM providers

```bash
pip install "daita-agents[anthropic]"   # Claude
pip install "daita-agents[google]"      # Gemini
pip install "daita-agents[llm-all]"     # All LLM providers
```

### Database plugins

```bash
pip install "daita-agents[postgresql]"
pip install "daita-agents[mysql]"
pip install "daita-agents[mongodb]"
pip install "daita-agents[sqlite]"
pip install "daita-agents[bigquery]"
pip install "daita-agents[snowflake]"
pip install "daita-agents[elasticsearch]"
pip install "daita-agents[databases]"   # All traditional databases
```

### Vector database plugins

```bash
pip install "daita-agents[chromadb]"
pip install "daita-agents[pinecone]"
pip install "daita-agents[qdrant]"
pip install "daita-agents[vectordb]"    # All vector databases
```

### Embedding providers

```bash
pip install "daita-agents[voyage]"                # Voyage AI
pip install "daita-agents[sentence-transformers]" # Local sentence-transformers
```

### Cloud

```bash
pip install "daita-agents[aws]"          # boto3
pip install "daita-agents[gcp]"          # Google Cloud services
pip install "daita-agents[google-drive]" # Drive + document parsers
pip install "daita-agents[cloud]"        # All cloud integrations
```

### Observability & production

```bash
pip install "daita-agents[otlp]"         # Export traces to OTLP collectors
pip install "daita-agents[api-server]"   # FastAPI + Uvicorn
pip install "daita-agents[production]"   # AWS + API server
```

### Data & content

```bash
pip install "daita-agents[data]"         # pandas, numpy, openpyxl, parsing libs
pip install "daita-agents[web]"          # beautifulsoup4, lxml
pip install "daita-agents[data-quality]" # Advanced quality checks (scipy)
pip install "daita-agents[lineage]"      # networkx graph support
```

### Bundles

```bash
pip install "daita-agents[recommended]"  # Anthropic + pandas + beautifulsoup4
pip install "daita-agents[complete]"     # Most features, no heavy packages
pip install "daita-agents[all]"          # Everything (large install)
```

---

## Exception hierarchy

All exceptions are importable from `daita`:

`DaitaError` → `AgentError`, `LLMError`, `ConfigError`, `PluginError`, `SkillError`, `WorkflowError`, `TransientError`, `RetryableError`, `PermanentError`, `RateLimitError`, `AuthenticationError`, `ValidationError`, `FocusDSLError`, `DataQualityError`

---

## Documentation

See the [`examples/`](examples/) directory for full working examples, or the [documentation](https://docs.daita-tech.io).

---

## Contributing

See [CONTRIBUTING.md](CONTRIBUTING.md). All contributions are welcome.

## License

Apache 2.0 — see [LICENSE](LICENSE).

---

_Built by [Daita](https://daita-tech.io)_
