.gitignore
.pre-commit-config.yaml
Makefile
README.md
pyproject.toml
requirements-dev.txt
requirements-examples.txt
requirements-pub.txt
requirements.txt
data/criteria_benchmark.jsonl
data/rag_benchmark.jsonl
data/files/Jane_Gilbert_resume.docx
data/files/Rhinoceros_Wikipedia.pdf
src/examples/criteria_evaluation_agent.py
src/examples/langchain_rag_agent.py
src/examples/openai_assistant_agent.py
src/flow_benchmark_tools.egg-info/PKG-INFO
src/flow_benchmark_tools.egg-info/SOURCES.txt
src/flow_benchmark_tools.egg-info/dependency_links.txt
src/flow_benchmark_tools.egg-info/requires.txt
src/flow_benchmark_tools.egg-info/top_level.txt
src/recursiveai/benchmark/__init__.py
src/recursiveai/benchmark/_internal/__init__.py
src/recursiveai/benchmark/_internal/_benchmark_evaluator.py
src/recursiveai/benchmark/_internal/_benchmark_output.py
src/recursiveai/benchmark/_internal/_criteria_evaluator.py
src/recursiveai/benchmark/_internal/_evaluation.py
src/recursiveai/benchmark/_internal/_run_output.py
src/recursiveai/benchmark/_internal/_evaluators/__init__.py
src/recursiveai/benchmark/_internal/_evaluators/_happy.py
src/recursiveai/benchmark/_internal/_evaluators/_llm_criteria_judge.py
src/recursiveai/benchmark/_internal/_evaluators/_llm_criteria_jury.py
src/recursiveai/benchmark/_internal/_evaluators/_llm_judge.py
src/recursiveai/benchmark/_internal/_evaluators/_llm_jury.py
src/recursiveai/benchmark/_internal/_evaluators/_regex_match.py
src/recursiveai/benchmark/_internal/_evaluators/_strict_match.py
src/recursiveai/benchmark/_internal/_llm/_anthropic_claude_model.py
src/recursiveai/benchmark/_internal/_llm/_azure_openai_gpt_model.py
src/recursiveai/benchmark/_internal/_llm/_google_gemini_model.py
src/recursiveai/benchmark/_internal/_llm/_llm_model.py
src/recursiveai/benchmark/_internal/_llm/_openai_gpt_model.py
src/recursiveai/benchmark/_internal/_metrics/_benchmark_metrics.py
src/recursiveai/benchmark/_internal/_metrics/_run_metrics.py
src/recursiveai/benchmark/_internal/_util/__init__.py
src/recursiveai/benchmark/api/__init__.py
src/recursiveai/benchmark/api/benchmark.py
src/recursiveai/benchmark/api/benchmark_agent.py
src/recursiveai/benchmark/api/benchmark_case.py
src/recursiveai/benchmark/api/benchmark_evaluator.py
src/recursiveai/benchmark/api/benchmark_run.py
src/recursiveai/benchmark/api/benchmark_runner.py
src/recursiveai/benchmark/api/exit_code.py
src/recursiveai/benchmark/api/agents/__init__.py
src/recursiveai/benchmark/api/agents/async_callback_agent.py
src/recursiveai/benchmark/api/agents/callback_agent.py
src/recursiveai/benchmark/api/util/__init__.py
src/tests/conftest.py
src/tests/api/conftest.py
src/tests/api/test_agents.py
src/tests/api/test_benchmarks.py
src/tests/api/test_runner.py
src/tests/internal/test_anthropic.py
src/tests/internal/test_azure.py
src/tests/internal/test_evaluators.py
src/tests/internal/test_google.py
src/tests/internal/test_metrics.py
src/tests/internal/test_openai.py
src/tests/internal/test_util.py