LICENSE
README.md
setup.py
llmgrader/__init__.py
llmgrader/advanced.py
llmgrader/dataset.py
llmgrader/evaluate.py
llmgrader/pytest_plugin.py
llmgrader/synthesizer.py
llmgrader/test_case.py
llmgrader.egg-info/PKG-INFO
llmgrader.egg-info/SOURCES.txt
llmgrader.egg-info/dependency_links.txt
llmgrader.egg-info/entry_points.txt
llmgrader.egg-info/requires.txt
llmgrader.egg-info/top_level.txt
llmgrader/benchmarks/__init__.py
llmgrader/benchmarks/base.py
llmgrader/benchmarks/gsm8k.py
llmgrader/benchmarks/hellaswag.py
llmgrader/benchmarks/mmlu.py
llmgrader/cli/__init__.py
llmgrader/cli/main.py
llmgrader/integrations/__init__.py
llmgrader/integrations/crewai.py
llmgrader/integrations/langchain.py
llmgrader/integrations/llamaindex.py
llmgrader/metrics/__init__.py
llmgrader/metrics/base.py
llmgrader/metrics/agentic/__init__.py
llmgrader/metrics/agentic/argument_correctness.py
llmgrader/metrics/agentic/goal_accuracy.py
llmgrader/metrics/agentic/plan_adherence.py
llmgrader/metrics/agentic/plan_quality.py
llmgrader/metrics/agentic/step_efficiency.py
llmgrader/metrics/agentic/task_completion.py
llmgrader/metrics/agentic/tool_correctness.py
llmgrader/metrics/conversational/__init__.py
llmgrader/metrics/conversational/completeness.py
llmgrader/metrics/conversational/conversational_dag.py
llmgrader/metrics/conversational/conversational_geval.py
llmgrader/metrics/conversational/knowledge_retention.py
llmgrader/metrics/conversational/relevancy.py
llmgrader/metrics/conversational/role_adherence.py
llmgrader/metrics/conversational/turn_contextual_metrics.py
llmgrader/metrics/conversational/turn_faithfulness.py
llmgrader/metrics/conversational/turn_relevancy.py
llmgrader/metrics/custom/__init__.py
llmgrader/metrics/custom/dag.py
llmgrader/metrics/custom/geval.py
llmgrader/metrics/multimodal/__init__.py
llmgrader/metrics/multimodal/image_coherence.py
llmgrader/metrics/multimodal/image_editing.py
llmgrader/metrics/multimodal/image_helpfulness.py
llmgrader/metrics/multimodal/image_reference.py
llmgrader/metrics/multimodal/text_to_image.py
llmgrader/metrics/other/__init__.py
llmgrader/metrics/other/arena_geval.py
llmgrader/metrics/other/exact_match.py
llmgrader/metrics/other/json_correctness.py
llmgrader/metrics/other/pattern_match.py
llmgrader/metrics/other/prompt_alignment.py
llmgrader/metrics/other/summarization.py
llmgrader/metrics/other/topic_adherence.py
llmgrader/metrics/rag/__init__.py
llmgrader/metrics/rag/answer_relevancy.py
llmgrader/metrics/rag/contextual_precision.py
llmgrader/metrics/rag/contextual_recall.py
llmgrader/metrics/rag/contextual_relevancy.py
llmgrader/metrics/rag/faithfulness.py
llmgrader/metrics/safety/__init__.py
llmgrader/metrics/safety/bias.py
llmgrader/metrics/safety/hallucination.py
llmgrader/metrics/safety/misuse.py
llmgrader/metrics/safety/non_advice.py
llmgrader/metrics/safety/pii_leakage.py
llmgrader/metrics/safety/role_violation.py
llmgrader/metrics/safety/toxicity.py
llmgrader/providers/__init__.py
llmgrader/providers/anthropic_provider.py
llmgrader/providers/base.py
llmgrader/providers/ollama_provider.py
llmgrader/providers/openai_provider.py
llmgrader/tracing/__init__.py
llmgrader/tracing/tracer.py
tests/test_core.py