LICENSE
README.md
pyproject.toml
setup.py
evals/__init__.py
evals/version.py
evals/benchmark/__init__.py
evals/benchmark/benchmark.py
evals/benchmark/utils.py
evals/benchmark/stresscli/__init__.py
evals/benchmark/stresscli/stresscli.py
evals/benchmark/stresscli/commands/__init__.py
evals/benchmark/stresscli/commands/dump.py
evals/benchmark/stresscli/commands/load_test.py
evals/benchmark/stresscli/commands/main.py
evals/benchmark/stresscli/commands/metrics.py
evals/benchmark/stresscli/commands/metrics_docker.py
evals/benchmark/stresscli/commands/metrics_util.py
evals/benchmark/stresscli/commands/report.py
evals/benchmark/stresscli/commands/utilization.py
evals/benchmark/stresscli/commands/utils.py
evals/benchmark/stresscli/commands/validate.py
evals/benchmark/stresscli/locust/__init__.py
evals/benchmark/stresscli/locust/aistress.py
evals/benchmark/stresscli/locust/audioqnabench.py
evals/benchmark/stresscli/locust/audioqnafixed.py
evals/benchmark/stresscli/locust/chatqna_qlist_pubmed.py
evals/benchmark/stresscli/locust/chatqnabench.py
evals/benchmark/stresscli/locust/chatqnafixed.py
evals/benchmark/stresscli/locust/codegenbench.py
evals/benchmark/stresscli/locust/codegenfixed.py
evals/benchmark/stresscli/locust/codetransbench.py
evals/benchmark/stresscli/locust/codetransfixed.py
evals/benchmark/stresscli/locust/constant_load_shape.py
evals/benchmark/stresscli/locust/docsumbench.py
evals/benchmark/stresscli/locust/docsumfixed.py
evals/benchmark/stresscli/locust/embeddingfixed.py
evals/benchmark/stresscli/locust/embedservefixed.py
evals/benchmark/stresscli/locust/faqgenbench.py
evals/benchmark/stresscli/locust/faqgenfixed.py
evals/benchmark/stresscli/locust/gmc_chatqnafixed.py
evals/benchmark/stresscli/locust/llmfixed.py
evals/benchmark/stresscli/locust/llmservefixed.py
evals/benchmark/stresscli/locust/locust.conf
evals/benchmark/stresscli/locust/poisson_load_shape.py
evals/benchmark/stresscli/locust/rerankingfixed.py
evals/benchmark/stresscli/locust/rerankservefixed.py
evals/benchmark/stresscli/locust/retrieverfixed.py
evals/benchmark/stresscli/locust/tokenresponse.py
evals/benchmark/stresscli/locust/visualqnabench.py
evals/benchmark/stresscli/locust/visualqnafixed.py
evals/evaluation/__init__.py
evals/evaluation/bigcode_evaluation_harness/__init__.py
evals/evaluation/bigcode_evaluation_harness/accuracy.py
evals/evaluation/bigcode_evaluation_harness/api_evaluator.py
evals/evaluation/bigcode_evaluation_harness/arguments.py
evals/evaluation/lm_evaluation_harness/__init__.py
evals/evaluation/lm_evaluation_harness/accuracy.py
evals/evaluation/lm_evaluation_harness/arguments.py
evals/evaluation/lm_evaluation_harness/lm_eval/__init__.py
evals/evaluation/lm_evaluation_harness/lm_eval/evaluator.py
evals/evaluation/lm_evaluation_harness/lm_eval/models/__init__.py
evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py
evals/evaluation/rag_eval/__init__.py
evals/evaluation/rag_eval/evaluator.py
evals/evaluation/rag_eval/template.py
evals/evaluation/toxicity_eval/__init__.py
evals/evaluation/toxicity_eval/benchmark_classification_metrics.py
evals/metrics/__init__.py
evals/metrics/utils.py
evals/metrics/answer_relevancy/__init__.py
evals/metrics/answer_relevancy/answer_relevancy.py
evals/metrics/answer_relevancy/template.py
evals/metrics/bias/__init__.py
evals/metrics/bias/bias.py
evals/metrics/bias/schema.py
evals/metrics/bias/template.py
evals/metrics/hallucination/__init__.py
evals/metrics/hallucination/hallucination.py
evals/metrics/hallucination/schema.py
evals/metrics/hallucination/template.py
evals/metrics/ragaaf/__init__.py
evals/metrics/ragaaf/prompt_engineering.py
evals/metrics/ragaaf/rag_dataset.py
evals/metrics/ragaaf/run_eval.py
evals/metrics/ragaaf/prompt_templates/__init__.py
evals/metrics/ragaaf/prompt_templates/context_recall.py
evals/metrics/ragaaf/prompt_templates/context_relevance.py
evals/metrics/ragaaf/prompt_templates/correctness.py
evals/metrics/ragaaf/prompt_templates/factualness.py
evals/metrics/ragaaf/prompt_templates/opening_prompt.py
evals/metrics/ragaaf/prompt_templates/readability.py
evals/metrics/ragaaf/prompt_templates/relevance.py
evals/metrics/ragaaf/utils/__init__.py
evals/metrics/ragaaf/utils/helper.py
evals/metrics/ragaaf/utils/model.py
evals/metrics/ragaaf/utils/retry.py
evals/metrics/ragas/__init__.py
evals/metrics/ragas/ragas.py
evals/metrics/retrieval/__init__.py
evals/metrics/retrieval/retrieval.py
evals/metrics/summarization/__init__.py
evals/metrics/summarization/summarization.py
evals/metrics/summarization/template.py
evals/metrics/toxicity/__init__.py
evals/metrics/toxicity/schema.py
evals/metrics/toxicity/template.py
evals/metrics/toxicity/toxicity.py
opea_eval.egg-info/PKG-INFO
opea_eval.egg-info/SOURCES.txt
opea_eval.egg-info/dependency_links.txt
opea_eval.egg-info/top_level.txt
tests/test_answer_relevancy.py
tests/test_bias.py
tests/test_bigcode_eval.py
tests/test_hallucination.py
tests/test_lm_eval.py
tests/test_model_card_gen.py
tests/test_ragaaf.py
tests/test_ragas.py
tests/test_retrieval_metric.py
tests/test_toxicity.py
tests/test_toxicity_eval.py