# file: /home/runner/work/aelfrice/aelfrice/benchmarks/verify_clean.py
# hypothesis_version: 6.152.4

['  VERDICT: INVALID', '--all', 'ALL FILES CLEAN', '__main__', 'accuracy', 'answer', 'answer_raw', 'answers', 'bleu', 'case_id', 'context', 'correct', 'correct_answer', 'domain', 'episode_id', 'eval_score', 'exact_match', 'expected', 'f1', 'gold', 'ground_truth', 'gt', 'id', 'is_correct', 'judgment', 'label', 'num_beliefs', 'q_idx', 'qa_type', 'qa_type_name', 'question', 'question_date', 'question_id', 'question_type', 'question_uuid', 'r', 'reference_answer', 'retrieval_latency_ms', 'retrieved_context', 'rouge', 'row_idx', 'score', 'solution', 'source', 'target', 'task', 'task_type', 'utf-8']