.gitignore
.project_root
LICENSE
README.md
pyproject.toml
uv.lock
benchmarking/configs/collect-GLM-5.yaml
benchmarking/configs/collect-biomni-a1-claude-4.yaml
benchmarking/configs/collect-claude-haiku-4.5.yaml
benchmarking/configs/collect-claude-opus-4.5.yaml
benchmarking/configs/collect-claude-sonnet-4.5.yaml
benchmarking/configs/collect-deepseek-v3.2-nothink.yaml
benchmarking/configs/collect-deepseek-v3.2.yaml
benchmarking/configs/collect-fewshot-predictions.yaml
benchmarking/configs/collect-gemini-3-flash.yaml
benchmarking/configs/collect-gemini-3-pro.yaml
benchmarking/configs/collect-gemini-3.1-pro.yaml
benchmarking/configs/collect-gpt-5-mini.yaml
benchmarking/configs/collect-gpt-5.2.yaml
benchmarking/configs/collect-gpt-5.4.yaml
benchmarking/configs/collect-gpt-oss-120b.yaml
benchmarking/configs/collect-gpt-oss-20b.yaml
benchmarking/configs/collect-kimi-k2.5.yaml
benchmarking/configs/collect-minimax-m2.5.yaml
benchmarking/configs/collect-olmo-3.1-32b-think.yaml
benchmarking/configs/collect-predictions.yaml
benchmarking/configs/collect-qwen3-235B-A22B-2507.yaml
benchmarking/configs/collect-qwen3-30B-A3B-2507.yaml
benchmarking/configs/collect-qwen3-4B-2507.yaml
benchmarking/configs/collect-qwen3-coder-next.yaml
benchmarking/configs/collect-qwen3.5-0.8b.yaml
benchmarking/configs/collect-qwen3.5-122b-a10b.yaml
benchmarking/configs/collect-qwen3.5-27b.yaml
benchmarking/configs/collect-qwen3.5-2b.yaml
benchmarking/configs/collect-qwen3.5-35b-a3b.yaml
benchmarking/configs/collect-qwen3.5-397b-a17b.yaml
benchmarking/configs/collect-qwen3.5-4b.yaml
benchmarking/configs/collect-qwen3.5-9b.yaml
benchmarking/configs/ensemble-baseline.yaml
benchmarking/configs/evaluate-model-splits.yaml
benchmarking/configs/generate-baselines.yaml
benchmarking/configs/gepa/gemini-3-flash.yaml
benchmarking/exporters/export_harmonized_ensemble_predictions.py
benchmarking/exporters/export_harmonized_knn_predictions.py
benchmarking/exporters/export_harmonized_model_predictions.py
benchmarking/predictions/baselines/baseline__bm25.json
benchmarking/predictions/baselines/baseline__coarse-phenotype-hit-freq.json
benchmarking/predictions/baselines/baseline__degree.json
benchmarking/predictions/baselines/baseline__gene-name-overlap.json
benchmarking/predictions/baselines/baseline__global-hit-freq.json
benchmarking/predictions/baselines/baseline__library-size-prior.json
benchmarking/predictions/baselines/baseline__pagerank.json
benchmarking/predictions/baselines/baseline__phenotype-hit-freq.json
benchmarking/predictions/baselines/baseline__phenotype-knn-hit-freq.json
benchmarking/predictions/baselines/baseline__random.json
benchmarking/predictions/baselines/baseline__screen-type-hit-freq.json
benchmarking/predictions/ensemble/LLM__RRF__Ensemble.json
benchmarking/predictions/fewshot/fewshot__gemini-3-flash-fewshot-knn10.json
benchmarking/predictions/fewshot/fewshot__gemini-3-pro-fewshot-knn10.json
benchmarking/predictions/gepa/gepa__gemini-3-flash.json
benchmarking/predictions/knn/Embedding__kNN.json
benchmarking/predictions/knn/Oracle__kNN.json
benchmarking/predictions/llm/GLM-5.json
benchmarking/predictions/llm/Kimi-K2.5.json
benchmarking/predictions/llm/MiniMax-M2.5.json
benchmarking/predictions/llm/biomni-a1-claude-4.json
benchmarking/predictions/llm/claude-haiku-4.5.json
benchmarking/predictions/llm/claude-opus-4.5.json
benchmarking/predictions/llm/claude-sonnet-4.5.json
benchmarking/predictions/llm/deepseek-v3.2-nothink.json
benchmarking/predictions/llm/deepseek-v3.2.json
benchmarking/predictions/llm/gemini-3-flash.json
benchmarking/predictions/llm/gemini-3-pro.json
benchmarking/predictions/llm/gemini-3.1-pro.json
benchmarking/predictions/llm/gpt-5-mini.json
benchmarking/predictions/llm/gpt-5.2.json
benchmarking/predictions/llm/gpt-5.4.json
benchmarking/predictions/llm/gpt-oss-120b.json
benchmarking/predictions/llm/gpt-oss-20b.json
benchmarking/predictions/llm/olmo-3.1-32b-think.json
benchmarking/predictions/llm/qwen3-235b-a22b-2507.json
benchmarking/predictions/llm/qwen3-30b-a3b-2507.json
benchmarking/predictions/llm/qwen3-4b-2507.json
benchmarking/predictions/llm/qwen3-coder-next.json
benchmarking/predictions/llm/qwen3.5-0.8b.json
benchmarking/predictions/llm/qwen3.5-122b-a10b.json
benchmarking/predictions/llm/qwen3.5-27b.json
benchmarking/predictions/llm/qwen3.5-2b.json
benchmarking/predictions/llm/qwen3.5-35b-a3b.json
benchmarking/predictions/llm/qwen3.5-397b-a17b.json
benchmarking/predictions/llm/qwen3.5-4b.json
benchmarking/predictions/llm/qwen3.5-9b.json
benchmarking/predictions/trained/C2S__Gemma-2B__LoRA.json
benchmarking/predictions/trained/GRPO__qwen3-30b-instruct-2507.json
benchmarking/predictions/trained/SFT__GRPO__best__gpt-oss-120B.json
benchmarking/predictions/trained/SFT__gpt-oss-120B.json
benchmarking/predictions_generation/bollm_gene_embeddings.py
benchmarking/predictions_generation/collect_fewshot_predictions.py
benchmarking/predictions_generation/collect_llm_predictions.py
benchmarking/predictions_generation/download_gene_summaries.py
benchmarking/predictions_generation/evaluate_best_ensemble.py
benchmarking/predictions_generation/evaluate_model_splits.py
benchmarking/predictions_generation/generate_baseline_predictions.py
benchmarking/predictions_generation/knn_test.py
benchmarking/predictions_generation/run_ensemble_baseline.py
benchmarking/predictions_generation/run_gepa_collect_predictions.py
benchmarking/predictions_generation/run_gepa_optimization.py
benchmarking/predictions_generation/shared_utils.py
benchmarking/predictions_generation/train_relevance_predictor_classifier.py
benchmarking/predictions_generation/update_baseline_coarse_phenotype_novel_public.py
benchmarking/predictions_generation/update_fewshot_novel_public.py
benchmarking/predictions_generation/update_gepa_novel_public.py
benchmarking/predictions_generation/update_gpt_oss_120b_novel_public.py
benchmarking/predictions_generation/update_oracle_knn_novel_public.py
docs/.nojekyll
docs/README.md
docs/_config.yml
docs/bias.html
docs/build_data.py
docs/build_figures.py
docs/build_public_umap.py
docs/cite.html
docs/index.html
docs/leaderboard.html
docs/memorization.html
docs/metric.html
docs/phenotypes.html
docs/scaling.html
docs/screens.html
docs/umap.html
docs/assets/css/site.css
docs/assets/data/bias.json
docs/assets/data/categories.json
docs/assets/data/duplicate_transfer.json
docs/assets/data/leaderboard.json
docs/assets/data/memorization.json
docs/assets/data/models.json
docs/assets/data/per_screen.json
docs/assets/data/phenotype_means.json
docs/assets/data/scaling.json
docs/assets/data/screens.json
docs/assets/data/summary.json
docs/assets/figures/Figure1.png
docs/assets/figures/Figure2_combined.png
docs/assets/figures/Figure3.png
docs/assets/figures/Figure4.png
docs/assets/figures/Figure6.png
docs/assets/figures/bo_val_vs_test.png
docs/assets/figures/dna.png
docs/assets/figures/qwen_generation_mapped_adjusted_ndcg_at_100_paper.png
docs/assets/js/bias.js
docs/assets/js/leaderboard.js
docs/assets/js/memorization.js
docs/assets/js/phenotypes.js
docs/assets/js/scaling.js
docs/assets/js/screens.js
docs/assets/js/site.js
docs/assets/umap/screen_umap_explorer.html
docs/assets/umap/umap_coordinates.csv
examples/load_and_score.py
examples/load_data.ipynb
figures/generate_results_cache.py
figures/journal_figures_common.py
figures/plot0_proportions.py
figures/plot1_selected_methods.py
figures/plot2_phenotype_bar_plot_year.py
figures/plot3_duplicate_transfer_vs_model.py
figures/plot4_memorization_analysis.py
figures/plot5_scaling_laws.py
figures/plot6_bias.py
figures/results_cache_data.py
figures/data/bias_matrix.csv
figures/data/citation_count.json
figures/data/plot5_duplicate_transfer.csv
src/assaybench/__init__.py
src/assaybench.egg-info/PKG-INFO
src/assaybench.egg-info/SOURCES.txt
src/assaybench.egg-info/dependency_links.txt
src/assaybench.egg-info/requires.txt
src/assaybench.egg-info/top_level.txt
src/assaybench/benchmark/__init__.py
src/assaybench/benchmark/metrics.py
src/assaybench/data/__init__.py
src/assaybench/data/hgnc/__init__.py
src/assaybench/data/hgnc/all_genes.tsv
src/assaybench/data/hgnc/hgnc_symbols_cache.tsv
src/assaybench/data/hgnc/manual_mappings.json
src/assaybench/data/hgnc/uniprot_protein_to_gene.json
src/assaybench/data/prompts/__init__.py
src/assaybench/data/prompts/objective_prompts.yaml
src/assaybench/dataset/__init__.py
src/assaybench/dataset/dataset.py
src/assaybench/utils/__init__.py
src/assaybench/utils/gene_mapper.py
src/assaybench/utils/prompt_loaders.py
src/assaybench/utils/screen_processing.py