Raw JSON · machine-readable
{
"schema": "refract.report.v0.3.1",
"framework_version": "0.3.2",
"environment": {},
"repro_command": "",
"timestamp": "2026-04-30T16:23:41",
"score_direction": "higher_is_better",
"score_range": [
0,
100
],
"model": "/Users/tom/local_llms/models/Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf",
"reference": "ctk=f16,ctv=f16",
"candidate": "ctk=q8_0,ctv=turbo4",
"composite": 90.86023928171333,
"band": "EXCELLENT",
"summary": "Indistinguishable from the reference. Safe to deploy.",
"diagnosis": [
"Mild short-context drift; long-context retrieval and perturbation robustness are intact. Likely safe for typical use; audit on your specific decoding workload before shipping."
],
"composite_detail": {
"gtm_score": 76.64796633941093,
"kld_score": 99.71012096299961,
"rniah_score": 100.0,
"plad_score": 91.34151697077642,
"floor_score": null,
"floor_ok": null,
"floor_min": 99.5,
"notes": []
},
"axes": {
"gtm": {
"score": 76.64796633941093,
"full_match_rate": 0.5333333333333333,
"median_first_divergence": 25.5,
"mean_prefix_agreement_length": 36.43333333333333,
"mean_cand_length": 47.53333333333333,
"mean_ref_length": 47.53333333333333,
"n_prompts": 30,
"n_tokens_each": 50,
"per_prompt": [],
"notes": [],
"band": "DEGRADED",
"description": "Token-level agreement with the fp16 reference."
},
"kld": {
"score": 99.71012096299961,
"mean_kld": 0.002903,
"ppl": null,
"rms_dp_pct": null,
"same_topp_pct": null,
"base_path": "",
"chunks": 32,
"ctx": 512,
"is_self_reference": false,
"corpus": null,
"band": "EXCELLENT",
"description": "Distribution-level divergence from the fp16 reference."
},
"rniah": {
"score": 100.0,
"n_cells": 9,
"cells": [
{
"length": 4096,
"position": 0.1,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 4096,
"position": 0.5,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 4096,
"position": 0.9,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 8192,
"position": 0.1,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 8192,
"position": 0.5,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 8192,
"position": 0.9,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 16384,
"position": 0.1,
"n_trials": 1,
"base_acc": 0.0,
"cand_acc": 0.0,
"degradation": 0.0,
"base_hits": 0,
"cand_hits": 0
},
{
"length": 16384,
"position": 0.5,
"n_trials": 1,
"base_acc": 0.0,
"cand_acc": 0.0,
"degradation": 0.0,
"base_hits": 0,
"cand_hits": 0
},
{
"length": 16384,
"position": 0.9,
"n_trials": 1,
"base_acc": 0.0,
"cand_acc": 0.0,
"degradation": 0.0,
"base_hits": 0,
"cand_hits": 0
}
],
"skipped_cells": [],
"needle": "Note: APRICOT-7-BLUE is the rare paint color featured in this article.",
"password_keyword": "APRICOT-7-BLUE",
"notes": [],
"confidence": "ok",
"base_acc_avg": 0.6666666666666666,
"band": "EXCELLENT",
"description": "Long-context retrieval quality vs the reference."
},
"plad": {
"score": 91.34151697077642,
"per_perturbation_score": {
"typo": 89.78368688439093,
"case": 96.52131684820382,
"punct": 88.30633182429914,
"paraphrase": null
},
"per_prompt": [],
"n_prompts": 30,
"n_perturbations": 4,
"notes": [
"36 (prompt, perturbation) pairs were skipped (perturbation could not apply, e.g. no \u22654-char word for typo)."
],
"skipped_perturbations": [
"paraphrase"
],
"confidence": "partial",
"band": "EXCELLENT",
"description": "Robustness to small prompt changes vs the reference."
}
},
"extras": {}
}