Decode distribution drift detected: the candidate generates different tokens than fp16 on short-context prompts.
Brittleness to small input changes: candidate's output drifts more than fp16's under typo / casing / punctuation perturbations.
| length \ position | 0.10 | 0.50 | 0.90 |
|---|---|---|---|
| 4096 | 1.00 / 1.00 | 1.00 / 1.00 | 1.00 / 1.00 |
| 8192 | 1.00 / 1.00 | 1.00 / 1.00 | 1.00 / 1.00 |
| 16384 | n/a | n/a | n/a |
| perturbation | score | band | distribution | |
|---|---|---|---|---|
| typo | 75.14 | Degraded | ||
| case | 70.44 | Degraded | ||
| punct | 71.01 | Degraded | ||
| paraphrase | skipped | N/A | didn't apply on these prompts |
'~/dev/turboquant/refract/cli.py' score --model '~/models/gemma-4-26B-A4B-it-Q8_0.gguf' --reference ctk=f16,ctv=f16 --candidate ctk=turbo4,ctv=turbo4 --prompts refract/prompts/v0.1.jsonl --full --rniah-up-to 16384 --rniah-trials 1 --no-progress --json-out '~/dev/turboquant/refract/examples/catastrophic-symturbo.json' --html-out '~/dev/turboquant/refract/examples/catastrophic-symturbo.html'
{
"schema": "refract.report.v0.3.1",
"framework_version": "0.3.2",
"environment": {
"backend": "llamacpp",
"model": "/Users/tom/models/gemma-4-26B-A4B-it-Q8_0.gguf",
"llama_cpp_bin_dir": "/Users/tom/local_llms/llama.cpp/build-test/bin",
"llama_cpp_commit": "817e913ec"
},
"repro_command": "",
"timestamp": "2026-04-30T16:46:09",
"score_direction": "higher_is_better",
"score_range": [
0,
100
],
"model": "/Users/tom/models/gemma-4-26B-A4B-it-Q8_0.gguf",
"reference": "ctk=f16,ctv=f16",
"candidate": "ctk=turbo4,ctv=turbo4",
"composite": 11.030876948704718,
"band": "FAIL",
"summary": "Material quality loss. Treat as broken.",
"diagnosis": [
"Decode distribution drift detected: the candidate generates different tokens than fp16 on short-context prompts.",
"Brittleness to small input changes: candidate's output drifts more than fp16's under typo / casing / punctuation perturbations."
],
"composite_detail": {
"gtm_score": 3.9317721884937846,
"kld_score": 11.84391443627722,
"rniah_score": 100.0,
"plad_score": 72.20886835193117,
"floor_score": null,
"floor_ok": null,
"floor_min": 99.5,
"notes": []
},
"axes": {
"gtm": {
"score": 3.9317721884937846,
"full_match_rate": 0.0,
"median_first_divergence": 2.0,
"mean_prefix_agreement_length": 4.533333333333333,
"mean_cand_length": 115.3,
"mean_ref_length": 118.4,
"n_prompts": 30,
"n_tokens_each": 128,
"per_prompt": [
{
"id": "fact-001",
"category": "factual",
"prompt": "The capital of France is",
"ref_token_ids": [
31567,
1768,
3769,
236764,
236772,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031,
965,
753,
1031
],
"cand_token_ids": [
236761,
107,
45518,
107,
101,
818,
5279,
529,
7001,
563,
5213,
50429,
84750,
106
],
"first_divergence": 0,
"prefix_agreement_length": 0,
"cand_length": 14,
"ref_length": 128,
"matched": false
},
{
"id": "fact-002",
"category": "factual",
"prompt": "The largest ocean on Earth is the",
"ref_token_ids": [
1852,
236772,
107,
236772,
1852,
236772,
107,
236772,
1852,
236772,
107,
236772,
1852,
236772,
107,
236772,
1852,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
236772,
7488,
236761,
107,
101,
100,
45518,
107,
101,
818,
7488,
12461,
580,
10824,
563,
506,
5213,
47204,
18414,
84750,
106
],
"cand_token_ids": [
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461,
580,
10824,
563,
506,
7488,
12461
],
"first_divergence": 0,
"prefix_agreement_length": 0,
"cand_length": 128,
"ref_length": 102,
"matched": false
},
{
"id": "fact-003",
"category": "factual",
"prompt": "The chemical symbol for gold is",
"ref_token_ids": [
620,
236772,
236748,
236772,
236752,
236772,
236753,
568,
100,
45518,
2268,
1493,
519,
236772,
236748,
236772,
36604,
1493,
519,
236772,
236752,
236772,
36604,
1493,
519,
236772,
236753,
769,
108,
20470,
236764,
600,
236789,
236751,
711,
1447,
236761,
669,
7395,
5404,
573,
5122,
563,
5213,
36928,
84750,
108,
236769,
818,
623,
236767,
236772,
236748,
236772,
236752,
236772,
236753,
236775,
3210,
691,
496,
42521,
18932,
15520,
106
],
"cand_token_ids": [
236924,
108,
45518,
107,
101,
818,
7395,
5404,
573,
5122,
563,
5213,
36928,
84750,
106
],
"first_divergence": 0,
"prefix_agreement_length": 0,
"cand_length": 15,
"ref_length": 65,
"matched": false
},
{
"id": "fact-004",
"category": "factual",
"prompt": "The author of Romeo and Juliet is",
"ref_token_ids": [
27435,
236772,
1852,
236772,
13513,
1852,
236772,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
236770,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
236770,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513,
236761,
13513
],
"cand_token_ids": [
1638,
618,
506,
101,
818,
3260,
529,
808,
231194,
532,
93560,
236829,
563,
5213,
34848,
36951,
84750,
106
],
"first_divergence": 0,
"prefix_agreement_length": 0,
"cand_length": 18,
"ref_length": 128,
"matched": false
},
{
"id": "fact-005",
"category": "factual",
"prompt": "The tallest mountain in the world is",
"ref_token_ids": [
92900,
236772,
598,
236761,
107,
236771,
236770,
236787,
236771,
236770,
236787,
236771,
236770,
236787,
236771,
236770,
107,
45518,
107,
101,
818,
13315,
611,
3847,
563,
496,
83805,
2591,
529,
5213,
232657,
5710,
1018,
532,
5213,
9181,
236772,
23771,
40095,
84750,
108,
8291,
563,
496,
25890,
529,
3217,
625,
11979,
855,
236787,
108,
236770,
236761,
138,
1018,
9264,
1136,
5710,
568,
236774,
1474,
1865,
1473,
1018,
140775,
623,
818,
92900,
10565,
1390,
563,
92900,
236775,
563,
1133,
6420,
623,
818,
7445,
8205,
563,
7445,
1781,
1030,
66618,
506,
1638,
1938,
10911,
2180,
8009,
861,
6590,
236761,
107,
236778,
236761,
138,
1018,
14042,
236772,
23771,
79378,
6791,
124591,
598,
236772,
598,
27077,
1018,
799,
5422,
236764,
623,
98599,
598,
236775,
563,
3016,
506,
5213,
12666,
236752,
1280,
1018,
1183,
529
],
"cand_token_ids": [
92900,
236772,
598,
236761,
107,
220114,
1898,
236772,
598,
236743,
1898,
236772,
598,
236743,
1898,
236772,
598,
236743,
1898,
236772,
598,
236743,
107,
107,
10541,
13184,
236772,
598,
236743,
107,
100,
101,
818,
10565,
563,
11519,
236772,
598,
236764,
107,
818,
7217,
563,
3730,
236772,
598,
236764,
107,
818,
4171,
659,
17163,
236772,
598,
236764,
107,
3133,
506,
40095,
563,
1343,
236772,
598,
236761,
108,
3048,
735,
8452,
506,
5213,
44700,
236772,
598,
1018,
529,
5213,
236797,
1190,
1867,
236772,
598,
1018,
236888,
106
],
"first_divergence": 5,
"prefix_agreement_length": 5,
"cand_length": 84,
"ref_length": 128,
"matched": false
},
{
"id": "arith-001",
"category": "arithmetic",
"prompt": "Two plus two equals",
"ref_token_ids": [
236772,
108,
236829,
236778,
900,
236743,
1346,
4547,
1595,
531,
808,
568,
236778,
236768,
236743,
236778,
236768,
236743,
236778,
236768,
236743,
236778,
236768,
236743,
236778,
236768,
236743,
236778,
236768,
236743,
236778,
236768,
236743,
236778,
236768,
236743,
236778,
236768,
236743,
236778,
236768,
236743,
236778,
236768,
236743,
236778,
236768,
236743,
29345,
236772,
108,
29345,
236772,
236743,
236812,
236761,
108,
15958,
236772,
236743,
236812,
236761,
108,
15958,
236772,
236743,
236778,
236761,
108,
15958,
236772,
236743,
236778,
236761,
108,
15958,
236772,
236743,
236778,
236761,
108,
15958,
236772,
236743,
236778,
236761,
108,
15958,
236772,
236743,
236778,
236761,
108,
15958,
236772,
236743,
1114,
236772,
236743,
236778,
236761,
108,
15958,
236772,
1114,
236772,
236743,
236778,
236761,
108,
15958,
236772,
1114,
236772,
236743,
236778,
236761,
108,
138442,
2407,
236772,
1114,
236772,
236778,
236761,
108,
138442,
237184
],
"cand_token_ids": [
236772,
108,
1018,
236772,
236778,
236772,
1018,
568,
11634,
12174,
1156,
14339,
5743,
236764,
840,
528,
672,
4403,
236764,
4403,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
11731,
236772,
46443,
1854,
236772,
46443,
11731,
236772,
46443,
1854,
236772,
46443,
11731,
236772,
46443,
1854,
236772,
46443,
11731,
236772,
46443,
1854,
236772,
46443,
11731,
236772,
46443,
1854,
236772,
46443,
11731,
236772,
46443,
1854,
236772,
46443,
11731,
236772,
46443,
1854,
236772,
46443,
11731,
236772,
46443,
1854,
236772,
46443,
11731
],
"first_divergence": 2,
"prefix_agreement_length": 2,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "arith-002",
"category": "arithmetic",
"prompt": "17 times 24 equals",
"ref_token_ids": [
236772,
108,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018,
236770,
100,
236832,
1123,
236743,
236778,
236812,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578,
236743,
236770,
236832,
1123,
568,
236778,
236771,
900,
236743,
236812,
236768,
578,
236743,
236800,
236812,
236771,
900,
236743,
236825,
236828,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578,
568,
236770,
236832,
1123,
236743,
236778,
236771,
236768,
900,
568,
236770,
236832,
1123,
236743,
236812,
236768,
578,
236743,
236800,
236812,
236771,
900,
236743,
236825,
236828,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578
],
"cand_token_ids": [
236772,
108,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018,
236770,
236832,
1123,
236743,
236828,
578,
236743,
236770,
236800,
236825,
1018,
107,
1018,
236770,
236832,
1123,
236743,
236770,
236825,
578,
236743,
236778,
236832,
236778,
1018,
107,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018,
236770,
236832,
1123,
236743,
236778,
236812,
578,
236743,
236812,
236771,
236828,
1018,
108,
1018
],
"first_divergence": 18,
"prefix_agreement_length": 18,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "arith-003",
"category": "arithmetic",
"prompt": "100 divided by 4 is",
"ref_token_ids": [
236743,
236770,
236771,
236771,
965,
236743,
236812,
578,
236743,
45518,
569,
759,
569,
9105,
236772,
108,
45518,
569,
759,
569,
9105,
236772,
108,
45518,
569,
759,
569,
9105,
236772,
108,
45518,
569,
759,
569,
9105,
236772,
108,
45518,
569,
759,
569,
9105,
236772,
108,
45518,
569,
759,
569,
9105,
236772,
108,
45518,
569,
759,
569,
9105,
236772,
108,
45518,
569,
759,
569,
9105,
236772,
108,
45518,
569,
759,
569,
9105,
236772,
108,
45518,
569,
759,
569,
9105,
236772,
108,
45518,
569,
759,
569,
9105,
236772,
13513,
108,
236770,
236771,
236771,
11310,
684,
236743,
236812,
563,
5213,
236778,
236810,
84750,
108,
8291,
563,
506,
25890,
236787,
107,
236829,
139,
236795,
236770,
236771,
236771,
621,
1928,
236743,
236812,
578,
236743,
236778,
236810,
236795,
108,
3048,
740,
1751,
529,
625,
618
],
"cand_token_ids": [
236743,
236770,
236771,
236771,
236786,
236812,
578,
236743,
236778,
236810,
236761,
236743,
108,
3048,
236789,
500,
528,
496,
569,
70779,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760,
236772,
236760
],
"first_divergence": 4,
"prefix_agreement_length": 4,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "arith-004",
"category": "arithmetic",
"prompt": "The square root of 81 is",
"ref_token_ids": [
6281,
5989,
529,
236743,
11632,
506,
6281,
5989,
529,
236743,
1075,
236772,
236828,
236770,
563,
27296,
236772,
236828,
236770,
236761,
11632,
506,
6281,
5989,
529,
236743,
236828,
236770,
563,
236743,
236819,
236761,
236743,
108,
100,
6174,
236772,
236828,
236770,
563,
236743,
236819,
236761,
107,
80073,
506,
6281,
5989,
529,
236743,
236828,
236770,
563,
236743,
236819,
236761,
108,
2717,
108,
818,
2744,
1816,
563,
6112,
58148,
236764,
5781,
16658,
16658,
236764,
532,
6097,
30696,
227753,
31368,
568,
5282,
623,
1075,
236772,
236828,
236770,
827,
623,
6174,
236772,
236828,
236770,
4248,
1030,
7412,
531,
577,
496,
11207,
653,
31578,
7835,
1816,
26813,
531,
8595,
600,
874,
4784,
236782,
236828,
236770,
236783,
578,
236743,
236819,
2104,
108,
1018,
35559,
53121,
107,
39446,
872,
506,
1816,
531,
1386,
625,
496,
23420,
236764
],
"cand_token_ids": [
6281,
5989,
529,
236743,
236828,
236770,
563,
236743,
236819,
236761,
107,
4784,
236772,
236828,
236770,
563,
711,
236743,
236828,
236770,
236761,
36518,
236772,
236828,
236770,
563,
711,
236743,
236828,
236770,
236761,
36518,
236772,
236828,
236770,
563,
711,
236743,
236828,
236770,
236761,
36518,
236772,
236828,
236770,
563,
711,
236743,
236770,
236761,
236743,
107,
4784,
236772,
236828,
236770,
563,
711,
236743,
236828,
236770,
236761,
36518,
236772,
236828,
236770,
563,
711,
236743,
236828,
236770,
236761,
36518,
236772,
236828,
236770,
563,
236828,
236743,
236828,
236761,
236743,
167,
140,
107,
236769,
236771,
236761,
236810,
13365,
1852,
236772,
236828,
236770,
563,
568,
236771,
236761,
236810,
13365,
236772,
236828,
236770,
563,
568,
236771,
236761,
236810,
13365,
236772,
236828,
236770,
236761,
236743,
236771,
236761,
236810,
563,
236743,
236828,
236770,
236761,
236743,
236771,
236761,
236810,
563,
236743
],
"first_divergence": 4,
"prefix_agreement_length": 4,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "arith-005",
"category": "arithmetic",
"prompt": "Half of 250 equals",
"ref_token_ids": [
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236770,
236778,
236810,
14339,
236743,
236825,
236778,
236761,
236810,
236761,
107,
51671,
529,
236743,
236770,
236778,
236810,
14339,
236743,
236825,
236778,
236761,
236810,
236761,
107,
51671,
529,
236743,
236770,
236778,
236810,
14339,
236743,
236810,
236771,
236761,
107,
51671,
529,
236743,
236770,
236778,
236810,
14339,
236743,
236810,
236771,
236761,
107,
51671,
529,
236743,
236770,
236778,
236810,
14339,
236743,
236810,
236771,
236761,
107,
51671,
529,
236743,
236770,
236778,
236810,
14339,
236743,
236810,
236771,
236761,
107,
51671,
529,
236743,
236770,
236778,
236810,
14339
],
"cand_token_ids": [
236743,
236778,
236810,
236771,
236786,
236778,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743,
236778,
236810,
236771,
14339,
236743,
236770,
236778,
236810,
236761,
107,
51671,
529,
236743
],
"first_divergence": 1,
"prefix_agreement_length": 1,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "code-001",
"category": "code",
"prompt": "def fib(n):\n ",
"ref_token_ids": [
584,
538,
6605,
236743,
236770,
236787,
107,
144,
584,
538,
1251,
236743,
236771,
236787,
107,
148,
2060,
236743,
236771,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
236772,
107,
148,
107,
148,
107,
148,
107,
148,
236772,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107,
148,
107
],
"cand_token_ids": [
584,
538,
6605,
236743,
236770,
236787,
107,
144,
2060,
538,
107,
140,
4454,
236787,
107,
144,
2060,
10779,
236769,
236749,
236772,
236770,
236768,
900,
10779,
236769,
236749,
236772,
236778,
236768,
107,
2717,
108,
818,
2430,
8150,
531,
1586,
506,
1346,
8143,
1595,
531,
9279,
506,
123466,
7501,
872,
531,
496,
2953,
1548,
609,
236749,
2104,
108,
818,
2430,
3847,
496,
2269,
17856,
3393,
54369,
236761,
669,
108,
2717,
6719,
107,
2063,
10779,
236769,
236749,
1473,
107,
140,
584,
538,
6605,
236743,
236770,
236787,
107,
144,
2060,
538,
107,
140,
4454,
236787,
107,
144,
2060,
10779,
236769,
236749,
236772,
236770,
236768,
900,
10779,
236769,
236749,
236772,
236778,
236768,
107,
2717,
108,
818,
2430,
8150,
531,
1586,
506,
1346,
8143,
1595,
531,
9279,
506,
123466,
7501,
872,
531,
496,
2953
],
"first_divergence": 8,
"prefix_agreement_length": 8,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "code-002",
"category": "code",
"prompt": "def is_prime(n):\n ",
"ref_token_ids": [
584,
538,
6605,
236743,
236770,
236787,
107,
148,
2060,
8450,
107,
140,
584,
538,
1251,
236743,
236778,
236787,
107,
156,
2060,
6288,
107,
140,
584,
538,
2144,
236743,
236778,
1251,
236743,
236771,
236787,
107,
156,
2060,
8450,
107,
140,
584,
538,
2144,
236743,
236800,
1251,
236743,
236771,
236787,
107,
156,
2060,
8450,
107,
140,
236865,
7179,
573,
5872,
872,
531,
506,
6281,
5989,
529,
538,
107,
148,
236865,
7179,
573,
5872,
872,
531,
506,
6281,
5989,
529,
538,
107,
148,
236865,
7179,
573,
5872,
872,
531,
506,
6281,
5989,
7610,
529,
538,
107,
148,
236865,
7179,
573,
5872,
872,
531,
506,
6281,
5989,
7610,
529,
538,
107,
148,
236865,
7179,
573,
5872,
872,
531,
506,
6281,
5989,
7610,
529,
538,
107,
148,
236865,
7179,
573,
5872,
872,
531
],
"cand_token_ids": [
584,
538,
6605,
236743,
236770,
236787,
107,
144,
2060,
8450,
107,
140,
1708,
858,
528,
2644,
236769,
236778,
236764,
801,
236769,
236749,
1018,
236771,
236761,
236810,
236768,
900,
236743,
236770,
1473,
107,
144,
2060,
858,
2843,
538,
965,
858,
997,
1174,
563,
496,
3103,
20973,
236764,
840,
1531,
236789,
236751,
1460,
236761,
107,
140,
236865,
669,
7350,
2787,
815,
496,
994,
5456,
4888,
625,
236761,
107,
140,
236865,
1174,
236789,
236751,
496,
3103,
20973,
236764,
2311,
236761,
107,
140,
236865,
3792,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751,
1164,
236789,
236751
],
"first_divergence": 7,
"prefix_agreement_length": 7,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "code-003",
"category": "code",
"prompt": "// JavaScript function to reverse a string\nfunction reverse(s) {\n ",
"ref_token_ids": [
2060,
503,
236761,
6966,
885,
4248,
34220,
2444,
7013,
24546,
107,
236783,
108,
715,
14691,
14120,
236787,
107,
1987,
1041,
1859,
578,
623,
9259,
4109,
1721,
107,
1987,
29695,
1859,
578,
14416,
236769,
3307,
1859,
626,
107,
8717,
236761,
1587,
236769,
140879,
1859,
626,
973,
16887,
236787,
623,
15967,
530,
236824,
4276,
519,
236814,
236775,
107,
2717,
108,
10354,
25252,
529,
506,
8739,
107,
818,
3393,
4728,
496,
3606,
236764,
3161,
236772,
2501,
4313,
531,
5213,
34220,
496,
2483,
1018,
528,
20737,
236761,
1030,
6178,
5284,
236772,
495,
4615,
236787,
107,
236770,
236761,
138,
1018,
236929,
6966,
48391,
236929,
66515,
114248,
506,
2483,
1131,
614,
3499,
529,
3141,
7579,
236761,
107,
236778,
236761,
138,
1018,
236929,
6966,
885,
4248,
34220,
73962,
66515,
1276,
159747,
506,
1900,
529,
506
],
"cand_token_ids": [
2060,
503,
236761,
6966,
885,
4248,
34220,
2444,
7013,
24546,
107,
236783,
108,
2501,
14416,
236769,
236751,
236768,
642,
107,
140,
1184,
637,
578,
13033,
107,
140,
1184,
858,
578,
858,
236793,
236743,
236778,
236793,
973,
1174,
563,
496,
33413,
3165,
107,
140,
1184,
858,
578,
858,
236793,
236743,
236778,
236793,
973,
1174,
3165,
563,
2036,
1590,
107,
140,
1184,
858,
578,
858,
236793,
236743,
236778,
236793,
973,
1174,
3165,
563,
2036,
1590,
107,
140,
1184,
858,
578,
858,
236793,
236743,
236778,
236793,
973,
1174,
3165,
563,
2036,
1861,
107,
140,
1184,
858,
578,
858,
236793,
236743,
236778,
236793,
973,
1174,
3165,
563,
2036,
1861,
107,
140,
1184,
858,
578,
858,
236793,
236743,
236778,
236793,
236743,
973,
1174,
3165,
563,
2036,
1861,
107,
140,
1184,
858,
578,
858
],
"first_divergence": 13,
"prefix_agreement_length": 13,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "code-004",
"category": "code",
"prompt": "# Python: list comprehension to square numbers 1 to 10\nsquares = ",
"ref_token_ids": [
870,
236781,
1018,
236778,
573,
1123,
528,
2644,
236769,
236770,
236764,
236743,
236770,
236770,
7066,
107,
1995,
236769,
65584,
236768,
107,
2717,
108,
818,
3847,
3393,
54369,
563,
496,
4011,
2591,
529,
1217,
531,
1161,
1694,
62721,
528,
17856,
531,
2121,
496,
3606,
4209,
23057,
532,
214219,
236761,
108,
10354,
9533,
76423,
108,
10354,
236743,
236770,
236761,
4361,
139206,
37979,
107,
1613,
62721,
563,
496,
148260,
9792,
600,
623,
106389,
25860,
9792,
236775,
600,
623,
106389,
25860,
9792,
236775,
600,
623,
106389,
25860,
9792,
236775,
600,
623,
106389,
25860,
9792,
236775,
600,
623,
106389,
25860,
9792,
236775,
600,
623,
106389,
25860,
9792,
236775,
600,
623,
106389,
25860,
9792,
236775,
600,
623,
106389,
25860,
9792,
236775,
600,
623,
106389,
25860,
9792,
236775,
600,
623,
106389,
25860,
9792,
236775,
600
],
"cand_token_ids": [
2234,
236769,
209902,
37979,
236779,
50345,
236779,
8472,
236768,
107,
65584,
578,
870,
236781,
1018,
236778,
573,
1123,
528,
2644,
236769,
236770,
236764,
236743,
236770,
236770,
7066,
107,
65584,
578,
870,
236781,
1018,
236778,
573,
1123,
528,
2644,
236769,
236770,
236764,
236743,
236770,
236770,
7066,
107,
65584,
578,
870,
236781,
1018,
236778,
573,
1123,
528,
2644,
236769,
236770,
236764,
236743,
236770,
236770,
7066,
107,
65584,
578,
870,
236781,
1018,
236778,
573,
1123,
528,
2644,
236769,
236770,
236770,
7066,
107,
65584,
578,
870,
236781,
1018,
236778,
573,
1123,
528,
2644,
236769,
236770,
236764,
236743,
236770,
236770,
7066,
107,
65584,
578,
870,
236781,
1018,
236778,
573,
1123,
528,
2644,
236769,
236770,
236764,
236743,
236770,
236770,
7066,
107,
65584,
578,
870,
236781,
1018,
236778,
573,
1123,
236772,
495,
2644,
236769,
236770
],
"first_divergence": 0,
"prefix_agreement_length": 0,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "code-005",
"category": "code",
"prompt": "SELECT name FROM users WHERE ",
"ref_token_ids": [
547,
578,
236743,
236770,
236793,
2617,
1174,
563,
496,
5739,
107,
2717,
108,
10354,
236743,
236778,
236761,
15963,
89782,
568,
13886,
236747,
236768,
107,
1018,
16716,
53121,
1455,
59471,
16972,
531,
52266,
496,
7609,
531,
37608,
31324,
236764,
2802,
62362,
1262,
236764,
653,
17114,
236786,
8335,
7948,
8735,
236761,
108,
1018,
12703,
53121,
107,
70895,
496,
14209,
1183,
1298,
506,
2430,
28062,
910,
2165,
11668,
236929,
532,
2165,
8707,
21233,
107,
818,
3739,
3393,
2473,
1385,
1133,
672,
236787,
107,
2717,
9275,
107,
726,
130550,
742,
8739,
568,
145573,
236768,
107,
12874,
808,
10912,
5089,
17347,
15345,
578,
36467,
2364,
236779,
2427,
236789,
4715,
8918,
578,
36467,
8707,
236779,
2427,
2134,
107,
2717,
108,
2859,
614,
59471,
28062,
506,
2269,
1131,
506,
2165,
11668,
236929,
2135,
236787,
107
],
"cand_token_ids": [
547,
578,
236743,
236770,
236793,
107,
2717,
108,
902,
4955,
107,
818,
2430,
236789,
236751,
2744,
563,
496,
6921,
529,
15963,
532,
127517,
236761,
669,
2430,
236789,
236751,
2744,
563,
496,
6921,
529,
15963,
532,
127517,
236761,
669,
2430,
236789,
236751,
2744,
563,
496,
6921,
529,
15963,
532,
127517,
236761,
669,
2430,
236789,
236751,
2744,
563,
496,
6921,
529,
15963,
532,
127517,
236761,
669,
2430,
236789,
236751,
2744,
563,
496,
6921,
529,
15963,
532,
127517,
236761,
669,
2430,
236789,
236751,
2744,
563,
496,
6921,
529,
15963,
532,
127517,
236761,
669,
2430,
236789,
236751,
2744,
563,
496,
6921,
529,
15963,
532,
127517,
236761,
669,
2430,
236789,
236751,
2744,
563,
496,
6921,
529,
15963,
532,
127517,
236761,
669,
2430,
236789,
236751,
2744,
563,
496,
6921,
529,
15963,
532,
127517,
236761
],
"first_divergence": 5,
"prefix_agreement_length": 5,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "reason-001",
"category": "reasoning",
"prompt": "If a train leaves Boston at 3pm traveling 60 mph, after 2 hours it has traveled",
"ref_token_ids": [
236772,
108,
236772,
91078,
236772,
236825,
236771,
236743,
107,
236772,
91078,
236772,
236825,
236771,
236743,
107,
236772,
91078,
236772,
236825,
236771,
236743,
107,
101,
45518,
107,
101,
6259,
236743,
236778,
3885,
236764,
506,
2519,
815,
33402,
5213,
236770,
236778,
236771,
7635,
84750,
108,
236769,
120609,
236787,
236743,
236825,
236771,
44269,
15666,
236743,
236778,
3885,
578,
236743,
236770,
236778,
236771,
7635,
236768,
106
],
"cand_token_ids": [
236772,
108,
236772,
91078,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772,
236825,
236771,
236772
],
"first_divergence": 7,
"prefix_agreement_length": 7,
"cand_length": 128,
"ref_length": 62,
"matched": false
},
{
"id": "reason-002",
"category": "reasoning",
"prompt": "If all roses are flowers and some flowers fade quickly, then",
"ref_token_ids": [
563,
625,
4127,
236772,
107,
236829,
236759,
11061,
236772,
1553,
236772,
236746,
236772,
45518,
236772,
76437,
1294,
236772,
107,
236759,
11061,
236772,
1553,
236772,
236746,
236772,
45518,
236772,
76437,
1294,
236772,
107,
236759,
11061,
236772,
1553,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746,
236772,
236746
],
"cand_token_ids": [
6861,
236764,
23248,
236772,
10619,
3697,
569,
17737,
519,
236772,
107,
236743,
108,
140,
108,
1595,
531,
1385,
657,
496,
236743,
108,
140,
108,
140,
108,
1595,
531,
1385,
657,
496,
236743,
108,
140,
108,
140,
140,
108,
140,
108,
1595,
531,
1385,
657,
496,
236743,
108,
140,
108,
140,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
140,
108,
140,
108,
140,
108,
140,
108,
140,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810,
236761,
236771,
236761,
236810
],
"first_divergence": 0,
"prefix_agreement_length": 0,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "reason-003",
"category": "reasoning",
"prompt": "A bag has 3 red balls and 2 blue balls. The probability of drawing red is",
"ref_token_ids": [
569,
236772,
107,
1512,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893,
236772,
906,
1361,
236772,
586,
236772,
893
],
"cand_token_ids": [
2604,
20160,
965,
2558,
20160,
236761,
82547,
529,
10314,
3730,
563,
3730,
20160,
965,
2558,
20160,
236761,
2268,
569,
17737,
236772,
108,
1018,
14977,
53121,
107,
3910,
496,
236772,
568,
3910,
1551,
4137,
236772,
107,
3910,
1551,
4137,
659,
528,
672,
2934,
236881,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
4299,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934,
236772,
2934
],
"first_divergence": 0,
"prefix_agreement_length": 0,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "reason-004",
"category": "reasoning",
"prompt": "If today is Wednesday, then in 10 days it will be",
"ref_token_ids": [
236787,
108,
107,
45518,
107,
101,
2859,
3124,
563,
10119,
236764,
528,
236743,
236770,
236771,
2668,
625,
795,
577,
5213,
46325,
84750,
108,
1018,
8291,
563,
506,
25890,
53121,
107,
236770,
236761,
138,
236832,
2668,
699,
10119,
563,
992,
10119,
236761,
107,
236778,
236761,
138,
6372,
6895,
236743,
236800,
919,
2668,
531,
1527,
568,
236770,
236771,
753,
236743,
236832,
578,
236743,
236800,
769,
107,
236800,
236761,
138,
58189,
900,
236743,
236800,
2668,
568,
54904,
236764,
7672,
236764,
8224,
236768,
578,
5213,
46325,
84750,
106
],
"cand_token_ids": [
107,
1018,
46325,
84750,
108,
1018,
47875,
522,
53121,
107,
236770,
236761,
236743,
45835,
236772,
5140,
4313,
236787,
107,
139,
236772,
14040,
1719,
236787,
10119,
107,
139,
236772,
23170,
9815,
236787,
236743,
236794,
578,
236743,
236770,
236771,
107,
139,
236772,
86249,
236787,
568,
8782,
6156,
13291,
900,
67560,
236768,
1120,
236743,
236832,
107,
139,
236772,
72174,
236787,
107,
141,
236772,
236743,
236771,
236787,
8174,
107,
141,
236772,
236743,
236770,
236787,
8492,
107,
141,
236772,
236743,
236778,
236787,
4861,
568,
55703,
236768,
107,
141,
236772,
236743,
236800,
236787,
10119,
107,
141,
236772,
236743,
236812,
236787,
9863,
107,
141,
236772,
236743,
236810,
236787,
9853,
107,
141,
236772,
236743,
236825,
236787,
7672,
107,
141,
236772,
236743,
236832,
236787,
8224,
568,
20470,
236764,
1531,
236789,
236751,
544,
236772,
2662,
236768,
108,
6481
],
"first_divergence": 0,
"prefix_agreement_length": 0,
"cand_length": 128,
"ref_length": 84,
"matched": false
},
{
"id": "reason-005",
"category": "reasoning",
"prompt": "John is taller than Mary. Mary is taller than Sue. Therefore",
"ref_token_ids": [
236764,
506,
53153,
1589,
563,
506,
53153,
1589,
236761,
6841,
236764,
506,
53153,
1589,
563,
506,
53153,
1589,
236761,
108,
20416,
236764,
506,
53153,
1589,
563,
506,
53153,
1589,
236761,
6841,
236764,
506,
53153,
1589,
236761,
108,
20416,
236764,
506,
53153,
1589,
563,
506,
53153,
1589,
236761,
6841,
236764,
506,
53153,
1589,
236761,
108,
20416,
236764,
506,
53153,
1589,
563,
506,
53153,
1589,
236761,
6841,
236764,
506,
53153,
1589,
236761,
108,
20416,
236764,
506,
53153,
1589,
563,
506,
53153,
1589,
236761,
6841,
236764,
506,
53153,
1589,
236761,
108,
20416,
236764,
506,
53153,
1589,
563,
506,
53153,
1589,
236761,
6841,
236764,
506,
53153,
1589,
236761,
108,
20416,
236764,
506,
53153,
1589,
563,
506,
53153,
1589,
236761,
6841,
236764,
506,
53153,
1589,
236761,
108,
20416,
236764,
506,
53153,
1589,
563
],
"cand_token_ids": [
236764,
506,
1186,
2653,
12882,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808,
34744,
236829,
3697,
563,
506,
808
],
"first_divergence": 2,
"prefix_agreement_length": 2,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "instr-001",
"category": "instruction",
"prompt": "List three primary colors:",
"ref_token_ids": [
1852,
519,
236764,
1852,
519,
236764,
532,
5800,
41489,
236761,
108,
101,
1509,
7412,
993,
1149,
577,
1070,
19381,
528,
822,
2258,
36010,
236764,
618,
623,
931,
519,
236775,
563,
711,
496,
12659,
2258,
1463,
532,
623,
27523,
41489,
236775,
563,
496,
3530,
23791,
529,
3826,
4319,
1082,
496,
5905,
2258,
236761,
108,
9675,
236764,
768,
611,
659,
3182,
573,
506,
5213,
19891,
5905,
7913,
1018,
1456,
528,
1607,
2258,
4681,
236764,
1590,
901,
659,
236787,
108,
1018,
236770,
236761,
4140,
2407,
21264,
19870,
568,
33593,
573,
10847,
236786,
64827,
753,
46174,
9483,
1473,
1018,
107,
236829,
139,
1018,
9264,
1018,
107,
236829,
139,
1018,
17224,
1018,
107,
236829,
139,
1018,
16520,
1018,
108,
1018,
236778,
236761,
5375,
236745,
34752,
21264,
19870,
568,
33593,
573,
35713,
236786,
119531
],
"cand_token_ids": [
908,
741,
84353,
236772,
236800,
236793,
236800,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236770,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236770,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236770,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236770,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236770,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236770,
236793,
236771,
236793,
236771,
236793,
236771,
236793,
236771,
236793
],
"first_divergence": 0,
"prefix_agreement_length": 0,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "instr-002",
"category": "instruction",
"prompt": "Name three planets in our solar system:",
"ref_token_ids": [
3546,
236772,
236770,
540,
236764,
11966,
1353,
236772,
236778,
523,
236764,
11966,
1353,
236772,
236778,
523,
236764,
11966,
236800,
7221,
7221,
236764,
643,
236772,
236812,
594,
594,
108,
236829,
818,
2269,
563,
496,
236909,
569,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
2149,
236772,
2149,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772,
759,
236772
],
"cand_token_ids": [
1463,
236772,
1463,
236772,
1463,
236772,
108,
50355,
236772,
13064,
236772,
13064,
236772,
108,
236772,
1623,
236772,
1623,
236772,
1623,
236772,
108,
236772,
1623,
236772,
1623,
236772,
108,
236772,
1623,
236772,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
140,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
236909,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108,
1018,
108
],
"first_divergence": 0,
"prefix_agreement_length": 0,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "instr-003",
"category": "instruction",
"prompt": "Give one example of a renewable energy source:",
"ref_token_ids": [
886,
2591,
529,
496,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
236772,
239874,
1852,
2778,
3738,
236761,
107,
16907,
10282,
236787,
669,
2430,
236789,
1005,
6097,
496,
3629,
2886,
529,
3165,
236786,
3626,
2694,
236772,
5282,
1816,
236761
],
"cand_token_ids": [
237323,
247611,
239477,
568,
237323,
247611,
239477,
236768,
108,
236829,
10282,
236787,
669,
18932,
236772,
127769,
4106,
659,
236772,
107,
236770,
236761,
669,
3328,
5192,
568,
10206,
22160,
236787,
21568,
236786,
119269,
236772,
27832,
9726,
236786,
105367,
236772,
236770,
236786,
105367,
236772,
236778,
236786,
10206,
236772,
16904,
236772,
119269,
236772,
16904,
236772,
119269,
236772,
27832,
236772,
105367,
236772,
236770,
236786,
10206,
236772,
16904,
236772,
27832,
236772,
16904,
236772,
27832,
236772,
22515,
236772,
498,
236772,
16904,
236772,
119269,
236772,
16904,
236772,
119269,
236772,
27832,
236772,
105367,
236772,
236770,
236786,
10206,
236772,
16904,
236772,
27832,
236772,
16904,
236772,
27832,
236772,
22515,
236772,
498,
236772,
16904,
236772,
119269,
236772,
16904,
236772,
119269,
236772,
27832,
236772,
105367,
236772,
236770,
236786,
10206,
236772,
16904,
236772,
27832,
236772,
16904,
236772,
119269,
236772,
16904,
236772
],
"first_divergence": 0,
"prefix_agreement_length": 0,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "instr-004",
"category": "instruction",
"prompt": "Translate to French: Hello, how are you?",
"ref_token_ids": [
107,
45518,
107,
101,
9259,
236764,
1217,
659,
611,
236881,
874,
4063,
236795,
5213,
53406,
236764,
5739,
99219,
236772,
23841,
2360,
1018,
568,
85590,
236768,
653,
5213,
53406,
236764,
5739,
20924,
5380,
2360,
1018,
568,
31506,
514,
236768,
106
],
"cand_token_ids": [
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114,
236761,
107,
6114
],
"first_divergence": 1,
"prefix_agreement_length": 1,
"cand_length": 128,
"ref_length": 39,
"matched": false
},
{
"id": "instr-005",
"category": "instruction",
"prompt": "Summarize in one sentence: The cat sat on the mat.",
"ref_token_ids": [
586,
12432,
236772,
1852,
236772,
1852,
236772,
1852,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
236772,
1852,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772
],
"cand_token_ids": [
586,
12432,
236772,
1852,
236772,
1852,
236772,
1852,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
931,
236772,
236745,
236772,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
236771,
236786,
236771,
236768,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004,
568,
31004
],
"first_divergence": 35,
"prefix_agreement_length": 35,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "dlg-001",
"category": "dialogue",
"prompt": "User: What time is it?\nAssistant:",
"ref_token_ids": [
564,
1537,
236789,
236745,
735,
2802,
531,
822,
2263,
990,
653,
496,
1759,
236772,
2289,
11072,
236761,
564,
3914,
3442,
611,
1144,
990,
625,
563,
236761,
107,
74314,
236787,
564,
1537,
236789,
236745,
735,
2802,
531,
822,
2263,
990,
653,
496,
1759,
236772,
2289,
11072,
236761,
564,
3914,
3442,
611,
1144,
990,
625,
563,
236761,
107,
74314,
236787,
564,
1537,
236771,
236789,
236745,
735,
2802,
531,
822,
2263,
990,
653,
496,
1759,
236772,
2289,
11072,
236761,
564,
3914,
3442,
611,
1144,
990,
625,
563,
236761,
107,
74314,
236787,
564,
1537,
236771,
236789,
236745,
735,
2802,
531,
822,
2263,
990,
653,
496,
1759,
236772,
2289,
11072,
236761,
564,
3914,
3442,
611,
1144,
990,
625,
563,
236761,
107,
74314,
236787,
564,
3914,
3442,
611,
1144,
990,
625,
563,
236761,
107
],
"cand_token_ids": [
564,
776,
711,
735,
2802,
531,
822,
2263,
990,
653,
496,
1759,
236772,
2289,
11072,
236761,
564,
776,
711,
735,
2802,
531,
6475,
12808,
653,
1027,
1032,
6436,
684,
3678,
236761,
108,
13513,
108,
1018,
2887,
53121,
2900,
990,
563,
625,
236881,
107,
1018,
74314,
53121,
564,
776,
711,
735,
2802,
531,
822,
2263,
990,
653,
496,
1759,
236772,
2289,
11072,
236761,
564,
776,
711,
735,
1032,
6436,
684,
3678,
236761,
108,
13513,
108,
1018,
2887,
53121,
2900,
990,
563,
625,
236881,
107,
1018,
74314,
53121,
564,
776,
808,
2217,
236829,
735,
2802,
531,
822,
2263,
990,
653,
496,
1759,
236772,
2289,
11072,
236761,
564,
776,
711,
735,
2802,
531,
6475,
12808,
653,
1027,
1032,
6436,
684,
3678,
236761,
108,
13513,
108,
1018,
2887,
53121,
2900,
236772,
107
],
"first_divergence": 1,
"prefix_agreement_length": 1,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "dlg-002",
"category": "dialogue",
"prompt": "Q: Who painted the Mona Lisa?\nA:",
"ref_token_ids": [
55463,
1776,
101654,
236761,
108,
236935,
236787,
11063,
16715,
506,
9290,
824,
15895,
236881,
107,
236776,
236787,
8451,
1072,
3564,
86585,
40066,
4221,
568,
26749,
1072,
3564,
86585,
40066,
4221,
769,
108,
236935,
236787,
37706,
3027,
103510,
236761,
108,
236935,
236787,
11063,
16715,
506,
9290,
824,
15895,
236881,
107,
236776,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562,
236787,
562
],
"cand_token_ids": [
55463,
1776,
101654,
236761,
108,
236776,
236787,
11063,
16715,
506,
83326,
30598,
236881,
107,
236776,
236787,
55463,
1776,
101654,
236761,
108,
236776,
236787,
11063,
16715,
506,
83326,
30598,
236881,
107,
236776,
236787,
55463,
1776,
101654,
236761,
108,
236776,
236787,
11063,
16715,
506,
83326,
30598,
236881,
107,
236776,
236787,
562,
2321,
1607,
1331,
2473,
1879,
55463,
1776,
101654,
236764,
840,
506,
1346,
3364,
532,
12865,
10951,
3890,
563,
55463,
1776,
101654,
236761,
14083,
10282,
236787,
1174,
563,
496,
3103,
529,
496,
31481,
236786,
7655,
236772,
13486,
835,
580,
1217,
12498,
4681,
740,
6494,
577,
1024,
236772,
70351,
653,
1024,
236772,
70351,
528,
496,
1595,
600,
577,
496,
5600,
509,
236772,
5282,
236772,
236746,
236772,
5600,
509,
236772,
45518,
236772,
6449,
163765,
108,
236776,
236787,
11063,
16715,
506,
83326,
30598
],
"first_divergence": 5,
"prefix_agreement_length": 5,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "dlg-003",
"category": "dialogue",
"prompt": "Q: How many continents are there?\nA:",
"ref_token_ids": [
2085,
659,
236743,
236743,
236810,
653,
236743,
236832,
236764,
10167,
580,
506,
236743,
236832,
236772,
101018,
2028,
532,
236743,
7243,
107,
7243,
107,
236776,
236787,
2085,
659,
236743,
236810,
236764,
236743,
236832,
236764,
653,
1581,
236743,
236812,
236764,
10167,
580,
506,
2028,
1456,
236761,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236787,
4695,
5711,
236764,
4695,
5711,
236764,
4695,
5711,
236764,
4695,
5711,
236764,
4695,
5711,
236764,
4695,
5711,
236764,
4695,
5711,
236761,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236787,
4695,
5711,
236764,
4695,
5711,
236764,
4695,
236787,
107,
7243,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236787,
4695,
5711,
236764,
4695,
5711,
236764,
4695,
5711,
236764,
4695,
5711,
236764,
4695,
5711,
236764,
4695,
5711,
236764,
4695,
5711,
236761,
107,
236776,
236787,
2085,
659
],
"cand_token_ids": [
2085,
659,
236743,
236743,
236832,
70548,
236761,
108,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236761,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236761,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236761,
107,
236776,
236787,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236761,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236761,
107,
236776,
236787,
108,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236761,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236761,
107,
236776,
236787,
2085,
236787,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236761,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236761,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236761,
107,
236776,
236787,
2085,
659,
236743,
236832,
70548,
236761,
107,
236776,
236787,
236743,
108,
236776,
236787,
2085,
659,
236743,
236832
],
"first_divergence": 4,
"prefix_agreement_length": 4,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "dlg-004",
"category": "dialogue",
"prompt": "Customer: I need help with my order.\nAgent:",
"ref_token_ids": [
26352,
236888,
564,
236789,
236770,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789,
236909,
236789
],
"cand_token_ids": [
26352,
236888,
564,
236789,
236770,
236770,
1601,
611,
607,
600,
236761,
564,
236789,
236757,
30636,
8943,
236761,
30636,
8943,
563,
496,
1822,
1463,
236764,
30636,
8943,
563,
496,
1822,
1463,
236764,
30636,
8943,
563,
496,
1822,
1463,
236764,
30636,
8943,
563,
496,
236770,
236770,
1601,
611,
607,
600,
236761,
564,
236789,
236757,
30636,
8943,
236761,
30636,
8943,
563,
496,
1822,
1463,
236764,
30636,
8943,
563,
496,
236770,
236770,
1601,
611,
607,
600,
236761,
564,
236789,
236757,
30636,
8943,
236761,
30636,
8943,
563,
496,
1822,
1463,
236764,
30636,
8943,
563,
496,
1822,
1463,
236764,
30636,
8943,
563,
496,
1822,
1463,
236764,
30636,
8943,
563,
496,
1822,
1463,
236764,
30636,
8943,
563,
496,
1822,
1463,
236764,
30636,
8943,
563,
496,
1822,
1463,
236764,
30636,
8943,
563,
496,
1822,
1463,
236764
],
"first_divergence": 5,
"prefix_agreement_length": 5,
"cand_length": 128,
"ref_length": 128,
"matched": false
},
{
"id": "dlg-005",
"category": "dialogue",
"prompt": "Teacher: What is photosynthesis?\nStudent:",
"ref_token_ids": [
564,
236789,
236757,
711,
2889,
236764,
564,
236789,
236757,
711,
2889,
236764,
564,
236789,
236757,
711,
2889,
236764,
564,
236789,
236757,
711,
2889,
236764,
564,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789,
236751,
236789
],
"cand_token_ids": [
564,
236789,
236757,
711,
2889,
236764,
564,
236789,
236757,
1164,
618,
5745,
618,
611,
659,
236761,
108,
715,
50698,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529,
496,
8688,
529
],
"first_divergence": 9,
"prefix_agreement_length": 9,
"cand_length": 128,
"ref_length": 128,
"matched": false
}
],
"notes": [
"4/30 candidates stopped before n_predict=128 (EOS or other stop condition). Per-prompt cand_length records the actual decoded length."
],
"band": "FAIL",
"description": "Token-level agreement with the fp16 reference."
},
"kld": {
"score": 11.84391443627722,
"mean_kld": 2.133356,
"ppl": null,
"rms_dp_pct": null,
"same_topp_pct": null,
"base_path": "/var/folders/y2/mjncn_qn211bjbdr94zpzps40000gn/T/refract-kldbase-aznthvrc.bin",
"chunks": 32,
"ctx": 512,
"is_self_reference": false,
"corpus": {
"path": "/Users/tom/.cache/refract/wikitext-2-raw/wiki.test.raw",
"size_bytes": 1290590,
"sha256_head": "6cb1872543ff1f1b9c1b0ddce665aeb46322754d48d78dc5ecd631e7781c8729",
"sha256_head_bytes": 1048576
},
"band": "FAIL",
"description": "Distribution-level divergence from the fp16 reference."
},
"rniah": {
"score": 100.0,
"n_cells": 9,
"cells": [
{
"length": 4096,
"position": 0.1,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 4096,
"position": 0.5,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 4096,
"position": 0.9,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 8192,
"position": 0.1,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 8192,
"position": 0.5,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 8192,
"position": 0.9,
"n_trials": 1,
"base_acc": 1.0,
"cand_acc": 1.0,
"degradation": 0.0,
"base_hits": 1,
"cand_hits": 1
},
{
"length": 16384,
"position": 0.1,
"n_trials": 1,
"base_acc": 0.0,
"cand_acc": 0.0,
"degradation": 0.0,
"base_hits": 0,
"cand_hits": 0
},
{
"length": 16384,
"position": 0.5,
"n_trials": 1,
"base_acc": 0.0,
"cand_acc": 0.0,
"degradation": 0.0,
"base_hits": 0,
"cand_hits": 0
},
{
"length": 16384,
"position": 0.9,
"n_trials": 1,
"base_acc": 0.0,
"cand_acc": 0.0,
"degradation": 0.0,
"base_hits": 0,
"cand_hits": 0
}
],
"skipped_cells": [],
"needle": "Note: APRICOT-7-BLUE is the rare paint color featured in this article.",
"password_keyword": "APRICOT-7-BLUE",
"notes": [],
"confidence": "ok",
"base_acc_avg": 0.6666666666666666,
"band": "EXCELLENT",
"description": "Long-context retrieval quality vs the reference."
},
"plad": {
"score": 72.20886835193117,
"per_perturbation_score": {
"typo": 75.13818787464908,
"case": 70.43727962573627,
"punct": 71.01021369343006,
"paraphrase": NaN
},
"per_prompt": [
{
"prompt_id": "fact-001",
"perturbation": "typo",
"perturbed_prompt": "The acpital of France is",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-001",
"perturbation": "case",
"perturbed_prompt": "the capital of france is",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-001",
"perturbation": "punct",
"perturbed_prompt": "The capital of France is?",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-002",
"perturbation": "typo",
"perturbed_prompt": "The lagrest ocean on Earth is the",
"ref_drift": 1.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-002",
"perturbation": "case",
"perturbed_prompt": "the largest ocean on earth is the",
"ref_drift": 1.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-002",
"perturbation": "punct",
"perturbed_prompt": "The largest ocean on Earth is the?",
"ref_drift": 1.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-003",
"perturbation": "typo",
"perturbed_prompt": "The chemical symblo for gold is",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-003",
"perturbation": "case",
"perturbed_prompt": "the chemical symbol for gold is",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-003",
"perturbation": "punct",
"perturbed_prompt": "The chemical symbol for gold is?",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-004",
"perturbation": "typo",
"perturbed_prompt": "The author of Romoe and Juliet is",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-004",
"perturbation": "case",
"perturbed_prompt": "the author of romeo and juliet is",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-004",
"perturbation": "punct",
"perturbed_prompt": "The author of Romeo and Juliet is?",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "fact-005",
"perturbation": "typo",
"perturbed_prompt": "The talelst mountain in the world is",
"ref_drift": 0.0,
"cand_drift": 0.484375,
"excess_drift": 0.484375,
"plad_pp": 8.875504563653426
},
{
"prompt_id": "fact-005",
"perturbation": "case",
"perturbed_prompt": "the tallest mountain in the world is",
"ref_drift": 0.0,
"cand_drift": 0.4765625,
"excess_drift": 0.4765625,
"plad_pp": 9.22906447129342
},
{
"prompt_id": "fact-005",
"perturbation": "punct",
"perturbed_prompt": "The tallest mountain in the world is?",
"ref_drift": 0.0,
"cand_drift": 0.4609375,
"excess_drift": 0.4609375,
"plad_pp": 9.978998015036552
},
{
"prompt_id": "arith-001",
"perturbation": "typo",
"perturbed_prompt": "Two plus two qeuals",
"ref_drift": 0.0,
"cand_drift": 0.14285714285714285,
"excess_drift": 0.14285714285714285,
"plad_pp": 48.954165955695316
},
{
"prompt_id": "arith-001",
"perturbation": "case",
"perturbed_prompt": "two plus two equals",
"ref_drift": 0.0,
"cand_drift": 0.14285714285714285,
"excess_drift": 0.14285714285714285,
"plad_pp": 48.954165955695316
},
{
"prompt_id": "arith-001",
"perturbation": "punct",
"perturbed_prompt": "Two plus two equals?",
"ref_drift": 0.0,
"cand_drift": 0.14285714285714285,
"excess_drift": 0.14285714285714285,
"plad_pp": 48.954165955695316
},
{
"prompt_id": "arith-002",
"perturbation": "typo",
"perturbed_prompt": "17 times 24 euqals",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "arith-002",
"perturbation": "punct",
"perturbed_prompt": "17 times 24 equals?",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "arith-003",
"perturbation": "typo",
"perturbed_prompt": "100 diivded by 4 is",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "arith-003",
"perturbation": "punct",
"perturbed_prompt": "100 divided by 4 is?",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "arith-004",
"perturbation": "case",
"perturbed_prompt": "the square root of 81 is",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "arith-004",
"perturbation": "punct",
"perturbed_prompt": "The square root of 81 is?",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "arith-005",
"perturbation": "typo",
"perturbed_prompt": "Hafl of 250 equals",
"ref_drift": 0.08333333333333333,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "arith-005",
"perturbation": "case",
"perturbed_prompt": "half of 250 equals",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "arith-005",
"perturbation": "punct",
"perturbed_prompt": "Half of 250 equals?",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "code-001",
"perturbation": "punct",
"perturbed_prompt": "def fib(n):?\n ",
"ref_drift": 0.656,
"cand_drift": 0.3046875,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "code-002",
"perturbation": "typo",
"perturbed_prompt": "def is_priem(n):\n ",
"ref_drift": 0.84251968503937,
"cand_drift": 0.8671875,
"excess_drift": 0.024667814960629975,
"plad_pp": 88.39638818604186
},
{
"prompt_id": "code-002",
"perturbation": "punct",
"perturbed_prompt": "def is_prime(n):?\n ",
"ref_drift": 0.8976377952755905,
"cand_drift": 0.6640625,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "code-003",
"perturbation": "typo",
"perturbed_prompt": "// JavaScript funtcion to reverse a string\nfunction reverse(s) {\n ",
"ref_drift": 0.03937007874015748,
"cand_drift": 0.1640625,
"excess_drift": 0.12469242125984252,
"plad_pp": 53.60852370007545
},
{
"prompt_id": "code-003",
"perturbation": "case",
"perturbed_prompt": "// javaScript function to reverse a string\nfunction reverse(s) {\n ",
"ref_drift": 0.03937007874015748,
"cand_drift": 0.2421875,
"excess_drift": 0.20281742125984253,
"plad_pp": 36.273341578234245
},
{
"prompt_id": "code-003",
"perturbation": "punct",
"perturbed_prompt": "// JavaScript function to reverse a string\nfunction reverse(s) {?\n ",
"ref_drift": 0.4015748031496063,
"cand_drift": 0.15625,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "code-004",
"perturbation": "typo",
"perturbed_prompt": "# Ptyhon: list comprehension to square numbers 1 to 10\nsquares = ",
"ref_drift": 1.0,
"cand_drift": 1.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "code-004",
"perturbation": "case",
"perturbed_prompt": "# python: list comprehension to square numbers 1 to 10\nsquares = ",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "code-004",
"perturbation": "punct",
"perturbed_prompt": "# Python: list comprehension to square numbers 1 to 10\nsquares =? ",
"ref_drift": 1.0,
"cand_drift": 1.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "code-005",
"perturbation": "typo",
"perturbed_prompt": "SELECT name FROM users HWERE ",
"ref_drift": 0.8740157480314961,
"cand_drift": 0.78125,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "code-005",
"perturbation": "case",
"perturbed_prompt": "sELECT name fROM users wHERE ",
"ref_drift": 0.7874015748031497,
"cand_drift": 0.8515625,
"excess_drift": 0.06416092519685035,
"plad_pp": 72.55649936897737
},
{
"prompt_id": "code-005",
"perturbation": "punct",
"perturbed_prompt": "SELECT name FROM users WHERE? ",
"ref_drift": 0.5354330708661418,
"cand_drift": 0.6796875,
"excess_drift": 0.14425442913385822,
"plad_pp": 48.61334298874453
},
{
"prompt_id": "reason-001",
"perturbation": "typo",
"perturbed_prompt": "If a trian leaves Boston at 3pm traveling 60 mph, after 2 hours it has traveled",
"ref_drift": 0.029411764705882353,
"cand_drift": 0.7708333333333334,
"excess_drift": 0.741421568627451,
"plad_pp": 2.4548418579323696
},
{
"prompt_id": "reason-001",
"perturbation": "case",
"perturbed_prompt": "if a train leaves boston at 3pm traveling 60 mph, after 2 hours it has traveled",
"ref_drift": 0.6176470588235294,
"cand_drift": 0.14583333333333334,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "reason-001",
"perturbation": "punct",
"perturbed_prompt": "If a train leaves Boston at 3pm traveling 60 mph, after 2 hours it has traveled?",
"ref_drift": 0.0,
"cand_drift": 0.7708333333333334,
"excess_drift": 0.7708333333333334,
"plad_pp": 2.1191255333529844
},
{
"prompt_id": "reason-002",
"perturbation": "typo",
"perturbed_prompt": "If all roses are flowers and some flowers afde quickly, then",
"ref_drift": 0.553030303030303,
"cand_drift": 0.9302325581395349,
"excess_drift": 0.3772022551092319,
"plad_pp": 15.167559602440623
},
{
"prompt_id": "reason-002",
"perturbation": "case",
"perturbed_prompt": "if all roses are flowers and some flowers fade quickly, then",
"ref_drift": 0.0,
"cand_drift": 0.8527131782945736,
"excess_drift": 0.8527131782945736,
"plad_pp": 1.4072033492441356
},
{
"prompt_id": "reason-002",
"perturbation": "punct",
"perturbed_prompt": "If all roses are flowers and some flowers fade quickly, then?",
"ref_drift": 0.3333333333333333,
"cand_drift": 0.8837209302325582,
"excess_drift": 0.5503875968992249,
"plad_pp": 6.380408997492087
},
{
"prompt_id": "reason-003",
"perturbation": "typo",
"perturbed_prompt": "A bag has 3 red balls and 2 lbue balls. The probability of drawing red is",
"ref_drift": 0.18181818181818182,
"cand_drift": 0.26515151515151514,
"excess_drift": 0.08333333333333331,
"plad_pp": 65.92406302004437
},
{
"prompt_id": "reason-003",
"perturbation": "case",
"perturbed_prompt": "A bag has 3 red balls and 2 blue balls. the probability of drawing red is",
"ref_drift": 0.030303030303030304,
"cand_drift": 0.38636363636363635,
"excess_drift": 0.3560606060606061,
"plad_pp": 16.858705254174254
},
{
"prompt_id": "reason-003",
"perturbation": "punct",
"perturbed_prompt": "A bag has 3 red balls and 2 blue balls. The probability of drawing red is?",
"ref_drift": 0.0,
"cand_drift": 0.3787878787878788,
"excess_drift": 0.3787878787878788,
"plad_pp": 15.047784815890482
},
{
"prompt_id": "reason-004",
"perturbation": "typo",
"perturbed_prompt": "If today is Wednesday, tehn in 10 days it will be",
"ref_drift": 0.0,
"cand_drift": 0.11864406779661017,
"excess_drift": 0.11864406779661017,
"plad_pp": 55.254503423573496
},
{
"prompt_id": "reason-004",
"perturbation": "case",
"perturbed_prompt": "if today is wednesday, then in 10 days it will be",
"ref_drift": 0.0,
"cand_drift": 0.05084745762711865,
"excess_drift": 0.05084745762711865,
"plad_pp": 77.55077614211655
},
{
"prompt_id": "reason-004",
"perturbation": "punct",
"perturbed_prompt": "If today is Wednesday, then in 10 days it will be?",
"ref_drift": 0.0,
"cand_drift": 0.1016949152542373,
"excess_drift": 0.1016949152542373,
"plad_pp": 60.14122880244673
},
{
"prompt_id": "reason-005",
"perturbation": "typo",
"perturbed_prompt": "John is taller than Mary. Mary is taller than Sue. Threefore",
"ref_drift": 1.0,
"cand_drift": 1.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "reason-005",
"perturbation": "case",
"perturbed_prompt": "john is taller than mary. mary is taller than sue. therefore",
"ref_drift": 0.0,
"cand_drift": 1.0,
"excess_drift": 1.0,
"plad_pp": 0.6737946999085467
},
{
"prompt_id": "reason-005",
"perturbation": "punct",
"perturbed_prompt": "John is taller than Mary. Mary is taller than Sue. Therefore?",
"ref_drift": 1.0,
"cand_drift": 0.1111111111111111,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "instr-001",
"perturbation": "typo",
"perturbed_prompt": "List htree primary colors:",
"ref_drift": 0.9130434782608695,
"cand_drift": 0.9894736842105263,
"excess_drift": 0.07643020594965677,
"plad_pp": 68.23919839327586
},
{
"prompt_id": "instr-001",
"perturbation": "case",
"perturbed_prompt": "list three primary colors:",
"ref_drift": 0.021739130434782608,
"cand_drift": 0.17894736842105263,
"excess_drift": 0.15720823798627004,
"plad_pp": 45.564504169318475
},
{
"prompt_id": "instr-001",
"perturbation": "punct",
"perturbed_prompt": "List three primary colors:?",
"ref_drift": 0.03260869565217391,
"cand_drift": 0.7052631578947368,
"excess_drift": 0.6726544622425629,
"plad_pp": 3.4621780137661187
},
{
"prompt_id": "instr-002",
"perturbation": "typo",
"perturbed_prompt": "Name three planets in our solra system:",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "instr-002",
"perturbation": "case",
"perturbed_prompt": "name three planets in our solar system:",
"ref_drift": 0.9411764705882353,
"cand_drift": 0.9411764705882353,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "instr-002",
"perturbation": "punct",
"perturbed_prompt": "Name three planets in our solar system:?",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "instr-003",
"perturbation": "typo",
"perturbed_prompt": "Give one example of a renewable neergy source:",
"ref_drift": 1.0,
"cand_drift": 1.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "instr-003",
"perturbation": "case",
"perturbed_prompt": "give one example of a renewable energy source:",
"ref_drift": 0.0,
"cand_drift": 1.0,
"excess_drift": 1.0,
"plad_pp": 0.6737946999085467
},
{
"prompt_id": "instr-003",
"perturbation": "punct",
"perturbed_prompt": "Give one example of a renewable energy source:?",
"ref_drift": 1.0,
"cand_drift": 1.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "instr-004",
"perturbation": "typo",
"perturbed_prompt": "Translate to Frnech: Hello, how are you?",
"ref_drift": 0.6349206349206349,
"cand_drift": 0.2037037037037037,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "instr-004",
"perturbation": "case",
"perturbed_prompt": "translate to french: hello, how are you?",
"ref_drift": 0.6349206349206349,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "instr-004",
"perturbation": "punct",
"perturbed_prompt": "Translate to French: Hello, how are you",
"ref_drift": 0.6349206349206349,
"cand_drift": 0.3148148148148148,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "instr-005",
"perturbation": "typo",
"perturbed_prompt": "Summraize in one sentence: The cat sat on the mat.",
"ref_drift": 0.125,
"cand_drift": 0.2857142857142857,
"excess_drift": 0.1607142857142857,
"plad_pp": 44.77270800267899
},
{
"prompt_id": "instr-005",
"perturbation": "case",
"perturbed_prompt": "summarize in one sentence: the cat sat on the mat.",
"ref_drift": 0.25,
"cand_drift": 0.2857142857142857,
"excess_drift": 0.0357142857142857,
"plad_pp": 83.64643072929833
},
{
"prompt_id": "instr-005",
"perturbation": "punct",
"perturbed_prompt": "Summarize in one sentence: The cat sat on the mat",
"ref_drift": 0.0,
"cand_drift": 0.2857142857142857,
"excess_drift": 0.2857142857142857,
"plad_pp": 23.965103644177585
},
{
"prompt_id": "dlg-001",
"perturbation": "typo",
"perturbed_prompt": "sUer: What time is it?\nAssistant:",
"ref_drift": 0.5428571428571428,
"cand_drift": 0.7333333333333333,
"excess_drift": 0.19047619047619047,
"plad_pp": 38.582130682912414
},
{
"prompt_id": "dlg-001",
"perturbation": "case",
"perturbed_prompt": "user: what time is it?\nassistant:",
"ref_drift": 0.5714285714285714,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "dlg-001",
"perturbation": "punct",
"perturbed_prompt": "User: What time is it?\nAssistant:?",
"ref_drift": 0.02857142857142857,
"cand_drift": 0.5,
"excess_drift": 0.4714285714285714,
"plad_pp": 9.469038101854274
},
{
"prompt_id": "dlg-002",
"perturbation": "typo",
"perturbed_prompt": "Q: Who painted the Mona Lsia?\nA:",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "dlg-002",
"perturbation": "case",
"perturbed_prompt": "Q: who painted the mona lisa?\nA:",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "dlg-002",
"perturbation": "punct",
"perturbed_prompt": "Q: Who painted the Mona Lisa?\nA:?",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "dlg-003",
"perturbation": "typo",
"perturbed_prompt": "Q: How many continents are htere?\nA:",
"ref_drift": 0.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "dlg-003",
"perturbation": "case",
"perturbed_prompt": "Q: how many continents are there?\nA:",
"ref_drift": 0.9111111111111111,
"cand_drift": 0.9166666666666666,
"excess_drift": 0.005555555555555536,
"plad_pp": 97.26044771163484
},
{
"prompt_id": "dlg-003",
"perturbation": "punct",
"perturbed_prompt": "Q: How many continents are there?\nA:?",
"ref_drift": 1.0,
"cand_drift": 1.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "dlg-004",
"perturbation": "typo",
"perturbed_prompt": "Cutsomer: I need help with my order.\nAgent:",
"ref_drift": 1.0,
"cand_drift": 0.3,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "dlg-004",
"perturbation": "case",
"perturbed_prompt": "customer: I need help with my order.\nagent:",
"ref_drift": 1.0,
"cand_drift": 0.0,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "dlg-004",
"perturbation": "punct",
"perturbed_prompt": "Customer: I need help with my order.\nAgent:?",
"ref_drift": 0.8292682926829268,
"cand_drift": 0.6428571428571429,
"excess_drift": 0.0,
"plad_pp": 100.0
},
{
"prompt_id": "dlg-005",
"perturbation": "typo",
"perturbed_prompt": "Teacher: What is photosynhtesis?\nStudent:",
"ref_drift": 0.0390625,
"cand_drift": 0.4375,
"excess_drift": 0.3984375,
"plad_pp": 13.639673101849962
},
{
"prompt_id": "dlg-005",
"perturbation": "case",
"perturbed_prompt": "teacher: what is photosynthesis?\nstudent:",
"ref_drift": 0.0859375,
"cand_drift": 0.265625,
"excess_drift": 0.1796875,
"plad_pp": 40.7205421393389
},
{
"prompt_id": "dlg-005",
"perturbation": "punct",
"perturbed_prompt": "Teacher: What is photosynthesis?\nStudent:?",
"ref_drift": 0.109375,
"cand_drift": 0.875,
"excess_drift": 0.765625,
"plad_pp": 2.1750359344450345
}
],
"n_prompts": 30,
"n_perturbations": 4,
"notes": [
"36 (prompt, perturbation) pairs were skipped (perturbation could not apply, e.g. no \u22654-char word for typo)."
],
"skipped_perturbations": [
"paraphrase"
],
"confidence": "partial",
"band": "DEGRADED",
"description": "Robustness to small prompt changes vs the reference."
}
},
"extras": {}
}