framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,window_size,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,1,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,3,0.02459733436505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,7,0.024671999116738636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,1,0.023669332265853882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,3,0.023221333821614582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,15,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,31,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,31,0.023647998770078022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,63,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,15,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,63,0.024517332514127094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,127,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,7,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,127,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,255,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,255,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,511,0.03789333254098892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,511,0.031680000325044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,1023,0.05624533196290334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,1023,0.049770668148994446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,2047,0.09696533282597859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,2047,0.08310399949550629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,1,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,4095,0.17281067371368408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,4095,0.14447999993960062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,3,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,3,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,15,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,15,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,31,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,31,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,63,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,63,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,127,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,127,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,255,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,511,0.012479999413092932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,511,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,1023,0.011866666376590729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,1023,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,2047,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,2047,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,4095,0.016837333639462788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,4095,0.016789333273967106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,8191,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,8191,0.019530666371186573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,16383,0.035375999907652535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,1,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,16383,0.02906133234500885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,1,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,3,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,15,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,15,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,31,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,31,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,63,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,127,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,63,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,255,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,255,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,511,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,511,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,1023,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,2047,0.016538667182127636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,1023,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,2047,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,4095,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,4095,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,8191,0.0348693331082662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,8191,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,16383,0.056421334544817604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,16383,0.04921066761016846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,1,0.03961066653331121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,1,0.03745066622893015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,3,0.039488000174363456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,7,0.0397119993964831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,7,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,3,0.03559466699759165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,15,0.03630933413902918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,31,0.03982933362325033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,15,0.03976000100374222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,31,0.037658666570981346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,63,0.03951466580231985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,63,0.037647999823093414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,127,0.03973866750796636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,255,0.041946664452552795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,127,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,511,0.062277331948280334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,511,0.054192001620928444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,1023,0.0974826713403066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,1023,0.08493333061536153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,2047,0.17428267002105713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,2047,0.14622933665911356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,1,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,1,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,3,0.011866666376590729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,3,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,7,0.0122079998254776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,7,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,15,0.012597333639860153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,15,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,31,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,31,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,63,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,63,0.011519999553759893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,127,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,127,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,255,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,511,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,511,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,1023,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,1023,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,2047,0.02295999974012375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,2047,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,4095,0.037808001041412354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,4095,0.03123733401298523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,8191,0.06007466713587443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,8191,0.05230933427810669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,16383,0.1029919981956482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,16383,0.08701866865158081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,1,0.06842133402824402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,1,0.062314664324124656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,3,0.06990933418273926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,3,0.06280000011126201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,7,0.06871999800205231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,7,0.06251733501752217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,15,0.06855466465155284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,15,0.06237866481145223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,31,0.06860800087451935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,31,0.062447999914487205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,63,0.06850133339564006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,63,0.06270933151245117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,127,0.07054399947325389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,127,0.06243733565012614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,255,0.06447466711203258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,255,0.07056533296902974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,511,0.09520000219345093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,511,0.11137066284815471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,1023,0.1546346644560496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,1023,0.1816800038019816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,1,0.13011200229326883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,1,0.11541866262753804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,3,0.11561066905657451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,3,0.12995733817418417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,7,0.11542399724324544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,7,0.12995733817418417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,15,0.13147733608881632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,15,0.11542399724324544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,31,0.12988799810409546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,63,0.12973333398501077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,63,0.11546666423479716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,31,0.11627200245857239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,127,0.1156213382879893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,127,0.1277653376261393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,255,0.12921067078908285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,255,0.1156213382879893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,1,0.2488106687863668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,1,0.22412800788879395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,3,0.24889600276947021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,3,0.22311999400456747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,7,0.24951465924580893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,7,0.22242667277654013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,15,0.24874132871627808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,15,0.2241706649462382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,255,0.03605866680542628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,31,0.24830400943756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,31,0.22288533051808676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,63,0.25037866830825806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,1,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,63,0.2234613299369812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,1,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,127,0.2426453431447347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,3,0.014666666587193808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,3,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,127,0.22095467646916708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,7,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,7,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,15,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,31,0.014021333307027817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,31,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,63,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,63,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,127,0.01341333364446958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,127,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,255,0.013616000612576803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,255,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,511,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,511,0.018565333137909572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,1023,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,1023,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,2047,0.039733332892258964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,2047,0.03146666785081228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,4095,0.06069866816202799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,4095,0.05294933418432871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,8191,0.1036906639734904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,8191,0.08875733613967896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,16383,0.18927999337514242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,16383,0.15869866808255514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,15,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,1,0.4850613276163737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,1,0.4368799924850464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,3,0.4882986545562744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,3,0.4334933360417684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,7,0.4888373215993245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,7,0.4357653458913167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,15,0.4373013178507487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,15,0.4845920006434123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,31,0.4872693220774333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,31,0.4368640184402466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,63,0.4848959843317668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,63,0.4357759952545166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,1,0.8553280035654703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,1,0.9637119770050049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,3,0.8583947022755941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,3,0.9583893616994222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,7,0.8598453203837076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,7,0.9575520356496176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,15,0.9618773460388184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,1,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,15,0.8546506563822428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,1,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,3,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,3,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,7,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,7,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,15,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,15,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,31,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,31,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,63,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,63,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,31,0.9647093613942465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,31,0.8552640279134115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,127,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,127,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,255,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,255,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,511,0.022848000129063923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,511,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,1023,0.029077333708604176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,1023,0.035317334036032356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,2047,0.05648533503214518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,2047,0.04977599779764811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,4095,0.08099199831485748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,4095,0.09473066528638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,8191,0.14141333103179932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,8191,0.17010132471720377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,1,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,1,0.025040000677108765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,3,0.027632000545660656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,7,0.02737066646416982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,3,0.025066666305065155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,7,0.025077333052953083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,15,0.02739733209212621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,15,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,31,0.027456000447273254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,31,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,63,0.027669332921504974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,63,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,127,0.027301333844661713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,127,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,255,0.03362133353948593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,255,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,511,0.05401599903901418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,511,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,1023,0.05388266841570536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,1023,0.0925546685854594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,2047,0.0960586667060852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,2047,0.1747786601384481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,1,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,4095,0.16910932461420694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,3,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,4095,0.33084267377853394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,7,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,15,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,15,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,63,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,63,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,127,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,127,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,255,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,511,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,511,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,1023,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,1023,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,2047,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,2047,0.014730667074521383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,4095,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,4095,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,8191,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,8191,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,16383,0.05811200042565664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,16383,0.03527999917666117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,3,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,3,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,7,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,15,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,15,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,31,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,31,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,63,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,127,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,127,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,255,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,511,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,511,0.011445333560307821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,1023,0.013477332890033722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,1023,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,2047,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,2047,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,4095,0.03555200000603994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,4095,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,8191,0.0574239989121755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,8191,0.03572266548871994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,16383,0.10145599643389384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,1,0.04391466577847799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,16383,0.05494933327039083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,1,0.039477333426475525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,3,0.045994664231936135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,3,0.03938666731119156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,7,0.045797333121299744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,7,0.03988266736268997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,15,0.04593066871166229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,15,0.03953066716591517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,31,0.045893331368764244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,31,0.03957866628964742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,63,0.045194665590922035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,63,0.03997866561015447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,127,0.04827199876308441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,127,0.037690666814645134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,255,0.054325332244237266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,255,0.04137066751718521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,511,0.09537067015965779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,511,0.05880533158779144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,1023,0.17092265685399374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,1023,0.09433066844940186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,1,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,2047,0.33036800225575763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,1,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,2047,0.17083734273910522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,3,0.012448000411192576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,3,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,7,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,7,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,15,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,15,0.01267733300725619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,31,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,31,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,63,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,63,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,127,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,127,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,255,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,255,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,511,0.01458666721979777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,511,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,1023,0.018789333601792652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,1023,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,2047,0.037962667644023895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,2047,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,4095,0.060640002290407814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,4095,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,8191,0.10266133149464925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,8191,0.06012799839178721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,16383,0.19009600083033243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,1,0.06833066542943318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,16383,0.1013706624507904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,1,0.08309866487979889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,3,0.08285866677761078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,3,0.06834666430950165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,7,0.08267199993133545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,7,0.06884266436100006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,15,0.08262399832407634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,15,0.06820799907048543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,31,0.08260799944400787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,31,0.06875733534495036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,63,0.08276799817879994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,63,0.06826133529345195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,127,0.08339200417200725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,255,0.07025599976380666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,255,0.09644800424575806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,127,0.06890666484832764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,511,0.17459199825922647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,511,0.10569600264231364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,1023,0.17459734280904135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,1023,0.32475199302037555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,1,0.1527413328488668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,1,0.12826133767763773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,3,0.1527359982331594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,3,0.12814399600028992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,7,0.15249600013097128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,7,0.12786133090655008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,15,0.1532319982846578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,15,0.12780267000198364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,31,0.15264000495274863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,31,0.12876799702644348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,63,0.15285866459210715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,63,0.12782933314641318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,127,0.15334399541219076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,127,0.12594133615493774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,255,0.17935466766357422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,255,0.12812800208727518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,1,0.29332266251246136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,3,0.29411200682322186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,1,0.24486400683720908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,3,0.24478934208552042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,7,0.24458666642506918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,7,0.295141339302063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,15,0.2939680020014445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,15,0.2446720004081726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,31,0.2943039933840434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,31,0.24464533726374307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,63,0.29365332921346027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,63,0.2461706598599752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,1,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,127,0.2937120000521342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,1,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,3,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,3,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,7,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,7,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,127,0.23955732583999634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,15,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,31,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,15,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,31,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,63,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,63,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,127,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,127,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,255,0.01471466695268949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,255,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,511,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,511,0.016773333152135212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,1023,0.03446933378775915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,1023,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,2047,0.06070399781068166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,2047,0.03945599993069967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,4095,0.10194666186968486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,4095,0.0609440008799235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,8191,0.10166933139165242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,8191,0.1888266603151957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,16383,0.18592000007629395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,16383,0.36108799775441486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,1,0.47811734676361084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,1,0.578005313873291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,3,0.47605331738789874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,3,0.5744106769561768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,7,0.47997868061065674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,7,0.5761760075887045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,15,0.475930651028951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,15,0.5746133327484131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,31,0.5781813462575277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,31,0.4782133499781291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,63,0.47649598121643066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,63,0.574181318283081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,1,0.9420693715413412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,1,1.1408426761627197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,3,0.9384586811065674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,3,1.1333759625752766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,7,0.9443999926249186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,7,1.1378719806671143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,15,1.133023977279663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,15,0.9380640188852946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,1,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,1,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,3,0.018917333334684372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,3,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,7,0.018778666853904724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,7,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,15,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,15,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,31,0.01878400022784869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,31,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,63,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,63,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,127,0.01884799947341283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,127,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,255,0.018911999960740406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,255,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,511,0.03345600018898646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,511,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,31,0.9422667026519775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,31,1.141429344813029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,1023,0.05429333448410034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,1023,0.03369066615899404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,2047,0.05765866736570994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,2047,0.09706133604049683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,4095,0.1739893356959025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,4095,0.09453866879145305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,1,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,8191,0.3338773250579834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,8191,0.16806934277216592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,1,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,3,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,3,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,7,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,7,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,15,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,15,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,31,0.021514666577180225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,31,0.02102400114138921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,63,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,63,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,127,0.02165866643190384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,127,0.02067199970285098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,255,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,255,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,511,0.030640001098314922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,511,0.027317332724730175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,1023,0.046495998899141945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,1023,0.039813332259655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,2047,0.0795360008875529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,2047,0.06856533388296764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,4095,0.13846932848294577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,1,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,4095,0.11758400003115337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,1,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,3,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,3,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,7,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,15,0.010480000327030817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,31,0.010064000263810158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,31,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,63,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,127,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,255,0.010298666854699453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,255,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,511,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,511,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,1023,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,1023,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,2047,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,2047,0.014378666877746582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,4095,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,4095,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,8191,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,8191,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,16383,0.027888000011444092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,1,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,3,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,1,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,16383,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,3,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,7,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,7,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,15,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,15,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,31,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,63,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,31,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,63,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,127,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,127,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,255,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,511,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,511,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,1023,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,1023,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,2047,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,2047,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,4095,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,4095,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,8191,0.03336533407370249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,8191,0.029306667546431225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,16383,0.055248002211252846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,1,0.03323200096686681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,1,0.029493334392706554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,16383,0.04786666731039683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,3,0.0335359995563825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,3,0.031445334355036415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,7,0.03161066770553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,7,0.031023999055226643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,15,0.03323733309904734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,15,0.03129599988460541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,31,0.03137599925200144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,31,0.029674666623274486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,63,0.033258666594823204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,63,0.029930666089057922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,127,0.03183466692765554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,127,0.03156800071398417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,255,0.03323733309904734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,255,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,511,0.0510453333457311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,511,0.04387199878692627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,1023,0.07685333490371704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,1023,0.06821333368619283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,2047,0.13595733046531677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,1,0.012181332955757776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,1,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,3,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,2047,0.11564800143241882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,3,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,7,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,7,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,15,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,15,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,31,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,31,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,63,0.012725333372751871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,127,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,127,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,255,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,255,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,511,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,511,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,1023,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,1023,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,2047,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,2047,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,4095,0.03521066655715307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,4095,0.029530666768550873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,8191,0.06490666667620341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,8191,0.05009066561857859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,16383,0.10386666655540466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,1,0.05414933462937673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,16383,0.08814400434494019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,1,0.049738665421803795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,3,0.05397333204746246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,3,0.0503359983364741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,7,0.05385066568851471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,7,0.050016000866889954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,15,0.056176001826922096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,15,0.04985600213209788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,31,0.054383998115857445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,31,0.04990399877230326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,63,0.05385066568851471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,63,0.05026133358478546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,127,0.05734399954477946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,127,0.048783997694651283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,255,0.05622933308283488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,255,0.05202133456865946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,511,0.0867146650950114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,511,0.07562133173147838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,1023,0.14168000221252441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,1023,0.11975466211636861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,1,0.10123200217882793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,1,0.08922666311264038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,3,0.09948266545931499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,3,0.08906666437784831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,7,0.10111467043558757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,7,0.08894399801890056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,15,0.10044800241788228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,15,0.0888853371143341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,31,0.10001599788665771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,31,0.08868799606959026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,63,0.10073066751162212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,63,0.08909866213798523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,127,0.0900266667207082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,127,0.09991466999053955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,255,0.1011306643486023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,255,0.09085866808891296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,1,0.1895786722501119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,3,0.19155732790629068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,3,0.17084266742070517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,1,0.17188799381256104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,7,0.18969599405924478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,7,0.17284266153971353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,15,0.18930667638778687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,15,0.1713119943936666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,31,0.18979199727376303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,31,0.17275200287501016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,63,0.18937599658966064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,63,0.1712906757990519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,1,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,127,0.18550399939219156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,1,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,3,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,127,0.17074666420618692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,3,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,7,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,7,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,15,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,15,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,31,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,31,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,63,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,63,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,127,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,127,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,255,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,255,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,511,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,511,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,1023,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,1023,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,2047,0.03136000037193298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,2047,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,4095,0.05053333441416422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,4095,0.04159999887148539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,8191,0.08404266834259033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,8191,0.07018133501211803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,16383,0.1470186710357666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,16383,0.12198932965596516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,1,0.36956266562143963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,1,0.32979732751846313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,3,0.3691573143005371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,3,0.33089067538579303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,7,0.36764800548553467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,7,0.3309386571248372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,15,0.3701333204905192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,15,0.3300480047861735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,31,0.3691893418629964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,31,0.3322506745656331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,63,0.3678239981333415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,63,0.3306559920310974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,1,0.726213296254476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,1,0.6444053252538046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,3,0.7238826751708984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,3,0.6459466616312662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,7,0.7266133626302084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,7,0.6438560088475546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,15,0.7250133355458578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,1,0.01639466608564059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,1,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,15,0.645578662554423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,3,0.016719999412695568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,3,0.01589866727590561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,7,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,7,0.016341333587964375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,15,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,15,0.016261332978804905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,31,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,31,0.01599466676513354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,63,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,63,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,127,0.01670933390657107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,31,0.7261599699656168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,31,0.6442026694615682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,127,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,255,0.01642666632930438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,255,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,511,0.020197333147128422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,511,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,1023,0.029861333469549816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,1023,0.025648000339667004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,2047,0.0499839981396993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,2047,0.04389866689840952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,4095,0.08265066643555959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,4095,0.07063999772071838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,1,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,1,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,8191,0.1222826639811198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,8191,0.14800533652305603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,3,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,3,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,7,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,7,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,15,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,15,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,31,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,31,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,63,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,63,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,127,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,127,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,255,0.025514667232831318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,255,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,511,0.030666666726271313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,511,0.0436160018046697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,1023,0.07313066720962524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,1023,0.04474666714668274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,2047,0.07684266567230225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,2047,0.13704533378283182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,1,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,1,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,4095,0.13567466537157694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,3,0.01022933361430963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,4095,0.259226659933726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,3,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,7,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,15,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,15,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,31,0.010234666367371878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,31,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,63,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,127,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,127,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,255,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,255,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,511,0.011535999675591787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,511,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,1023,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,1023,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,2047,0.015829333414634068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,2047,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,4095,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,4095,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,8191,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,8191,0.02041600023706754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,1,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,16383,0.046944002310434975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,1,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,16383,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,3,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,7,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,7,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,31,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,31,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,63,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,63,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,127,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,127,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,255,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,511,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,511,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,1023,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,1023,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,2047,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,2047,0.016741332908471424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,4095,0.029637334247430164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,4095,0.02102400114138921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,8191,0.05439466734727224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,8191,0.03334933271010717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,16383,0.0906880001227061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,1,0.03743999948104223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,16383,0.053871999184290566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,1,0.031386665999889374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,3,0.03678400069475174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,3,0.03163733333349228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,7,0.03572800010442734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,7,0.031680000325044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,15,0.03611200054486593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,15,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,31,0.03757333258787791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,31,0.032469332218170166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,63,0.03750933210055033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,63,0.03178133318821589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,127,0.039477333426475525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,127,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,255,0.04433600107828776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,255,0.03384000062942505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,511,0.07412800192832947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,511,0.04804799954096476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,1023,0.13134400049845377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,1023,0.07451733450094859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,2047,0.2527359922726949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,1,0.011776000261306763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,2047,0.1325546701749166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,1,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,3,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,3,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,7,0.011488000551859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,7,0.012538666526476542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,15,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,15,0.01180800050497055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,31,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,31,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,63,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,63,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,127,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,127,0.011999999483426413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,255,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,255,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,511,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,511,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,1023,0.018725333114465077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,1023,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,2047,0.031231999397277832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,2047,0.023605334262053173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,4095,0.059903999169667564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,4095,0.035429333647092186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,8191,0.09880533814430237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,8191,0.06211199859778086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,16383,0.17668799559275308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,16383,0.10133333007494609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,1,0.06513066589832306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,1,0.053770666321118675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,3,0.054005334774653115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,3,0.06444799900054932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,7,0.06447466711203258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,7,0.05434666574001312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,15,0.06426666676998138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,15,0.05392000079154968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,31,0.0528053343296051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,31,0.06432533264160156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,63,0.06431999802589417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,63,0.053173333406448364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,127,0.06619200110435486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,127,0.05398933092753092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,255,0.05601066847642263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,255,0.07443200051784515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,511,0.13376532991727194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,511,0.08441600203514099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,1023,0.13397866487503052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,1023,0.24485333760579428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,1,0.11972266435623169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,1,0.09935466448465984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,3,0.1178986628850301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,3,0.09937600294748943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,7,0.11780800422032674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,7,0.09929066896438599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,15,0.1185653309027354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,15,0.09960533181826274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,31,0.11773332953453064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,31,0.0993173321088155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,63,0.11769599715868632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,63,0.09938666224479675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,127,0.11870400110880534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,127,0.09918399651845296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,255,0.13826666275660196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,255,0.10115733742713928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,1,0.2241119941075643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,1,0.1875200072924296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,3,0.224671999613444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,3,0.1874826749165853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,7,0.22423466046651205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,7,0.18773333231608072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,15,0.22442134221394858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,15,0.18873600165049234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,31,0.22405334313710532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,31,0.1874826749165853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,63,0.2238773306210836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,127,0.22512000799179077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,1,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,1,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,3,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,63,0.1880106727282206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,127,0.18523732821146646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,3,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,7,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,7,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,15,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,15,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,31,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,31,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,63,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,63,0.013381333400805792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,127,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,127,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,255,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,255,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,511,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,511,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,1023,0.023728000621000927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,1023,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,2047,0.049253334601720176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,2047,0.03156800071398417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,4095,0.08255466818809509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,4095,0.050767997900644936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,8191,0.14687466621398926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,8191,0.0817333310842514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,16383,0.1434826652208964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,16383,0.27321066459019977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,1,0.43326401710510254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,1,0.3620906670888265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,3,0.4352906545003255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,3,0.3612106641133626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,7,0.3617386817932129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,7,0.4331520001093547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,15,0.3615146478017171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,15,0.4356000026067098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,31,0.36050665378570557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,31,0.43509864807128906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,63,0.3631360133488973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,63,0.43323198954264325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,1,0.7109920183817545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,1,0.8532000382741293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,3,0.7095626990000407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,3,0.8583733240763346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,7,0.7085653146107992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,7,0.8551786740620931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,15,0.7117013136545817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,15,0.851797342300415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,1,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,1,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,3,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,3,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,7,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,7,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,15,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,15,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,31,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,31,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,63,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,63,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,31,0.711130698521932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,31,0.8531893094380697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,127,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,127,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,255,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,255,0.016672000288963318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,511,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,511,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,1023,0.04618666569391886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,1023,0.028368001182874043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,2047,0.08061866462230682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,2047,0.04987200101216634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,4095,0.14657599727312723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,4095,0.0811466674009959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,1,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,8191,0.2766079902648926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,8191,0.14486400286356607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,1,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,3,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,3,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,7,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,7,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,15,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,15,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,31,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,31,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,63,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,63,0.018895999838908512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,127,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,127,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,255,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,255,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,511,0.026922665536403656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,511,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,1023,0.03990400085846583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,1023,0.03369600077470144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,2047,0.06855466465155284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,2047,0.0587360014518102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,4095,0.11620799700419109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,1,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,4095,0.09927999973297119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,1,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,3,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,3,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,7,0.009082666908701261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,7,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,15,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,31,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,63,0.009045333291093508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,63,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,127,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,127,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,255,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,255,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,511,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,511,0.011930666863918304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,1023,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,1023,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,2047,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,2047,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,4095,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,4095,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,8191,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,8191,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,16383,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,1,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,16383,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,1,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,3,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,3,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,15,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,15,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,63,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,63,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,127,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,127,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,255,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,255,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,511,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,511,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,1023,0.012645332763592402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,1023,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,2047,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,2047,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,4095,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,4095,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,8191,0.03154666721820831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,8191,0.02733866622050603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,16383,0.056128000219662987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,1,0.029296000798543293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,16383,0.04566933214664459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,1,0.027477333943049114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,3,0.029391999046007793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,3,0.027232001225153606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,7,0.029232000311215717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,7,0.027493332823117573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,15,0.029296000798543293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,15,0.02739199995994568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,31,0.029290666182835896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,31,0.02719466636578242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,63,0.027744000156720478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,63,0.026485333840052288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,127,0.027349332968393963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,127,0.027141332626342773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,255,0.027647999425729115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,255,0.027402666707833607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,511,0.0436106671889623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,511,0.0355679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,1023,0.06565333406130473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,1023,0.05798399945100149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,1,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,2047,0.1153546671072642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,2047,0.09724799791971843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,1,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,3,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,7,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,7,0.012437333663304647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,15,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,15,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,31,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,31,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,63,0.012015999605258306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,63,0.012047999848922094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,127,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,127,0.011973333855470022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,255,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,255,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,511,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,511,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,1023,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,1023,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,2047,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,2047,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,4095,0.03356266766786575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,4095,0.03011200080315272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,8191,0.059061333537101746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,8191,0.04834666848182678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,16383,0.10105066498120625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,1,0.04780266682306925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,16383,0.0867146650950114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,1,0.04365866879622141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,3,0.04773866633574168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,3,0.04364266494909922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,7,0.043738668163617454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,7,0.04665066798528036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,15,0.047983999053637184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,15,0.04379733403523763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,31,0.046944002310434975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,31,0.04373333354791006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,63,0.0476800004641215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,63,0.043706665436426796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,127,0.04666133224964142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,127,0.04368533194065094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,255,0.04841599861780802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,255,0.0435146689414978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,511,0.07447466750939687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,511,0.064560001095136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,1023,0.11905066172281902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,1023,0.10223999619483948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,1,0.08502399921417236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,1,0.07630933324495952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,3,0.08550399541854858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,3,0.07671999931335449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,7,0.08493866523106892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,7,0.0758240024248759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,15,0.0764213353395462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,31,0.08499733606974284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,15,0.08553066849708557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,31,0.0759093314409256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,63,0.08691199620564778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,63,0.0767680009206136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,127,0.07481066882610321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,127,0.08523733417193095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,255,0.08699733018875122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,255,0.07846400141716003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,1,0.16039466857910156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,1,0.14443199833234152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,3,0.1628159979979197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,3,0.1444480021794637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,7,0.16056533654530844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,7,0.14435733358065286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,15,0.16217600305875143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,15,0.14436266819636026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,31,0.16089600324630737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,31,0.1444906691710154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,63,0.16087466478347778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,63,0.14453333616256714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,1,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,127,0.15729066729545593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,1,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,3,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,3,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,127,0.14428800344467163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,7,0.012762666990359625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,7,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,15,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,15,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,31,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,31,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,63,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,63,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,127,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,127,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,255,0.01332266628742218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,255,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,511,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,511,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,1023,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,1023,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,2047,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,2047,0.027189334233601887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,4095,0.04879466692606608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,4095,0.040021332601706185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,8191,0.08144533137480418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,8191,0.06854400038719177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,16383,0.14452800154685974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,16383,0.12243733803431193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,1,0.30827732880910236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,1,0.27738134066263836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,3,0.31034666299819946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,3,0.277349332968394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,7,0.3087093234062195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,7,0.2790293296178182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,15,0.3081226746241252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,15,0.2775680025418599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,31,0.31014933188756305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,31,0.2772639989852905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,63,0.3087573250134786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,63,0.27874666452407837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,1,0.5406879981358846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,1,0.6033493280410767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,3,0.5434666474660238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,3,0.6049760182698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,7,0.6026879946390787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,7,0.5396000146865845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,15,0.6062613328297933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,15,0.5420693159103394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,1,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,1,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,3,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,3,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,7,0.014757333944241205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,7,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,15,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,15,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,31,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,31,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,63,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,63,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,127,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,127,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,255,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,255,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,31,0.6029599905014038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,511,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,31,0.5396906534830729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,511,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,1023,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,1023,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,2047,0.04399466514587402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,2047,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,4095,0.07252266506354015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,4095,0.06160533428192139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,1,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,8191,0.12635733683904013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,8191,0.1053706705570221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,1,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,3,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,3,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,7,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,7,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,15,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,15,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,31,0.021525333325068157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,31,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,63,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,63,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,127,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,127,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,255,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,255,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,511,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,511,0.03849600007136663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,1023,0.0644160012404124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,1023,0.03965333352486292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,2047,0.06670933465162913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,2047,0.11686933040618896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,1,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,4095,0.21630932887395224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,4095,0.11524266997973125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,3,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,3,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,7,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,15,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,31,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,63,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,63,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,127,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,255,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,511,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,511,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,1023,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,1023,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,2047,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,2047,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,4095,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,4095,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,8191,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,8191,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,16383,0.0422986646493276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,1,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,16383,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,1,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,3,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,3,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,7,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,7,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,15,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,31,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,31,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,63,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,127,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,127,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,255,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,255,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,511,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,511,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,1023,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,1023,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,2047,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,2047,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,4095,0.02515200028816859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,4095,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,8191,0.050586665670077004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,8191,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,1,0.031248000760873158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,16383,0.08823999762535095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,16383,0.052970667680104576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,1,0.027888000011444092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,3,0.03192000091075897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,3,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,7,0.03158933420976003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,7,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,15,0.032511999209721885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,15,0.027242665489514668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,31,0.031850665807724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,31,0.029088000456492107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,63,0.03145600110292435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,63,0.02754133443037669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,127,0.03341866781314214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,127,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,255,0.03950933367013931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,255,0.02890666574239731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,511,0.06427733103434245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,511,0.04367466767628988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,1023,0.11183999975522359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,1023,0.06426666676998138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,1,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,2047,0.2126880089441935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,2047,0.11347732941309611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,1,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,3,0.012266666938861212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,3,0.011546666423479715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,7,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,7,0.012213333199421564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,15,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,15,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,31,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,31,0.011711999773979187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,63,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,127,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,255,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,127,0.011882666498422623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,255,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,511,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,511,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,1023,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,1023,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,2047,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,2047,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,4095,0.05550933380921682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,4095,0.03160533308982849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,8191,0.09389866391817729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,8191,0.05787733197212219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,16383,0.17260799805323282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,1,0.0539626677831014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,16383,0.09859200318654378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,1,0.04587199787298838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,3,0.054431999723116554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,3,0.04612799982229868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,7,0.05421866476535797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,7,0.046223998069763184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,15,0.05428266525268555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,15,0.045850664377212524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,31,0.05489066739877065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,31,0.045781334241231285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,63,0.04598933458328247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,63,0.05407999952634176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,127,0.05699199934800466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,127,0.04611733555793762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,255,0.06533333162466685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,255,0.048858667413393654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,511,0.11413333813349406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,511,0.07051200171311696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,1023,0.2079040010770162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,1,0.10106666882832845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,1023,0.11409599582354228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,1,0.08492799599965413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,3,0.10143466790517171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,3,0.08426133791605632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,7,0.10108266274134318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,7,0.08331199983755748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,15,0.1011786659558614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,15,0.08481066425641377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,31,0.10097066561381023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,31,0.08319466809431712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,63,0.10131200154622395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,63,0.08475200335184734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,127,0.10087999701499939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,127,0.08433600266774495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,255,0.11892267068227132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,255,0.08529599507649739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,1,0.189520001411438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,1,0.15852266550064087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,3,0.18755199511845908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,3,0.15844266613324484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,7,0.1888213356335958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,7,0.1588320036729177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,15,0.18927466869354248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,31,0.1586026648680369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,31,0.18734399477640787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,15,0.15945600469907126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,63,0.18939733505249023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,63,0.15843733151753744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,127,0.18837332725524902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,1,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,1,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,3,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,3,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,7,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,127,0.15683733423550925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,7,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,15,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,15,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,31,0.013493333011865616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,31,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,63,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,63,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,127,0.013503999759753546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,127,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,255,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,255,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,511,0.016229332735141117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,511,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,1023,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,1023,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,2047,0.02775466690460841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,2047,0.04801600178082784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,4095,0.07922666768232982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,4095,0.049141332507133484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,8191,0.08225066463152568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,8191,0.14032000303268433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,16383,0.14164266983668009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,16383,0.26529600222905475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,1,0.3036479949951172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,1,0.3654079834620158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,3,0.3631733258565267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,3,0.3041440049807231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,7,0.36368000507354736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,7,0.304367999235789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,15,0.362768014272054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,15,0.30429333448410034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,31,0.36354132493336994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,31,0.30458666880925495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,63,0.30435200532277423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,63,0.3621600071589152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,1,0.5952746470769247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,1,0.7112639745076498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,3,0.7141813437143961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,3,0.5957760016123453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,7,0.7116853396097819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,7,0.5934826532999674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,15,0.7161706288655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,15,0.5913546482721964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,1,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,31,0.7137813568115234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,1,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,3,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,3,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,7,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,15,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,7,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,15,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,31,0.01581866666674614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,31,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,63,0.016415999581416447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,63,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,127,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,127,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,255,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,255,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,511,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,511,0.02180800090233485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,1023,0.03959999978542328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,1023,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,2047,0.07148266832033794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,31,0.5951519807179769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,2047,0.04385599990685781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,4095,0.1236853301525116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,4095,0.07113066812356313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,1,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,8191,0.12382400035858154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,1,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,3,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,8191,0.23142399390538534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,3,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,7,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,7,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,15,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,15,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,31,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,31,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,63,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,63,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,127,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,127,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,255,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,255,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,511,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,511,0.021221332252025604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,1023,0.034304000437259674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,1023,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,2047,0.05759466687838236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,2047,0.05004266897837321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,4095,0.09513066212336223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,4095,0.0804746647675832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,1,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,1,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,8191,0.169322669506073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,3,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,3,0.010485333700974783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,8191,0.14123200376828512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,7,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,7,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,15,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,31,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,63,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,31,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,63,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,127,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,255,0.010079999764760336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,255,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,511,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,511,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,1023,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,1023,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,2047,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,2047,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,4095,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,4095,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,8191,0.015935999651749928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,8191,0.016224000602960587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,16383,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,16383,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,1,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,32767,0.03400533397992452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,3,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,32767,0.029178666571776073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,7,0.009759999811649323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,15,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,15,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,31,0.009455999980370203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,63,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,127,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,127,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,255,0.009136000027259191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,255,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,511,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,511,0.011488000551859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,1023,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,1023,0.012741333494583765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,2047,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,2047,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,4095,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,4095,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,8191,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,8191,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,16383,0.036271999279658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,16383,0.02958933264017105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,32767,0.05670933425426483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,1,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,32767,0.04978133241335551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,3,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,1,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,3,0.02367466688156128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,7,0.023610666394233704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,7,0.024959998826185863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,15,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,15,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,31,0.02513066679239273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,63,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,63,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,31,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,127,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,127,0.02327999969323476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,255,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,255,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,511,0.031343999008337654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,511,0.03752533346414566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,1023,0.056101332108179726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,1023,0.049882665276527405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,2047,0.09744532903035481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,2047,0.08301333089669545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,4095,0.1710346738497416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,1,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,4095,0.14434666434923807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,3,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,1,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,3,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,15,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,15,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,63,0.010405333091815313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,63,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,127,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,255,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,511,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,511,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,1023,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,1023,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,2047,0.016565332810084026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,2047,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,4095,0.023077333966890972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,4095,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,8191,0.03793599953254064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,8191,0.029968000948429108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,16383,0.0580320010582606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,16383,0.052005335688591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,1,0.039893334110577904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,32767,0.09984532992045085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,32767,0.08686400453249614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,1,0.03777066618204117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,3,0.04004266609748205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,7,0.03952533255020777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,7,0.03735466549793879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,3,0.037791999677817024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,15,0.03998400022586187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,15,0.035690667728583016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,31,0.039674667020638786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,31,0.03730133424202601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,63,0.03962666789690653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,63,0.035749333600203194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,127,0.03790933390458425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,127,0.03654933224121729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,255,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,255,0.035445332527160645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,511,0.062165334820747375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,511,0.05421866476535797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,1023,0.09708799918492635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,1023,0.08347733815511067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,2047,0.14642133315404257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,1,0.06841599941253662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,2047,0.17299733559290567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,1,0.062218666076660156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,3,0.06842666864395142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,3,0.06267733375231425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,7,0.0693333347638448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,7,0.06438399851322174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,15,0.07057066758473714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,15,0.062277331948280334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,31,0.07049599786599477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,31,0.062319998939832054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,63,0.06983999907970428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,63,0.06427733103434245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,127,0.06956799825032552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,127,0.06237333516279856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,255,0.07236800094445546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,255,0.06412266691525777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,511,0.11173866192499797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,511,0.09517866373062134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,1,0.12998400131861368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,1,0.11597866813341777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,3,0.13012267152468363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,3,0.11569600303967793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,7,0.13010666767756143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,7,0.11621333161989848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,15,0.1300373375415802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,15,0.11608533064524333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,31,0.12990400195121765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,31,0.11587733030319214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,63,0.13024000326792398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,63,0.11570666233698527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,127,0.1281653344631195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,127,0.1157973309357961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,1,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,255,0.12988266348838806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,1,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,255,0.11757866541544597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,3,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,3,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,7,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,7,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,15,0.012671999633312225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,15,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,31,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,31,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,63,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,63,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,127,0.012133333832025528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,127,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,255,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,255,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,511,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,511,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,1023,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,2047,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,1023,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,2047,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,4095,0.03855466594298681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,4095,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,8191,0.0518506666024526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,8191,0.060736000537872314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,16383,0.1018346647421519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,16383,0.08686400453249614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,32767,0.1887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,32767,0.15659733613332114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,1,0.2490453322728475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,1,0.22429333130518594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,3,0.2508693337440491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,3,0.2244266668955485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,7,0.24885332584381104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,7,0.22404799858729044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,15,0.25020267566045123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,15,0.2230400045712789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,31,0.22395199537277222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,31,0.2490239938100179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,63,0.25061333179473877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,63,0.22399999698003134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,127,0.24329066276550293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,127,0.22213866313298544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,1,0.48842132091522217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,1,0.43530134359995526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,3,0.4859253168106079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,3,0.43753600120544434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,7,0.48840534687042236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,7,0.43511466185251874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,15,0.48627734184265137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,15,0.437173326810201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,31,0.4883466561635335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,31,0.43532268206278485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,1,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,1,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,3,0.013418667018413544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,3,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,7,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,63,0.48635200659434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,63,0.4376800060272217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,7,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,15,0.014645333091417948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,31,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,15,0.014661333213249842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,31,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,63,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,63,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,127,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,127,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,255,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,255,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,511,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,511,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,1023,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,1023,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,2047,0.03961066653331121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,2047,0.03148799886306127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,4095,0.06182933350404104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,4095,0.052815998593966164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,8191,0.104010671377182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,8191,0.08814400434494019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,1,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,16383,0.1893493334452311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,1,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,16383,0.15688000122706094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,3,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,3,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,7,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,7,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,15,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,15,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,31,0.01884799947341283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,31,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,63,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,63,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,127,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,127,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,255,0.018837332725524902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,255,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,511,0.03369600077470144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,511,0.022970666488011677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,1023,0.05241066714127859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,1023,0.03395200024048487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,2047,0.09674666325251262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,2047,0.05676266551017761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,4095,0.17462400595347086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,4095,0.09324799974759419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,1,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,1,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,8191,0.33162667353947956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,8191,0.1687893271446228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,3,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,3,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,7,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,15,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,15,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,31,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,31,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,63,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,63,0.010421333213647207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,127,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,127,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,255,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,511,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,511,0.011546666423479715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,1023,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,1023,0.012655999511480331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,2047,0.01423466702302297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,2047,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,4095,0.016282666474580765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,4095,0.01479999969402949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,8191,0.019658666104078293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,8191,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,16383,0.036042665441830955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,16383,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,1,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,32767,0.05648533503214518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,1,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,32767,0.03522666543722153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,3,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,3,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,7,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,7,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,15,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,31,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,31,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,63,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,63,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,127,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,127,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,255,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,511,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,255,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,511,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,1023,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,1023,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,2047,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,2047,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,4095,0.019653332730134327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,4095,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,8191,0.03690666705369949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,8191,0.021589333812395733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,16383,0.05859733124574026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,16383,0.035802667339642845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,1,0.02775999903678894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,32767,0.10129599769910176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,1,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,32767,0.05531733234723409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,3,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,3,0.027589333554108936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,7,0.027456000447273254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,7,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,15,0.027610667049884796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,15,0.02363733450571696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,31,0.027637332677841187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,31,0.025120000044504803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,63,0.027744000156720478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,63,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,127,0.027263998985290527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,127,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,255,0.03438399980465571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,255,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,511,0.05402666827042898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,511,0.03640000025431315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,1023,0.09093333284060161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,1023,0.054234668612480164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,2047,0.17306667566299438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,2047,0.09519466757774353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,1,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,1,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,4095,0.3298773368199666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,4095,0.16870399316151938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,3,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,3,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,7,0.011498666057984034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,15,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,15,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,31,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,31,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,63,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,63,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,127,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,127,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,255,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,511,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,511,0.012682666381200155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,1023,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,1023,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,2047,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,2047,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,4095,0.03818666686614355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,4095,0.02250133454799652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,8191,0.05862933397293091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,8191,0.03779733429352442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,16383,0.10272000233332317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,16383,0.05678399900595347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,1,0.044906665881474815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,32767,0.1925119956334432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,32767,0.09593600034713745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,1,0.03774933268626531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,3,0.04554133117198944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,3,0.03941333293914795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,7,0.04572799801826477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,7,0.0397173340121905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,15,0.04568533102671305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,15,0.039493332306543984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,31,0.04563199977080027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,31,0.03980266551176707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,63,0.04520533482233683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,63,0.03945599993069967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,127,0.048101335763931274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,127,0.03793066740036011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,255,0.05515733361244202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,255,0.039936001102129616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,511,0.09340266386667888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,511,0.06029333174228668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,1023,0.16830933094024658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,1023,0.09502933422724406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,2047,0.3267040054003398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,1,0.06859733164310455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,1,0.08181866506735484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,2047,0.169487992922465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,3,0.06833600004514058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,3,0.08293866614500682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,7,0.08274133503437042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,7,0.06874666611353557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,15,0.06878933310508728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,15,0.08273600041866302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,31,0.08276266853014629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,31,0.06894933183987935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,63,0.08273600041866302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,63,0.06841599941253662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,127,0.08297066887219746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,255,0.07043200234572093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,255,0.09666666388511658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,127,0.06919466455777486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,511,0.17162134250005087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,511,0.10706133643786113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,1,0.12990933656692505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,1,0.1545866628487905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,3,0.15290666619936624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,3,0.12897066275278726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,7,0.12990933656692505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,7,0.15385066469510397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,15,0.15253866712252298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,15,0.12981866796811423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,31,0.15439466635386148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,31,0.12853333353996277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,63,0.15402666727701822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,63,0.12999999523162842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,127,0.15275733669598898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,127,0.12776000301043192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,255,0.18056533734003702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,255,0.12920533617337546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,1,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,1,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,3,0.012629333883523941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,3,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,7,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,15,0.012741333494583765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,7,0.012661332885424295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,15,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,31,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,31,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,63,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,63,0.01228800043463707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,127,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,127,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,255,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,255,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,511,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,511,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,1023,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,1023,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,2047,0.038890667259693146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,2047,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,4095,0.060090666015942894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,4095,0.03961600114901861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,8191,0.10316266616185506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,8191,0.05899733304977417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,16383,0.18769599994023642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,16383,0.09980799754460652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,32767,0.36633066336313885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,32767,0.18313600619633993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,1,0.29409066836039227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,1,0.24702399969100952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,3,0.2932213346163432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,3,0.24665600061416626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,7,0.2938773234685262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,7,0.24542933702468872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,15,0.24816532929738364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,15,0.29412267605463666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,31,0.2940053343772888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,31,0.24591465791066489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,63,0.29579200347264606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,63,0.24665067593256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,127,0.2939573327700297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,127,0.24198933442433676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,1,0.5754719972610474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,1,0.4782133499781291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,3,0.4777439832687378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,3,0.5735199848810831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,7,0.5757013161977133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,7,0.47727465629577637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,15,0.5721866687138876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,15,0.47866666316986084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,31,0.5763413508733114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,1,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,31,0.47625601291656494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,1,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,63,0.47838401794433594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,3,0.014752000570297241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,63,0.572160005569458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,3,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,7,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,7,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,15,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,15,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,31,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,63,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,31,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,63,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,127,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,127,0.01393066719174385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,255,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,255,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,511,0.018858666221300762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,511,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,1023,0.035461333890755974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,1023,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,2047,0.03952533255020777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,2047,0.06062399844328562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,4095,0.10382399956385295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,4095,0.060229331254959106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,8191,0.10241599877675374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,8191,0.18703466653823853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,1,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,1,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,16383,0.1853813330332438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,3,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,3,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,16383,0.36052266756693524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,7,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,7,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,15,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,15,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,31,0.01629866659641266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,31,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,63,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,63,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,127,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,127,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,255,0.016549333930015564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,255,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,511,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,511,0.018911999960740406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,1023,0.02937600016593933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,1023,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,2047,0.050000001986821495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,2047,0.043381333351135254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,4095,0.08197866876920064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,4095,0.0706826647122701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,1,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,8191,0.1483733355998993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,1,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,8191,0.1220853328704834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,3,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,7,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,15,0.012495999534924826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,31,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,63,0.008805333326260248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,63,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,127,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,255,0.01055466632048289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,255,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,511,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,1023,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,1023,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,2047,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,2047,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,4095,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,4095,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,8191,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,8191,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,16383,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,16383,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,1,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,32767,0.029029332101345062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,1,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,3,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,32767,0.025648000339667004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,3,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,7,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,15,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,15,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,31,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,31,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,63,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,127,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,255,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,255,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,511,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,511,0.012602667013804117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,1023,0.014138666292031607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,1023,0.013359999905029932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,2047,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,2047,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,4095,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,4095,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,8191,0.020997333029905956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,8191,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,16383,0.029653333127498627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,16383,0.025722667574882507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,1,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,32767,0.044549331068992615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,1,0.02162666618824005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,3,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,32767,0.03873066604137421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,3,0.020960000654061634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,7,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,7,0.02092266579469045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,15,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,15,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,31,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,31,0.020794666061798733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,63,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,127,0.021520001192887623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,127,0.019893333315849304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,63,0.021781332790851593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,255,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,255,0.019493332753578823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,511,0.031136001149813335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,1023,0.04615999758243561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,511,0.027237333357334137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,1023,0.03968533376852671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,2047,0.07890666524569194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,2047,0.06876266499360402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,1,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,4095,0.1381226678689321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,4095,0.1157919963200887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,1,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,3,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,3,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,7,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,7,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,15,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,31,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,31,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,63,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,63,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,127,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,127,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,255,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,511,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,511,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,1023,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,1023,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,2047,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,2047,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,4095,0.023018665611743927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,4095,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,8191,0.03391999999682108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,8191,0.02945599953333537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,16383,0.058101331194241844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,16383,0.04957866668701172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,32767,0.0990773340066274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,1,0.033370666205883026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,32767,0.08693866928418477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,1,0.0313226655125618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,3,0.03147733211517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,3,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,7,0.031680000325044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,7,0.029493334392706554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,15,0.03169599920511246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,15,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,31,0.031328000128269196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,31,0.031311998764673867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,63,0.03181333343187968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,63,0.02945599953333537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,127,0.03136533250411352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,127,0.029482667644818623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,255,0.03388266762097677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,255,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,511,0.05031466484069824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,511,0.044079999128977455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,1023,0.07853333155314128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,1023,0.06850666801134746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,2047,0.13758400082588196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,1,0.05453866720199585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,2047,0.11749866604804993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,1,0.049813335140546165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,3,0.054133335749308266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,3,0.04975466430187225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,7,0.05401599903901418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,7,0.04969066878159841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,15,0.05406933526198069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,15,0.05016533533732096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,31,0.05429866909980774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,63,0.05415999889373779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,63,0.05026666820049286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,31,0.05081599950790405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,127,0.05417066812515259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,127,0.05022933085759481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,255,0.05782933533191681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,255,0.053727999329566956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,511,0.0865760048230489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,511,0.07512533167997996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,1,0.10043733318646748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,3,0.10062932968139648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,1,0.08948799967765808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,3,0.09085866808891296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,7,0.10074133674303691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,7,0.08875200152397156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,15,0.10131733616193135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,15,0.09095999598503113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,31,0.10038399696350098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,31,0.08876267075538635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,63,0.09112000465393066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,63,0.0997226635615031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,127,0.09940266609191895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,127,0.08990400036176045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,1,0.011653333902359009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,1,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,255,0.10197333494822185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,255,0.09115733702977498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,3,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,7,0.011786667009194693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,7,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,15,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,15,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,31,0.01190399999419848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,31,0.011663999408483505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,63,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,127,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,127,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,255,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,255,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,511,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,511,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,1023,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,1023,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,2047,0.023018665611743927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,2047,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,4095,0.03548266738653183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,4095,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,8191,0.06422933439413707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,8191,0.05019199848175049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,16383,0.10572800040245056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,16383,0.08715732892354329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,32767,0.18607999881108603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,32767,0.15829867124557495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,1,0.1908586621284485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,1,0.17101333538691202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,3,0.19165867567062378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,3,0.1715786655743917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,7,0.1893120010693868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,7,0.17285333077112833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,15,0.19158399105072021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,15,0.1711840033531189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,31,0.1917440096537272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,31,0.17310933272043863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,63,0.18924800554911295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,63,0.17275200287501016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,127,0.18787733713785806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,127,0.1701386570930481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,1,0.3694773515065511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,1,0.331221342086792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,3,0.3697439829508464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,3,0.33009066184361774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,7,0.36954132715861004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,7,0.33082133531570435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,15,0.36846399307250977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,15,0.3304533362388611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,31,0.3696800072987874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,1,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,1,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,31,0.3306506673494975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,63,0.36790935198465985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,3,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,3,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,63,0.3306453426678975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,7,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,7,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,15,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,15,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,31,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,31,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,63,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,63,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,127,0.013466666142145792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,127,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,255,0.012714666624863943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,255,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,511,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,511,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,1023,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,1023,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,2047,0.031770666440327965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,2047,0.02717866748571396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,4095,0.050330668687820435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,4095,0.04256533086299896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,8191,0.08623466889063518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,8191,0.07062933345635732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,1,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,1,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,16383,0.1469066639741262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,16383,0.12177600463231404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,3,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,3,0.016229332735141117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,7,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,7,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,15,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,15,0.01637866720557213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,31,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,31,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,63,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,63,0.016602666427691776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,127,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,127,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,255,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,255,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,511,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,511,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,1023,0.028005334238211315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,1023,0.04451733330885569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,2047,0.0820906658967336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,2047,0.04816000163555145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,4095,0.08195200065771739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,4095,0.14692266782124838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,1,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,8191,0.14538666605949402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,8191,0.27826132376988727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,3,0.010442666709423065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,3,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,7,0.010485333700974783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,7,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,31,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,63,0.010378666842977205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,63,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,127,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,127,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,255,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,255,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,511,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,511,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,1023,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,1023,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,2047,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,2047,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,4095,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,4095,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,8191,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,8191,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,16383,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,16383,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,1,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,32767,0.04737600187460581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,1,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,32767,0.02735999971628189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,3,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,3,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,7,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,7,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,15,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,15,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,31,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,63,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,63,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,127,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,127,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,255,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,255,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,511,0.011893333246310553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,511,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,1023,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,1023,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,2047,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,2047,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,4095,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,4095,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,8191,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,8191,0.020848001043001812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,16383,0.04786133269468943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,16383,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,1,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,32767,0.07733333110809326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,32767,0.04526400069395701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,1,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,3,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,3,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,7,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,7,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,15,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,15,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,31,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,31,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,63,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,63,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,127,0.023061332603295643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,127,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,255,0.025941332181294758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,255,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,511,0.04478933413823446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,511,0.03158933420976003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,1023,0.07338666419188182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,1023,0.045882667104403176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,2047,0.13822399576505026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,2047,0.0772213339805603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,1,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,4095,0.2623093326886495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,4095,0.13635200262069702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,3,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,3,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,7,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,31,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,15,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,63,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,127,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,127,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,511,0.01191466674208641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,255,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,1023,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,1023,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,2047,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,2047,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,4095,0.029530666768550873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,4095,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,8191,0.056074668963750206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,8191,0.034917332231998444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,16383,0.09273599584897359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,16383,0.05596800148487091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,1,0.03751466671625773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,32767,0.17043733596801758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,32767,0.09342400232950847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,1,0.032431999842325844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,3,0.037647999823093414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,3,0.0316746657093366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,7,0.03555733213822047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,7,0.032629333436489105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,15,0.035749333600203194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,15,0.03141866624355316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,31,0.03760000069936117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,31,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,63,0.03583999971548716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,63,0.03156800071398417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,127,0.03931200007597605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,127,0.031701333820819855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,255,0.04445866743723551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,255,0.033258666594823204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,511,0.07369600236415863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,511,0.04795733094215393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,1023,0.13091733058293661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,1023,0.07464000085989635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,2047,0.2520853281021118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,1,0.06443733473618825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,2047,0.13269866506258646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,1,0.053818667928377785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,3,0.06460266808668773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,3,0.05299200117588043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,7,0.0645066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,7,0.05410666763782501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,15,0.06437333424886067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,15,0.05413866539796194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,31,0.06437333424886067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,31,0.052853330969810486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,63,0.06445866823196411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,63,0.05411200225353241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,127,0.06444799900054932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,127,0.05532266696294149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,255,0.07594133416811626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,255,0.05615466833114624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,511,0.1332266628742218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,1,0.1179253359635671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,1,0.09921066959698994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,511,0.08352532982826233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,3,0.11970133582750957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,3,0.10098133484522502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,7,0.11756267150243123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,7,0.1014400025208791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,15,0.11962667107582092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,15,0.10058666268984477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,31,0.11782933274904887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,31,0.1011840005715688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,63,0.11988266309102376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,63,0.10040000081062317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,127,0.11889599760373433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,1,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,127,0.10054399569829305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,1,0.01166933278242747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,255,0.14017599821090698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,255,0.10496000448862712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,3,0.01339200014869372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,7,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,7,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,15,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,15,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,31,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,31,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,63,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,63,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,127,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,127,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,255,0.012741333494583765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,255,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,511,0.014736000448465347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,511,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,1023,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,1023,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,2047,0.029674666623274486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,2047,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,4095,0.0360000009338061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,4095,0.06152533491452535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,8191,0.09852799773216248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,8191,0.06367466847101848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,16383,0.10216533144315083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,16383,0.17543999354044595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,32767,0.1814346710840861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,32767,0.3351840178171794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,1,0.22302399079004923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,3,0.2221440076828003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,3,0.18766399224599203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,7,0.22250133752822876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,7,0.18752533197402954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,15,0.2228213349978129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,15,0.18772266308466592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,31,0.22230400641759238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,31,0.1876586675643921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,63,0.22259199619293213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,63,0.1881706714630127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,127,0.22235200802485147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,127,0.18705600500106812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,1,0.190010666847229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,1,0.43169065316518146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,3,0.43298665682474774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,1,0.3635893265406291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,3,0.3641973336537679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,7,0.4320640166600545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,7,0.36351466178894043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,15,0.43350934982299805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,31,0.36186667283376056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,15,0.3641759951909383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,31,0.43297600746154785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,1,0.013530666629473368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,1,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,3,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,3,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,7,0.013834666460752487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,7,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,63,0.4333333174387614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,15,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,15,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,31,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,31,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,63,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,63,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,127,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,127,0.013525333255529404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,255,0.013669333110253016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,255,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,511,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,511,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,1023,0.024933333198229473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,1023,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,2047,0.04929066697756449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,2047,0.030597334106763203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,63,0.36191999912261963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,4095,0.08385066191355388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,4095,0.0499839981396993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,8191,0.1471466620763143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,8191,0.08276266853014629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,1,0.014064000298579534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,1,0.014096000542243322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,16383,0.27533332506815594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,16383,0.14430399735768637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,3,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,3,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,7,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,15,0.01358933374285698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,7,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,15,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,31,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,31,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,63,0.013674666484196981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,63,0.013418667018413544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,127,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,127,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,255,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,255,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,511,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,511,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,1023,0.022709332406520844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,1023,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,2047,0.03987200061480204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,2047,0.03156266609827677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,4095,0.06205333272616068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,4095,0.0532533327738444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,8191,0.10382933417956035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,8191,0.08890133102734883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,1,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,1,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,3,0.009125333279371262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,3,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,16383,0.1587999959786733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,16383,0.18997865915298462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,7,0.009365333244204521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,7,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,15,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,15,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,31,0.009541333342591921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,31,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,63,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,63,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,127,0.010149333626031876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,127,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,255,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,255,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,511,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,511,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,1023,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,2047,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,2047,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,4095,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,4095,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,8191,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,8191,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,16383,0.018842666099468868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,16383,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,32767,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,32767,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,1,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,1,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,65535,0.03589333345492681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,3,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,3,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,7,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,65535,0.02976000060637792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,15,0.010480000327030817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,7,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,31,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,31,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,63,0.010437333335479101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,127,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,127,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,255,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,255,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,511,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,511,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,1023,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,1023,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,2047,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,2047,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,4095,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,4095,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,8191,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,8191,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,16383,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,16383,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,32767,0.03554133325815201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,32767,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,1,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,65535,0.056986664732297264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,1,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,3,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,3,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,65535,0.05003199974695841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,7,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,7,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,15,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,15,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,31,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,31,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,63,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,63,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,127,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,127,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,255,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,255,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,511,0.02205866575241089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,511,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,1023,0.03561066587766012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,1023,0.028021333118279774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,2047,0.058543999989827476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,2047,0.05006400247414907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,4095,0.09636800487836202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,4095,0.08112533390522003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,1,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,1,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,8191,0.1702186663945516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,3,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,3,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,7,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,8191,0.14273066322008768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,15,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,15,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,63,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,31,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,63,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,127,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,255,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,255,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,511,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,511,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,1023,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,1023,0.012624000509579977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,2047,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,2047,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,4095,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,4095,0.018437333405017853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,8191,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,8191,0.02275199939807256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,16383,0.03841066608826319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,16383,0.03084266682465871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,32767,0.05797866483529409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,32767,0.051776001850763954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,1,0.025333332518736523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,65535,0.10016533732414246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,1,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,65535,0.08700266480445862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,3,0.025418666501839954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,3,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,7,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,7,0.023573334018389385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,15,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,15,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,31,0.025450666745503742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,31,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,63,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,63,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,127,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,127,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,255,0.025279998779296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,255,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,511,0.03155199935038885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,511,0.03766933331886927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,1023,0.05600533386071523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,1023,0.04906666775544485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,2047,0.09946133693059285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,2047,0.08296533425649007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,1,0.03948266555865606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,4095,0.1728960076967875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,4095,0.14457066853841147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,1,0.036101333796978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,3,0.03951466580231985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,3,0.03711999952793121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,7,0.039647998909155525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,7,0.037503999968369804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,15,0.03977066775163015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,15,0.036559998989105225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,31,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,31,0.03729599962631861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,63,0.03952533255020777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,63,0.03774400055408478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,127,0.03792533278465271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,127,0.035989334185918175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,255,0.041893333196640015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,255,0.035562666753927864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,511,0.06203199923038483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,511,0.05440000196297964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,1023,0.09889066219329834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,1023,0.08285333216190338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,1,0.06402133405208588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,1,0.07067733506361644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,3,0.0684853345155716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,3,0.06213866670926412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,7,0.0699839989344279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,15,0.06913599868615468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,7,0.06233599781990051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,15,0.06251733501752217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,31,0.0703893353541692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,31,0.06247466802597046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,63,0.07022400200366974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,63,0.062208001812299095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,127,0.06862399975458781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,127,0.062090665102005005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,255,0.07169066866238911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,255,0.06421866516272227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,1,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,511,0.11038933197657268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,3,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,3,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,7,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,511,0.09547199805577596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,15,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,15,0.011488000551859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,63,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,127,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,255,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,255,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,511,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,511,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,1023,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,1023,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,2047,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,2047,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,4095,0.02250133454799652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,4095,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,8191,0.037845333417256675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,8191,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,16383,0.05190399785836538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,16383,0.058362667759259544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,32767,0.10014399886131287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,32767,0.08513066172599792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,65535,0.1564479966958364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,65535,0.18457067012786865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,1,0.12985066572825113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,1,0.11568533380826314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,3,0.11567466457684834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,3,0.13036266962687174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,7,0.13012799620628357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,7,0.11564266681671143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,15,0.13016000390052795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,15,0.1157919963200887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,31,0.1160533328851064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,31,0.13009066383043924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,63,0.13024000326792398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,63,0.11589866876602173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,127,0.1276693344116211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,127,0.11775466799736023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,255,0.13034666577974954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,255,0.11606933673222859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,1,0.25090134143829346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,1,0.22481600443522134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,3,0.24896534283955893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,3,0.2243893345197042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,7,0.2239946722984314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,15,0.24886933962504068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,7,0.25089067220687866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,15,0.22445333003997803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,31,0.24864532550175986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,31,0.2241706649462382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,63,0.2508586645126343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,1,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,63,0.22490666309992471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,1,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,3,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,3,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,7,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,7,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,15,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,15,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,31,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,127,0.24368532498677573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,31,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,127,0.22186134258906046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,63,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,63,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,127,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,127,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,255,0.011834666132926941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,255,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,511,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,511,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,1023,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,1023,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,2047,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,2047,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,4095,0.03957866628964742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,4095,0.029530666768550873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,8191,0.05229333539803823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,8191,0.06020799775918325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,16383,0.10193600257237752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,16383,0.08705600102742513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,1,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,1,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,3,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,3,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,7,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,32767,0.15664533774058023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,32767,0.18796799580256143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,7,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,15,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,15,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,31,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,31,0.01357866699496905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,63,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,63,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,127,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,127,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,255,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,255,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,511,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,511,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,1023,0.03482133398453394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,1023,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,2047,0.03980266551176707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,2047,0.061424002051353455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,4095,0.10339732964833577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,4095,0.061247999469439186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,8191,0.10284266869227092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,8191,0.1870666742324829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,1,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,1,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,3,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,3,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,7,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,16383,0.18679465850194296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,16383,0.3580160140991211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,15,0.010458666831254959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,31,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,63,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,63,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,127,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,127,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,255,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,255,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,511,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,511,0.011445333560307821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,1023,0.011429333438475927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,1023,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,2047,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,2047,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,4095,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,4095,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,8191,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,8191,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,16383,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,16383,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,32767,0.03675200045108795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,32767,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,1,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,65535,0.05621333420276642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,3,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,3,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,7,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,65535,0.036117332677046456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,7,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,15,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,31,0.012330666184425354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,63,0.0102613332370917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,127,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,127,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,255,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,255,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,511,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,511,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,1023,0.012335999558369318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,1023,0.012634667257467905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,2047,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,2047,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,4095,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,4095,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,8191,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,8191,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,16383,0.03687999894221624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,16383,0.023061332603295643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,32767,0.05794133245944977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,32767,0.03575466573238373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,1,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,1,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,65535,0.10022399822870891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,3,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,3,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,65535,0.056143999099731445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,7,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,7,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,15,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,15,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,31,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,31,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,63,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,63,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,127,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,127,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,255,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,255,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,511,0.03404266635576884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,511,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,1023,0.053029333551724754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,1023,0.03533333291610082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,2047,0.09708799918492635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,2047,0.05816000203291575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,4095,0.17393600940704346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,4095,0.09523733456929524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,1,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,1,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,3,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,3,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,7,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,8191,0.32939199606577557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,8191,0.1704746683438619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,7,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,31,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,31,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,63,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,127,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,255,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,511,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,511,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,1023,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,1023,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,2047,0.017583999782800674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,2047,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,4095,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,4095,0.018917333334684372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,8191,0.039808000127474465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,8191,0.023717333873112995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,16383,0.06002666552861532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,16383,0.03811199963092804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,32767,0.10178666313489278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,32767,0.058330665032068886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,1,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,1,0.023567999402681988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,65535,0.18759999672571817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,3,0.02756800005833308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,65535,0.09706667065620422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,3,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,7,0.027610667049884796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,7,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,15,0.02754666656255722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,15,0.025120000044504803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,31,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,31,0.027424000203609467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,63,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,63,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,127,0.027429332335789997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,127,0.02513066679239273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,255,0.03425599883000056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,255,0.023413332800070446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,511,0.05418666700522105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,511,0.03763733307520548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,1023,0.09129599730173747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,1023,0.0553706685702006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,2047,0.1747679909070333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,2047,0.09604799747467041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,1,0.044293334086736046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,4095,0.3290613293647766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,4095,0.1713226636250814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,1,0.038533332447210945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,3,0.0458133320013682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,3,0.039605334401130676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,7,0.04433066646258036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,7,0.03789333254098892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,15,0.045647998650868736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,15,0.038176000118255615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,31,0.04423999786376953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,31,0.03975466638803482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,63,0.045754666129748024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,63,0.03756266583998998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,127,0.04795200129350027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,127,0.03896533449490865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,255,0.054272000988324486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,255,0.04271999994913737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,511,0.09382399916648865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,511,0.06003733476003011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,1023,0.16722132762273154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,1,0.08262399832407634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,1023,0.09546666344006856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,1,0.06871999800205231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,3,0.08183999856313069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,3,0.06831466654936473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,7,0.08295999964078267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,7,0.06850133339564006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,15,0.08190933366616567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,15,0.06824000179767609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,31,0.08291199803352356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,31,0.06861866513888042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,63,0.08105066418647766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,63,0.06846933563550313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,127,0.08321066697438557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,127,0.06886933247248332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,255,0.09691733121871948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,255,0.07266133526961009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,1,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,1,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,3,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,511,0.1737066706021627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,511,0.10619733730951945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,7,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,15,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,31,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,15,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,63,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,127,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,255,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,255,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,511,0.012266666938861212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,511,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,1023,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,1023,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,2047,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,2047,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,4095,0.03847466657559077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,4095,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,8191,0.059445331494013466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,8191,0.0379573330283165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,16383,0.10108266274134318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,16383,0.057029331723848976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,32767,0.18651199340820312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,32767,0.09707732995351155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,65535,0.36637866497039795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,1,0.1535360018412272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,65535,0.17748266458511353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,1,0.12943999965985617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,3,0.15286933382352194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,3,0.12917332847913107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,7,0.15254933635393778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,7,0.13005333145459494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,15,0.15265599886576334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,15,0.12972799936930338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,31,0.15264000495274863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,31,0.12980266412099203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,63,0.1525973379611969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,63,0.12983466188112894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,127,0.15242666999499002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,127,0.12772267063458762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,255,0.17946666479110718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,255,0.13180800278981528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,1,0.29231999317804974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,3,0.29370667537053424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,3,0.246943990389506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,1,0.24781866868336996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,7,0.24676799774169922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,15,0.29392000039418537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,15,0.24879467487335205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,31,0.2937386631965637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,31,0.24710933367411295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,63,0.29577600955963135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,63,0.24861333767573038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,127,0.29393599430720013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,1,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,1,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,127,0.24514667193094888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,3,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,3,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,7,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,7,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,15,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,15,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,31,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,31,0.01211200033624967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,63,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,63,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,127,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,127,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,255,0.012469333906968435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,255,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,511,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,511,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,1023,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,1023,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,2047,0.037903999288876854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,2047,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,4095,0.060229331254959106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,4095,0.040405333042144775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,8191,0.10196800033251445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,8191,0.05840000013510386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,16383,0.1885546644528707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,16383,0.09981333216031392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,1,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,7,0.2932426730791728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,1,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,3,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,32767,0.3577920198440552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,3,0.013514666507641474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,7,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,32767,0.1829493244489034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,15,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,7,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,15,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,31,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,31,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,63,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,63,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,127,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,127,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,255,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,255,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,511,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,511,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,1023,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,1023,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,2047,0.032170665760835014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,2047,0.027301333844661713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,4095,0.05037866532802582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,4095,0.042223999897638954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,8191,0.08292800188064575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,8191,0.07037333150704701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,1,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,1,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,3,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,16383,0.14781333009401956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,7,0.01055466632048289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,16383,0.12193066875139873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,15,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,31,0.00984533317387104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,31,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,127,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,127,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,255,0.009306666751702627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,511,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,511,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,1023,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,1023,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,2047,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,2047,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,4095,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,4095,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,8191,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,8191,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,16383,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,16383,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,32767,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,32767,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,1,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,1,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,65535,0.02938666691382726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,3,0.010250666489203772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,7,0.010399999717871347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,65535,0.025594666600227356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,7,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,15,0.0102186668664217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,31,0.009082666908701261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,31,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,63,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,63,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,127,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,255,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,255,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,511,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,511,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,1023,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,1023,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,2047,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,2047,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,4095,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,4095,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,8191,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,8191,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,16383,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,16383,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,32767,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,32767,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,1,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,65535,0.04586666822433472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,1,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,3,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,3,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,65535,0.03969600051641464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,7,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,7,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,15,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,15,0.015882667154073715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,31,0.016250666230916977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,31,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,63,0.01632000009218852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,63,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,127,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,127,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,255,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,255,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,511,0.019695999721686046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,511,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,1023,0.02935466667016347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,1023,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,2047,0.049882665276527405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,2047,0.04372799893220266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,4095,0.08293333152929942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,4095,0.07054933408896129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,1,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,3,0.010421333213647207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,7,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,8191,0.14730133612950644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,8191,0.12300266822179158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,7,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,15,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,31,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,63,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,63,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,127,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,127,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,255,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,255,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,511,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,511,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,1023,0.013770667215188345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,1023,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,2047,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,2047,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,4095,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,4095,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,8191,0.021589333812395733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,8191,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,16383,0.03156800071398417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,16383,0.02719466636578242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,32767,0.04729066789150238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,32767,0.04031999905904134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,1,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,65535,0.0772213339805603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,1,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,3,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,3,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,65535,0.06447466711203258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,7,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,7,0.021674667795499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,15,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,15,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,31,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,31,0.01958400011062622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,63,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,63,0.020703999946514767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,127,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,127,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,255,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,255,0.0205226664741834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,511,0.031386665999889374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,511,0.027306665976842243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,1023,0.04615999758243561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,1023,0.040991999208927155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,2047,0.0791733314593633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,2047,0.06854400038719177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,1,0.03155199935038885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,4095,0.13903466860453287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,4095,0.1176479955514272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,1,0.029504001140594482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,3,0.03349333256483078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,3,0.02961066613594691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,7,0.03166933357715607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,7,0.03019733230272929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,15,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,15,0.029631999631722767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,31,0.031471999982992806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,31,0.0312266672650973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,63,0.0317493329445521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,63,0.0295413335164388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,127,0.031871999303499855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,127,0.029648000995318096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,255,0.03358400116364161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,511,0.050661335388819374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,255,0.030640001098314922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,511,0.043765331308046974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,1023,0.07638933261235555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,1023,0.0680320014556249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,1,0.049685334165891014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,1,0.05428266525268555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,3,0.05398933092753092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,3,0.05151999990145365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,7,0.054234668612480164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,7,0.05012799799442291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,15,0.0539626677831014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,15,0.04994666576385498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,31,0.05453333258628845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,31,0.05002133548259735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,63,0.05440000196297964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,63,0.05003199974695841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,127,0.05590933561325073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,127,0.04965866605440775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,255,0.05659733215967814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,255,0.05096533397833506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,1,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,511,0.08667733271916707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,3,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,3,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,7,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,511,0.07481066882610321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,15,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,15,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,31,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,31,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,63,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,63,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,127,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,255,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,255,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,511,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,511,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,1023,0.013338666409254074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,1023,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,2047,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,2047,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,4095,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,4095,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,8191,0.03549333413441976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,8191,0.029466666281223297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,16383,0.05784533421198527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,16383,0.049882665276527405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,32767,0.09913067022959392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,32767,0.08612266182899475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,65535,0.18106667200724283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,1,0.10105599959691365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,65535,0.15651733676592508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,1,0.08897599577903748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,3,0.1011306643486023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,3,0.0892693301041921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,7,0.09939199686050415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,7,0.08922666311264038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,15,0.10100266337394714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,15,0.0888266662756602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,31,0.10136000315348308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,31,0.08919466535250346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,63,0.09914132952690125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,63,0.08929066856702168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,127,0.09961600104967754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,127,0.08909866213798523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,255,0.10198932886123657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,255,0.0912000040213267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,1,0.17096000909805298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,3,0.17229866981506348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,3,0.19132800896962485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,1,0.19156799713770548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,7,0.18980266650517783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,7,0.17234132687250772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,15,0.19147199392318726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,15,0.17329599459966025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,31,0.1898933251698812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,31,0.17197332779566446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,63,0.1922559936841329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,1,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,63,0.17312000195185342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,1,0.01268799975514412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,3,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,3,0.01228800043463707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,7,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,7,0.01257066677014033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,15,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,15,0.012096000214417776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,31,0.011642667154471079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,31,0.012527999778588613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,63,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,63,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,127,0.18757865826288858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,127,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,127,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,255,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,127,0.17015999555587769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,255,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,511,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,511,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,1023,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,1023,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,2047,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,2047,0.02130666623512904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,4095,0.0359253336985906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,4095,0.02976000060637792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,8191,0.0601440022389094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,8191,0.05003199974695841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,16383,0.10472533106803894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,16383,0.08669867118199666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,1,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,1,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,3,0.01350933313369751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,3,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,32767,0.18580265839894614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,32767,0.1567520002524058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,7,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,7,0.013397333522637686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,15,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,15,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,31,0.01470400020480156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,31,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,63,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,63,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,127,0.014256000518798828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,127,0.012746666868527731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,255,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,255,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,511,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,511,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,1023,0.024245334168275196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,1023,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,2047,0.04994133114814758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,2047,0.03158933420976003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,4095,0.08256533245245616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,4095,0.04974400003751119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,8191,0.1469439963499705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,8191,0.08196799953778584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,1,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,1,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,3,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,3,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,16383,0.14587733149528503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,16383,0.27345067262649536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,7,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,7,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,15,0.010351999973257383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,15,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,31,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,63,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,127,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,127,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,255,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,511,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,511,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,1023,0.011509332805871964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,1023,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,2047,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,2047,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,4095,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,4095,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,8191,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,8191,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,16383,0.020997333029905956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,16383,0.017727999637524288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,32767,0.02595199892918269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,32767,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,1,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,3,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,65535,0.04700266818205515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,3,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,7,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,65535,0.029626667499542236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,7,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,15,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,31,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,31,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,63,0.01002133327225844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,63,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,127,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,255,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,255,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,1023,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,1023,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,2047,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,2047,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,4095,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,4095,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,8191,0.01978133370478948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,8191,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,16383,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,16383,0.021573332448800404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,32767,0.047872001926104225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,32767,0.03160533308982849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,1,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,65535,0.07773333291212718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,1,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,3,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,65535,0.046629334489504494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,3,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,7,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,7,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,15,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,15,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,31,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,31,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,63,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,63,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,127,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,127,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,255,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,255,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,511,0.02475733309984207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,511,0.02082666630546252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,1023,0.045567999283472695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,1023,0.02884799987077713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,2047,0.08257600168387096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,2047,0.049882665276527405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,4095,0.14692266782124838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,1,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,4095,0.08287466565767924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,1,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,8191,0.14436800281206766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,8191,0.28195732831954956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,3,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,3,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,7,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,15,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,7,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,15,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,31,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,63,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,31,0.011493333925803503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,127,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,255,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,255,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,511,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,511,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,1023,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,1023,0.014271999398867289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,2047,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,2047,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,4095,0.02141333371400833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,4095,0.018837332725524902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,8191,0.0271573339899381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,8191,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,16383,0.049642667174339294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,16383,0.03137599925200144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,32767,0.07870399951934814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,32767,0.04850133260091146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,1,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,65535,0.1393066644668579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,1,0.021546666820844013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,3,0.023823998868465424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,3,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,65535,0.0765119989713033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,7,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,7,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,15,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,15,0.021583999196688335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,31,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,31,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,63,0.02367466688156128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,63,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,127,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,127,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,255,0.02550933261712392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,255,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,511,0.04458666841189066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,511,0.03033600002527237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,1023,0.07344000041484833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,1023,0.04442666471004486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,2047,0.13863999644915262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,2047,0.0782293329636256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,1,0.0359253336985906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,4095,0.25944000482559204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,4095,0.1360213359196981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,1,0.03183466692765554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,3,0.03585066646337509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,3,0.03170666595300039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,7,0.037845333417256675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,7,0.03147733211517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,15,0.037647999823093414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,15,0.03179733455181122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,31,0.035962666074434914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,31,0.03163733333349228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,63,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,63,0.0316746657093366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,127,0.03758399933576584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,127,0.03169599920511246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,255,0.0443146675825119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,511,0.07266666491826375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,255,0.03440000116825104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,511,0.04809066653251648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,1023,0.13061867157618204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,1023,0.0747519979874293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,1,0.0543039987484614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,1,0.0653599997361501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,3,0.06468266745408376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,3,0.053616002202034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,7,0.0645546664794286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,7,0.054133335749308266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,15,0.0646613339583079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,15,0.05337599913279215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,31,0.06437866886456807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,63,0.06463466584682465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,31,0.0543146679798762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,63,0.05349866549173991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,127,0.06518400212128957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,127,0.05413866539796194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,255,0.07547733187675476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,255,0.05613866448402405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,3,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,511,0.13127999504407248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,511,0.08261866867542267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,7,0.011445333560307821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,15,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,15,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,31,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,63,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,127,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,127,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,255,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,255,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,511,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,511,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,1023,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,1023,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,2047,0.019786667078733444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,2047,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,4095,0.030970667799313862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,4095,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,8191,0.05460800230503082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,8191,0.03399466723203659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,16383,0.09410666426022847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,16383,0.05482666691144308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,32767,0.16768000523249307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,32767,0.09494400024414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,65535,0.3205813368161519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,1,0.11902399857838948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,65535,0.17153066396713257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,1,0.10147733489672343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,3,0.11927466591199239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,3,0.10134933392206828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,7,0.1181599994500478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,7,0.10142933328946431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,15,0.11985599994659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,15,0.10128000378608704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,31,0.11954666177431743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,31,0.10125333070755005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,63,0.11852266391118367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,63,0.10097066561381023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,127,0.11909332871437073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,127,0.10051199793815613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,255,0.10292266805966695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,255,0.1393226683139801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,1,0.2241226633389791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,1,0.18786134322484335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,3,0.22229333718617758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,3,0.18969066937764487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,7,0.22433600823084512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,7,0.18804800510406494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,15,0.22239466508229574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,15,0.18945600589116415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,31,0.22419732809066772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,31,0.18740799029668173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,63,0.22415467103322348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,1,0.012666666259368261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,63,0.18847467501958212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,1,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,3,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,3,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,7,0.012400000045696894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,7,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,15,0.012128000458081564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,127,0.22458134094874063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,31,0.012133333832025528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,31,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,63,0.012362666428089142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,63,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,127,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,127,0.18552533785502115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,127,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,255,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,255,0.012469333906968435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,511,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,511,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,1023,0.0182239996890227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,1023,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,2047,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,2047,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,4095,0.03503466645876566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,4095,0.057349334160486855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,8191,0.0974720021088918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,8191,0.059936001896858215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,16383,0.10128000378608704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,16383,0.1745599905649821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,1,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,1,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,3,0.012165332833925882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,32767,0.18189332882563272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,7,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,7,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,32767,0.33374933401743573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,15,0.012341332932313284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,15,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,31,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,31,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,63,0.011760000139474869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,63,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,127,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,127,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,511,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,511,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,1023,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,1023,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,2047,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,2047,0.02160533269246419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,4095,0.039893334110577904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,4095,0.03148799886306127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,8191,0.06075199941794077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,8191,0.052015999952952065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,16383,0.1030346651871999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,16383,0.08813333511352539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,1,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,1,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,3,0.010037333394090334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,32767,0.18808533747990927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,32767,0.15664533774058023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,7,0.010170666500926018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,15,0.01055466632048289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,15,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,31,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,63,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,63,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,127,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,255,0.00938666673998038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,255,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,511,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,1023,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,1023,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,2047,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,2047,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,4095,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,4095,0.013712000101804733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,8191,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,8191,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,16383,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,16383,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,32767,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,32767,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,65535,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,65535,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,1,0.009119999905427298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,1,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,3,0.008885333314538002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,3,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,7,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,15,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,131071,0.038959999879201256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,15,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,31,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,31,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,63,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,131071,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,63,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,127,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,127,0.010464000205198923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,255,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,511,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,511,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,1023,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,1023,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,2047,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,2047,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,4095,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,4095,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,8191,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,8191,0.01628799984852473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,16383,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,16383,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,32767,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,32767,0.021514666577180225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,65535,0.03754666695992152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,65535,0.029520000020662945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,1,0.013914667069911957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,1,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,3,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,3,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,7,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,15,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,7,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,31,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,15,0.01403733342885971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,131071,0.05863999823729197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,63,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,31,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,63,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,127,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,127,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,131071,0.05184000233809153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,255,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,255,0.013669333110253016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,511,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,511,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,1023,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,1023,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,2047,0.0397173340121905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,2047,0.031504000226656594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,4095,0.06132266422112783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,4095,0.052330667773882546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,8191,0.10377066334088643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,8191,0.08897067109743755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,1,0.010288000106811523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,1,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,3,0.009712000067035357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,3,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,7,0.009642666826645533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,16383,0.189903994401296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,16383,0.15677866339683533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,15,0.009477333476146063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,15,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,31,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,63,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,63,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,255,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,511,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,511,0.012416000167528788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,1023,0.011642667154471079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,1023,0.011546666423479715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,2047,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,2047,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,4095,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,4095,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,8191,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,8191,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,16383,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,16383,0.021551998953024547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,32767,0.03815466662247976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,32767,0.029904000461101532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,65535,0.05804799993832906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,65535,0.05190399785836538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,1,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,1,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,3,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,3,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,131071,0.10248532891273499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,7,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,7,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,15,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,131071,0.0885813335577647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,15,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,31,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,31,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,63,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,63,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,127,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,127,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,255,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,255,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,511,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,511,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,1023,0.035418666899204254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,1023,0.029674666623274486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,2047,0.05903466542561849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,2047,0.05011733373006185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,4095,0.09661866227785747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,4095,0.0804799993832906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,1,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,1,0.02306666721900304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,8191,0.17152533928553262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,3,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,3,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,8191,0.1423520048459371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,7,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,7,0.02569066733121872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,15,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,31,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,31,0.0236160010099411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,15,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,63,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,63,0.023039999107519787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,127,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,127,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,255,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,255,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,511,0.037989333271980286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,511,0.03150933235883713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,1023,0.05624000231424967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,1023,0.04864533245563507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,2047,0.09991466999053955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,2047,0.083146666487058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,1,0.039546666045983635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,1,0.03597866743803024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,3,0.0395359992980957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,3,0.03563733398914337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,7,0.039674667020638786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,7,0.035674666364987694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,15,0.03957866628964742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,15,0.0356480007370313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,31,0.03957333415746689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,31,0.03573866685231527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,63,0.039477333426475525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,63,0.0354720006386439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,127,0.0397173340121905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,127,0.03561066587766012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,255,0.041749333341916404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,255,0.035445332527160645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,511,0.06251200040181477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,1,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,511,0.053861334919929504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,3,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,1023,0.09788800279299419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,3,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,1023,0.08278400202592213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,7,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,15,0.009573333586255709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,15,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,31,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,31,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,63,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,63,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,127,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,255,0.010490667074918747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,511,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,511,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,1023,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,1023,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,2047,0.015775999675194424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,2047,0.01479999969402949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,4095,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,4095,0.01757866640885671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,8191,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,8191,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,16383,0.03812800099452337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,16383,0.029648000995318096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,32767,0.058549334605534874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,32767,0.051818668842315674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,65535,0.09956266482671101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,65535,0.08664533495903015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,1,0.06911999980608623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,131071,0.18414932489395142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,1,0.06252799928188324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,131071,0.1585653324921926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,3,0.06850133339564006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,3,0.06225599845250448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,7,0.06851199766000111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,7,0.06248533229033152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,15,0.06868266562620799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,15,0.062405332922935486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,31,0.07051200171311696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,31,0.06303466856479645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,63,0.06936533252398173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,63,0.06246933341026306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,127,0.07066666583220164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,127,0.06247466802597046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,255,0.07247466842333476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,255,0.06440000236034393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,511,0.11153067151705424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,511,0.09528533617655437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,1,0.13014933466911316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,1,0.11556800206502278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,3,0.1300320029258728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,3,0.11626666784286499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,7,0.13013866543769836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,7,0.11561066905657451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,15,0.1300320029258728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,15,0.11780800422032674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,31,0.13008532921473184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,31,0.11585600177447002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,63,0.13019200166066489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,63,0.11763733625411987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,127,0.11708266536394756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,127,0.12788266936937967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,1,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,3,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,3,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,255,0.13193066914876303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,7,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,15,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,15,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,31,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,63,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,63,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,255,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,255,0.012400000045696894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,511,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,255,0.1181119978427887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,511,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,1023,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,1023,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,2047,0.017530667285124462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,2047,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,4095,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,4095,0.022831998765468597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,8191,0.03772799919048945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,8191,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,16383,0.05714133381843567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,16383,0.05072000126043955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,32767,0.09913067022959392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,32767,0.0869653324286143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,1,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,1,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,3,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,3,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,7,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,65535,0.1567626694838206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,65535,0.18233599265416464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,7,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,15,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,15,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,31,0.012768000364303589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,31,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,63,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,63,0.012634667257467905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,127,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,127,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,255,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,255,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,511,0.014538666854302088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,511,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,1023,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,1023,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,2047,0.039642666776975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,2047,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,4095,0.06107200185457865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,4095,0.039808000127474465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,8191,0.10326932867368062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,8191,0.06057600180308024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,16383,0.1895093321800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,16383,0.10051733255386353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,1,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,1,0.01180800050497055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,3,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,3,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,32767,0.36192532380421955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,7,0.010490667074918747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,32767,0.18224533398946127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,7,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,15,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,31,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,15,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,31,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,63,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,63,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,127,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,255,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,511,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,511,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,1023,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,1023,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,2047,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,2047,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,4095,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,4095,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,8191,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,8191,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,16383,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,16383,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,32767,0.02258666604757309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,32767,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,65535,0.03929600119590759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,65535,0.02362666775782903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,1,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,3,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,1,0.011941333611806234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,3,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,15,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,15,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,131071,0.06057600180308024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,31,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,63,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,127,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,63,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,255,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,255,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,511,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,131071,0.03972800076007843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,511,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,1023,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,1023,0.011642667154471079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,2047,0.013872000078360239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,2047,0.014271999398867289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,4095,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,4095,0.015909332782030106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,8191,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,8191,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,16383,0.021941334009170532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,16383,0.019653332730134327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,32767,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,32767,0.03756266583998998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,65535,0.05823466678460439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,65535,0.03756800045569738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,1,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,1,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,3,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,3,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,7,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,7,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,15,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,131071,0.05835733314355215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,131071,0.09988266229629517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,15,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,31,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,31,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,63,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,63,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,127,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,127,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,255,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,255,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,511,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,511,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,1023,0.03557866563399633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,1023,0.02149333308140437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,2047,0.06148266792297363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,2047,0.0401706670721372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,4095,0.10450133681297302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,4095,0.06125866870085398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,8191,0.18976000944773355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,8191,0.10338667035102844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,1,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,1,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,3,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,3,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,16383,0.3628693421681722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,16383,0.18648000558217367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,7,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,15,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,15,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,127,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,127,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,255,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,255,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,511,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,511,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,1023,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,1023,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,2047,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,2047,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,4095,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,4095,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,8191,0.020981334149837494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,8191,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,16383,0.03886399914820989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,16383,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,32767,0.060602664947509766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,32767,0.03761066744724909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,65535,0.10264533758163452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,65535,0.05807466804981232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,1,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,1,0.01798933371901512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,3,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,7,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,3,0.01802666609485944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,7,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,131071,0.19112000862757364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,15,0.019946667055288952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,15,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,31,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,31,0.01971199984351794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,131071,0.09957333405812581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,63,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,63,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,127,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,127,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,255,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,255,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,511,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,511,0.03389333436886469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,1023,0.05421333511670431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,1023,0.03411199897527695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,2047,0.09874133268992107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,2047,0.05888533095518748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,4095,0.17662400007247925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,4095,0.09497066338857015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,1,0.027258666853109997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,1,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,8191,0.16934933265050253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,8191,0.3348746697107951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,3,0.02739199995994568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,3,0.02500266581773758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,7,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,7,0.027632000545660656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,15,0.027285332481066387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,15,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,31,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,31,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,63,0.02714666724205017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,63,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,127,0.027285332481066387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,127,0.026943999032179516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,255,0.03366933266321818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,255,0.025034666061401367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,511,0.0539626677831014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,511,0.03775999943415324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,1023,0.09537600477536519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,1023,0.05434666574001312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,2047,0.1776640017827352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,2047,0.09858133395512898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,1,0.043882668018341064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,1,0.0395359992980957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,3,0.04609066744645437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,3,0.03958933303753535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,7,0.04595733185609182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,7,0.03977599988381068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,15,0.045893331368764244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,15,0.039781334499518074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,31,0.04606399933497111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,31,0.039594667653242745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,63,0.044906665881474815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,63,0.03830400109291077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,127,0.04786666731039683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,127,0.0377866675456365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,255,0.05393599967161814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,255,0.04125333329041799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,511,0.09363200267155965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,511,0.05975999931494395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,1,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,1023,0.17376534144083658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,1,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,3,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,1023,0.09497066338857015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,7,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,15,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,15,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,31,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,63,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,127,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,127,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,255,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,511,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,511,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,1023,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,1023,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,2047,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,2047,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,4095,0.02082666630546252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,4095,0.019760000209013622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,8191,0.039781334499518074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,8191,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,16383,0.06171200176080068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,16383,0.03948266555865606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,32767,0.10310399532318115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,32767,0.057818666100502014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,65535,0.18950400749842325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,65535,0.09714133540789287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,1,0.08291733264923096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,1,0.06855466465155284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,131071,0.17870932817459106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,131071,0.36457598209381104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,3,0.083146666487058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,3,0.06825600067774455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,7,0.08108800152937572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,7,0.06834133466084798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,15,0.08286400139331818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,15,0.0682666649421056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,31,0.08156266808509827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,31,0.06833066542943318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,63,0.0824480007092158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,63,0.06809066732724507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,127,0.08290133376916249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,127,0.06866133213043213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,255,0.07149333258469899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,511,0.1730453372001648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,511,0.10558933019638062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,1,0.154448002576828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,1,0.12997866670290628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,3,0.15265599886576334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,3,0.13011733690897623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,7,0.1302773356437683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,15,0.15450132886568704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,15,0.13024000326792398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,31,0.15449600418408713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,255,0.09737599889437358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,31,0.12999999523162842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,63,0.1544266641139984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,63,0.12998933593432108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,127,0.12918933232625326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,255,0.18017067511876425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,7,0.15316266814867655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,1,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,1,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,3,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,7,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,7,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,15,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,15,0.011488000551859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,255,0.13293866316477457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,31,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,31,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,63,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,63,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,127,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,127,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,255,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,511,0.01232533281048139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,255,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,511,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,1023,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,1023,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,2047,0.020288000504175823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,2047,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,4095,0.038362666964530945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,4095,0.023546665906906128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,8191,0.06065066655476888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,8191,0.03855466594298681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,16383,0.10275200009346008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,16383,0.057114665706952415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,32767,0.18872533241907755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,32767,0.09629333019256592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,127,0.1539359986782074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,65535,0.17510400215784708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,65535,0.36032533645629883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,1,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,1,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,3,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,3,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,7,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,7,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,15,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,15,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,31,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,63,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,63,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,127,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,127,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,255,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,511,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,511,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,1023,0.012661332885424295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,1023,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,2047,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,4095,0.0230880007147789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,2047,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,4095,0.021418665846188862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,8191,0.03761066744724909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,8191,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,16383,0.058304001887639366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,16383,0.05189866820971171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,32767,0.08653866251309712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,32767,0.10147200028101604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,1,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,1,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,3,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,7,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,7,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,15,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,15,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,31,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,65535,0.18318933248519897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,65535,0.15705600380897522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,127,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,127,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,255,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,255,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,511,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,511,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,1023,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,1023,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,2047,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,2047,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,4095,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,4095,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,8191,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,8191,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,16383,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,16383,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,32767,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,32767,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,65535,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,65535,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,1,0.00955200009047985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,1,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,3,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,3,0.009253333633144697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,7,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,7,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,15,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,31,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,31,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,63,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,131071,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,131071,0.02718399961789449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,63,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,127,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,127,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,255,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,511,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,255,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,511,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,1023,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,1023,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,2047,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,2047,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,4095,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,4095,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,8191,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,8191,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,16383,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,16383,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,32767,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,32767,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,65535,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,65535,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,1,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,1,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,3,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,3,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,7,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,131071,0.041109333435694374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,7,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,15,0.012165332833925882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,131071,0.031397332747777305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,15,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,31,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,63,0.012746666868527731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,31,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,63,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,127,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,127,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,255,0.0124746672809124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,255,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,511,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,511,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,1023,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,1023,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,2047,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,2047,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,4095,0.0394400010506312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,4095,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,8191,0.06137066582838694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,8191,0.051872000098228455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,16383,0.10328533252080281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,1,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,16383,0.08802133798599243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,1,0.010378666842977205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,3,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,7,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,7,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,15,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,15,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,31,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,32767,0.18926399946212769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,63,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,63,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,127,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,127,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,32767,0.15875200430552164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,255,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,255,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,511,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,511,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,1023,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,1023,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,2047,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,2047,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,4095,0.01628799984852473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,4095,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,8191,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,8191,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,16383,0.019952000429232914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,16383,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,32767,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,32767,0.025775998830795288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,65535,0.040789333482583366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,65535,0.03160000095764796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,1,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,1,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,3,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,3,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,7,0.01479999969402949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,7,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,15,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,15,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,131071,0.060933331648508705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,31,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,31,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,131071,0.05394133428732554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,63,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,63,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,127,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,127,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,255,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,255,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,511,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,511,0.015573333948850632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,1023,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,1023,0.020848001043001812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,2047,0.04009599983692169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,2047,0.031888000667095184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,4095,0.06217066446940104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,4095,0.054245332876841225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,8191,0.1055626670519511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,8191,0.08896000186602275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,1,0.017711999515692394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,16383,0.19075733423233032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,1,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,3,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,3,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,16383,0.158842662970225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,7,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,7,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,15,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,15,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,31,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,31,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,63,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,63,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,127,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,127,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,255,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,255,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,511,0.023290666441122692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,511,0.020207999895016353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,1023,0.03414933383464813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,1023,0.02741333345572154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,2047,0.05826666454474131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,2047,0.05005866785844167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,1,0.024527999262015026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,4095,0.09723200400670369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,1,0.023290666441122692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,3,0.024490666886170704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,4095,0.08229866623878479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,3,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,7,0.02533866713444392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,7,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,15,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,31,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,15,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,31,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,63,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,63,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,127,0.023770667612552643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,127,0.02309333284695943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,255,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,255,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,511,0.037808001041412354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,511,0.031397332747777305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,1023,0.05625066657861074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,1023,0.048698668678601585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,1,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,1,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,3,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,3,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,2047,0.09849066535631816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,7,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,7,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,15,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,2047,0.08298133313655853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,31,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,31,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,63,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,63,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,127,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,127,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,255,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,255,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,511,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,511,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,1023,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,1023,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,2047,0.014287999520699183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,2047,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,4095,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,4095,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,8191,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,8191,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,16383,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,16383,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,32767,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,32767,0.03994133323431015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,65535,0.05202666421731313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,65535,0.059578667084376015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,1,0.0372533326347669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,1,0.039877332746982574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,131071,0.0899839997291565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,131071,0.10196266571680705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,3,0.03973866750796636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,3,0.03751999884843826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,7,0.039701332648595176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,7,0.035760000348091125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,15,0.03967999915281931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,15,0.03601066768169403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,31,0.03962666789690653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,31,0.03576533248027166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,63,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,63,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,127,0.0360959991812706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,127,0.0391893337170283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,255,0.04180799921353658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,255,0.03585066646337509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,511,0.06224533418814341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,511,0.0540533314148585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,1023,0.09756267070770264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,1023,0.08308800061543782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,1,0.06855466465155284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,3,0.06906133393446605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,3,0.06263466676076253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,1,0.06296533346176147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,7,0.06853333115577698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,7,0.06202666461467743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,15,0.06996266543865204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,15,0.06338133414586385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,31,0.06895466645558675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,31,0.062122667829195656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,63,0.06960533559322357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,63,0.0626933326323827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,127,0.07030400137106578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,127,0.06229866544405619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,255,0.07274133463700612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,255,0.06520533561706543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,1,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,1,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,3,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,3,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,7,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,7,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,15,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,511,0.11142399907112122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,15,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,31,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,63,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,31,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,127,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,127,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,255,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,255,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,511,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,511,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,1023,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,1023,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,2047,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,511,0.09512533744176228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,2047,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,4095,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,4095,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,8191,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,8191,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,16383,0.0382080003619194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,16383,0.030346666773160298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,32767,0.059749335050582886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,32767,0.051813334226608276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,65535,0.08694932858149211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,65535,0.10033599535624187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,1,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,1,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,3,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,3,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,7,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,15,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,15,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,31,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,31,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,63,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,63,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,127,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,255,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,255,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,511,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,511,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,131071,0.15853866934776306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,131071,0.1848479906717936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,1023,0.014709333578745524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,1023,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,2047,0.019621333728233974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,2047,0.018895999838908512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,4095,0.03955200066169103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,4095,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,8191,0.06030400097370148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,8191,0.03842666745185852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,16383,0.05810666580994924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,16383,0.10251200199127197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,32767,0.096778670946757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,32767,0.18519467115402222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,1,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,7,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,7,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,65535,0.3559733231862386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,15,0.009450666606426239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,15,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,31,0.009765333185593287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,31,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,63,0.00890666681031386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,127,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,127,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,65535,0.17668267091115317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,255,0.009194666519761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,511,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,511,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,1023,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,1023,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,2047,0.011749333391586939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,2047,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,4095,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,4095,0.012096000214417776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,8191,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,8191,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,16383,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,16383,0.01800000046690305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,32767,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,32767,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,65535,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,65535,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,1,0.010159999753038088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,3,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,3,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,7,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,7,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,131071,0.04423466821511587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,15,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,31,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,31,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,63,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,131071,0.028698667883872986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,63,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,127,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,127,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,255,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,255,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,511,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,511,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,1023,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,1023,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,2047,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,2047,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,4095,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,4095,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,8191,0.017530667285124462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,8191,0.017711999515692394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,16383,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,16383,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,32767,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,32767,0.020741333564122517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,65535,0.03984000037113825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,65535,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,1,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,1,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,3,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,3,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,7,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,7,0.011946666985750198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,15,0.012639999389648438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,15,0.012304000556468964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,31,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,31,0.011962667107582092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,131071,0.06234133243560791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,63,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,63,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,131071,0.0415040006240209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,127,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,127,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,255,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,255,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,511,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,511,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,1023,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,1023,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,2047,0.03977599988381068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,2047,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,4095,0.06061333417892456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,4095,0.039818666875362396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,8191,0.06041066845258077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,8191,0.10345066587130229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,16383,0.18891199429829916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,16383,0.10126399993896484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,1,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,1,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,3,0.009882666791478792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,7,0.009989333028594652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,32767,0.18336532513300577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,32767,0.35628267129262287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,15,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,31,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,15,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,31,0.011600000162919363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,63,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,127,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,255,0.010464000205198923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,255,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,511,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,1023,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,1023,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,2047,0.01579733317097028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,2047,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,4095,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,4095,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,8191,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,8191,0.01809599995613098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,16383,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,16383,0.02081599955757459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,32767,0.04141333450873693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,32767,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,65535,0.06132799883683523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,65535,0.04049599915742874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,1,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,1,0.013557333499193192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,3,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,3,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,131071,0.10413866241772969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,7,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,7,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,15,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,131071,0.06158400078614553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,15,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,31,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,63,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,31,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,63,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,127,0.014736000448465347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,127,0.01339200014869372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,255,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,255,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,511,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,511,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,1023,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,1023,0.03540800015131632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,2047,0.06233066817124685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,2047,0.04044266790151596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,4095,0.06235733131567637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,4095,0.10414933164914449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,8191,0.18998400370279947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,8191,0.10447999835014343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,1,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,1,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,3,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,16383,0.18714666366577148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,16383,0.3612480163574219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,3,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,7,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,7,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,15,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,15,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,31,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,31,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,63,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,63,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,127,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,127,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,255,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,255,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,511,0.034202667574087776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,511,0.021013334393501282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,1023,0.05440000196297964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,1023,0.03379199902216593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,2047,0.09889066219329834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,2047,0.05811200042565664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,1,0.027701333165168762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,4095,0.17508800824483237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,4095,0.09628267089525859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,1,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,3,0.02722666660944621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,3,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,7,0.027637332677841187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,7,0.024842667082945507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,15,0.027210667729377747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,15,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,31,0.02718399961789449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,31,0.02462399999300639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,63,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,63,0.023573334018389385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,127,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,127,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,255,0.03388266762097677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,255,0.02401600033044815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,511,0.054144000013669334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,511,0.03670933345953623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,1023,0.09308800101280212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,1023,0.05426666637261709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,1,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,1,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,3,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,2047,0.17780800660451254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,7,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,7,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,2047,0.09745066364606221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,15,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,15,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,31,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,31,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,127,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,127,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,255,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,255,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,511,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,511,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,1023,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,1023,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,2047,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,2047,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,4095,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,4095,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,8191,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,8191,0.018874666343132656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,16383,0.03893866638342539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,16383,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,32767,0.06052266558011373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,32767,0.0379573330283165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,65535,0.10322667161623637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,65535,0.05780800183614095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,1,0.04601066807905833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,131071,0.1891040007273356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,1,0.04004266609748205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,131071,0.09946667154630025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,3,0.039701332648595176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,3,0.04460800190766653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,7,0.045824001232783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,7,0.03969600051641464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,15,0.037802666425704956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,15,0.04607999821503957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,31,0.04588800172011057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,31,0.03965866565704346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,63,0.04558399816354116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,63,0.03972266614437103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,127,0.04807466765244802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,127,0.039264000952243805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,255,0.05423999826113383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,255,0.04105599969625473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,511,0.09449600179990132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,511,0.060549333691596985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,1023,0.170522669951121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,1023,0.09494400024414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,1,0.08298666775226593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,1,0.0688266654809316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,3,0.06843199829260509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,3,0.08276266853014629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,7,0.08293866614500682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,7,0.06845866640408833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,15,0.06829866766929626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,15,0.08281066517035167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,31,0.08255466818809509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,31,0.06854933500289917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,63,0.08193600177764893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,63,0.06837333242098491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,127,0.08283199866612752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,127,0.06947200000286102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,255,0.0728053351243337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,255,0.09577600161234538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,1,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,3,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,3,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,7,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,7,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,15,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,15,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,31,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,31,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,511,0.10571733117103577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,511,0.17303999265034994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,63,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,63,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,127,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,127,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,255,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,255,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,511,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,511,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,1023,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,1023,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,2047,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,2047,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,4095,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,4095,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,8191,0.023823998868465424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,8191,0.03977066775163015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,16383,0.06038400034109751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,16383,0.03915199885765711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,32767,0.05804799993832906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,32767,0.10221333305040996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,65535,0.18810133139292398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,65535,0.09730133414268494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,1,0.01044800008336703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,1,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,3,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,3,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,7,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,7,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,15,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,131071,0.3569653431574504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,15,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,131071,0.17974932988484701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,63,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,127,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,127,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,255,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,511,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,1023,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,1023,0.012159999459981918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,2047,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,2047,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,4095,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,4095,0.01884799947341283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,8191,0.025861332813898723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,8191,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,16383,0.039594667653242745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,16383,0.03147733211517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,32767,0.06037333110968272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,32767,0.05188799897829691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,65535,0.10129066308339436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,65535,0.08678932984670003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,1,0.009242666885256767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,1,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,3,0.00919999989370505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,7,0.00961599995692571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,15,0.009306666751702627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,7,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,15,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,31,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,31,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,63,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,63,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,127,0.01055466632048289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,255,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,511,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,255,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,511,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,1023,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,1023,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,2047,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,131071,0.1585493286450704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,131071,0.18500266472498575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,2047,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,4095,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,4095,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,8191,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,8191,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,16383,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,16383,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,32767,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,32767,0.021776000658671062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,65535,0.024527999262015026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,65535,0.023546665906906128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,1,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,3,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,3,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,15,0.008853333070874214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,15,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,31,0.009269333134094873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,31,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,131071,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,63,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,63,0.008949333180983862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,127,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,127,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,255,0.009706666693091393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,255,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,511,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,511,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,131071,0.027098665634791057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,1023,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,1023,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,2047,0.01227733368674914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,2047,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,4095,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,4095,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,8191,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,8191,0.016842667013406754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,16383,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,16383,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,32767,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,32767,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,65535,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,65535,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,1,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,3,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,3,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,7,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,131071,0.02886933336655299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,7,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,15,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,31,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,131071,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,31,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,63,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,63,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,127,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,127,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,255,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,255,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,511,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,511,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,1023,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,1023,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,2047,0.01781333362062772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,2047,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,4095,0.02293333411216736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,4095,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,8191,0.039173332353432976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,8191,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,16383,0.05923733115196228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,16383,0.05234666665395101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,32767,0.10110933581988017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,1,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,32767,0.08715732892354329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,1,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,3,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,7,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,7,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,15,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,15,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,31,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,31,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,63,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,63,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,127,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,127,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,255,0.009818666925032934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,255,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,511,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,511,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,1023,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,1023,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,65535,0.18449066082636514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,2047,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,2047,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,65535,0.15631999572118124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,4095,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,4095,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,8191,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,8191,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,16383,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,16383,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,32767,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,32767,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,65535,0.025610665480295818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,1,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,65535,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,1,0.012778667112191519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,3,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,3,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,7,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,7,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,15,0.011829332758982977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,15,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,131071,0.043765331308046974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,31,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,131071,0.03346133232116699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,31,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,63,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,63,0.012762666990359625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,127,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,255,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,255,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,511,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,511,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,1023,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,1023,0.014752000570297241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,2047,0.024218666056791942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,2047,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,4095,0.039674667020638786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,4095,0.031189332405726116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,8191,0.06089599927266439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,8191,0.05216533442338308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,16383,0.10521599650382996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,16383,0.08755200107892354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,1,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,1,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,3,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,32767,0.18944533665974936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,3,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,7,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,7,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,15,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,32767,0.1585813363393148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,15,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,31,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,31,0.013562666873137156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,63,0.013552000125249227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,63,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,127,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,127,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,255,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,255,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,511,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,511,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,1023,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,1023,0.020560000091791153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,2047,0.04030400017897288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,2047,0.0315733328461647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,4095,0.06285333136717479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,4095,0.05362133185068766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,1,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,8191,0.1049013336499532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,8191,0.08892800410588582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,1,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,3,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,3,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,7,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,7,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,15,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,15,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,31,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,31,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,63,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,63,0.016864000509182613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,127,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,127,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,255,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,255,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,511,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,511,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,1023,0.034474665919939675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,1023,0.028970666229724884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,2047,0.050330668687820435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,2047,0.058970664938290916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,1,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,3,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,7,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,7,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,4095,0.09693333506584167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,15,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,15,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,31,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,31,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,63,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,63,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,127,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,4095,0.08085866769154866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,255,0.00973866693675518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,255,0.010485333700974783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,511,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,511,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,1023,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,1023,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,2047,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,2047,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,4095,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,4095,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,8191,0.018592000007629395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,8191,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,16383,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,16383,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,32767,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,32767,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,65535,0.04110399881998698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,65535,0.03075733284155528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,1,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,1,0.023221333821614582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,131071,0.052330667773882546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,131071,0.06227200229962667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,3,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,3,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,7,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,7,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,15,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,15,0.02359466751416524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,31,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,31,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,63,0.025098666548728943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,63,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,127,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,127,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,255,0.024821333587169647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,255,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,511,0.03774933268626531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,511,0.03143999973932902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,1023,0.05635733405749003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,1023,0.04994133114814758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,2047,0.0986293355623881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,2047,0.08284266789754231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,1,0.03951466580231985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,1,0.037845333417256675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,3,0.03972800076007843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,3,0.035589332381884255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,7,0.03956266740957896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,15,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,7,0.037605332831541695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,15,0.03590933233499527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,31,0.03976533313592275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,31,0.03729599962631861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,63,0.039664000272750854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,63,0.035743998984495796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,127,0.03953066716591517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,255,0.04266133407751719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,255,0.03544000039498011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,511,0.06225066880385081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,511,0.05402133365472158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,1,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,1,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,1023,0.09732266267140706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,3,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,7,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,15,0.008922666932145754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,15,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,31,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,31,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,1023,0.08293866614500682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,63,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,127,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,127,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,255,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,255,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,511,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,511,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,1023,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,1023,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,2047,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,2047,0.014256000518798828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,4095,0.01565333331624667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,4095,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,8191,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,8191,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,16383,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,16383,0.02147199958562851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,32767,0.039674667020638786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,32767,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,65535,0.05997333427270254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,127,0.03742400060097376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,65535,0.0517546683549881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,1,0.011626667032639185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,3,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,3,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,7,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,15,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,31,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,63,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,63,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,127,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,127,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,131071,0.10318932930628459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,255,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,511,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,511,0.011578666667143503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,1023,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,1023,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,2047,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,2047,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,4095,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,131071,0.08907199899355571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,4095,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,8191,0.041375999649365745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,8191,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,16383,0.06262399752934773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,16383,0.039733332892258964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,32767,0.10363733768463135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,32767,0.05962666869163513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,65535,0.18898133436838785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,65535,0.09877866506576538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,1,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,1,0.009423999736706415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,3,0.009248000259200731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,3,0.009066666786869368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,7,0.009194666519761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,7,0.009178666397929192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,15,0.009098666409651438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,15,0.00898133342464765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,31,0.009039999917149544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,31,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,63,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,63,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,127,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,131071,0.36030399799346924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,127,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,255,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,255,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,511,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,511,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,1023,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,1023,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,131071,0.18070934216181436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,2047,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,2047,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,4095,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,4095,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,8191,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,8191,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,16383,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,16383,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,32767,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,32767,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,65535,0.025802666942278545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,65535,0.023743999501069386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,1,0.010197333370645842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,1,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,3,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,7,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,7,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,15,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,15,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,31,0.010266666611035665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,31,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,63,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,131071,0.0317493329445521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,63,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,127,0.009930666536092758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,131071,0.027429332335789997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,127,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,255,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,255,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,511,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,1023,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,1023,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,2047,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,2047,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,4095,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,4095,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,8191,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,8191,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,16383,0.019760000209013622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,16383,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,32767,0.02203733225663503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,32767,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,65535,0.02216533323129018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,65535,0.025722667574882507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,1,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,1,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,3,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,3,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,7,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,7,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,15,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,31,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,131071,0.028704000016053517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,131071,0.04684266448020935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,63,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,63,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,127,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,255,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,255,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,511,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,511,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,1023,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,1023,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,2047,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,2047,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,4095,0.03977066775163015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,4095,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,8191,0.06035733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,8191,0.03823466598987579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,16383,0.10218666990598042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,16383,0.058058664202690125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,32767,0.18761066595713297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,32767,0.09889066219329834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,1,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,3,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,3,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,7,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,7,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,15,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,15,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,31,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,65535,0.3547733227411906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,31,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,63,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,127,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,65535,0.17767467101415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,255,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,255,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,511,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,511,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,1023,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,1023,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,2047,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,2047,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,4095,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,4095,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,8191,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,8191,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,16383,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,16383,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,32767,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,32767,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,65535,0.042261332273483276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,65535,0.025706666211287182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,1,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,1,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,3,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,3,0.011887999872366587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,7,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,15,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,7,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,31,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,15,0.012133333832025528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,131071,0.0645066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,63,0.013525333255529404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,31,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,63,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,131071,0.044165333112080894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,127,0.012671999633312225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,127,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,255,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,255,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,511,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,511,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,1023,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,1023,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,2047,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,2047,0.04026666780312856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,4095,0.06141866743564606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,4095,0.04030933231115341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,8191,0.061493332187334694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,8191,0.10378133257230122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,16383,0.18841065963109335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,16383,0.10297600428263347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,1,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,1,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,3,0.014757333944241205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,3,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,7,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,7,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,15,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,15,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,32767,0.18336000045140585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,32767,0.35891199111938477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,31,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,31,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,63,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,63,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,127,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,127,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,255,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,255,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,511,0.018837332725524902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,511,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,1023,0.03540800015131632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,1023,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,2047,0.06187733511130015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,2047,0.040005333721637726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,4095,0.1053706705570221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,4095,0.06168533364931742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,1,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,1,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,8191,0.10394133130709331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,8191,0.18967467546463013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,3,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,3,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,7,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,7,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,15,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,15,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,31,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,31,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,63,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,63,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,127,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,127,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,255,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,255,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,511,0.033941333492596946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,511,0.021744000415007275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,1023,0.054325332244237266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,1023,0.03384533276160558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,2047,0.09914132952690125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,2047,0.05821866790453593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,1,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,1,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,3,0.01033599985142549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,3,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,4095,0.17813332875569662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,7,0.010128000130256018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,7,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,15,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,31,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,31,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,4095,0.09672000010808308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,63,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,127,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,127,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,255,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,255,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,511,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,1023,0.011637333780527115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,1023,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,2047,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,2047,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,4095,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,4095,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,8191,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,8191,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,16383,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,16383,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,32767,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,32767,0.042394667863845825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,65535,0.06047466893990835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,65535,0.04158399999141693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,1,0.027290667096773785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,1,0.02516266703605652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,131071,0.10257599751154582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,131071,0.06225599845250448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,3,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,3,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,7,0.027461332579453785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,7,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,15,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,15,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,31,0.027189334233601887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,31,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,63,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,63,0.027434666951497395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,127,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,127,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,255,0.03417066733042399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,255,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,511,0.03728533287843069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,511,0.054272000988324486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,1023,0.09291733304659526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,1023,0.05475200215975443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,2047,0.09714667002360027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,2047,0.17707200845082602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,1,0.04598933458328247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,1,0.03974399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,3,0.045509333411852516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,3,0.039594667653242745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,7,0.04572266836961111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,7,0.039450667798519135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,15,0.03973866750796636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,15,0.045696000258127846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,31,0.04554133117198944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,31,0.03955733279387156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,63,0.03962666789690653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,127,0.04798933366934458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,63,0.045834665497144066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,127,0.03968533376852671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,255,0.055344000458717346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,255,0.04179200033346812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,511,0.09319466352462769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,511,0.060565332571665444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,1,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,1,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,3,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,7,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,15,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,15,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,63,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,127,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,127,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,255,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,1023,0.17094399531682333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,1023,0.09558399518330891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,255,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,511,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,511,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,1023,0.011760000139474869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,1023,0.012069333344697952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,2047,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,2047,0.014752000570297241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,4095,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,4095,0.01628799984852473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,8191,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,8191,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,16383,0.039813332259655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,16383,0.02365333338578542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,32767,0.06108266611893972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,32767,0.03766933331886927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,65535,0.10329066713651021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,65535,0.060122668743133545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,1,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,1,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,3,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,7,0.009573333586255709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,7,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,15,0.009365333244204521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,15,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,131071,0.19057067235310873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,31,0.010149333626031876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,31,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,63,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,63,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,255,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,127,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,127,0.01156266654531161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,131071,0.10115200281143188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,255,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,511,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,511,0.011866666376590729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,1023,0.012650666137536367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,1023,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,2047,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,2047,0.018021332720915478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,4095,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,4095,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,8191,0.018746666610240936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,8191,0.017685333887736004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,16383,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,16383,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,32767,0.03995199998219808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,32767,0.03173333406448364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,65535,0.052154665191968284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,65535,0.060080001751581825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,1,0.00890666681031386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,1,0.009109333157539368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,3,0.00895999992887179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,3,0.008869333192706108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,7,0.009178666397929192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,15,0.009061333412925402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,31,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,63,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,63,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,127,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,255,0.010368000095089277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,255,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,511,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,511,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,1023,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,1023,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,2047,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,2047,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,4095,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,4095,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,8191,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,8191,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,131071,0.10409599542617798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,131071,0.09084799885749817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,16383,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,16383,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,32767,0.027722666660944622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,32767,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,65535,0.029669334491093952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,1,0.00897066667675972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,65535,0.03156266609827677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,1,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,3,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,3,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,7,0.009066666786869368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,7,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,15,0.009050666665037474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,15,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,31,0.00949866697192192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,131071,0.03401600072781245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,131071,0.033914667864640556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,31,0.010346666599313417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,63,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,127,0.009829333052039146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,127,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,255,0.010304000228643417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,511,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,1023,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,1023,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,2047,0.01180800050497055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,2047,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,4095,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,4095,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,8191,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,8191,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,16383,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,16383,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,32767,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,32767,0.021712000171343487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,65535,0.02586666742960612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,65535,0.02372266600529353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,1,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,1,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,3,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,3,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,131071,0.027269333600997925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,7,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,7,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,15,0.010522666076819101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,131071,0.02552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,31,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,63,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,63,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,127,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,127,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,255,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,255,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,511,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,511,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,1023,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,1023,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,2047,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,2047,0.01657066618402799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,4095,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,4095,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,8191,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,8191,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,16383,0.03988266736268997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,16383,0.031445334355036415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,32767,0.060346667965253196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,32767,0.05197866757710775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,65535,0.10133866469065349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,65535,0.08756800492604573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,1,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,3,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,1,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,7,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,15,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,15,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,31,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,63,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,127,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,63,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,127,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,511,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,511,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,1023,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,1023,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,2047,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,2047,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,4095,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,4095,0.013471999516089758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,8191,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,8191,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,16383,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,16383,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,32767,0.02160000056028366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,32767,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,131071,0.1879253387451172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,131071,0.1588159998257955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,65535,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,65535,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,1,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,1,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,3,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,3,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,7,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,7,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,131071,0.0276053324341774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,131071,0.02962133288383484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,15,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,15,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,31,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,63,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,63,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,127,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,127,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,255,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,255,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,511,0.011477333803971609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,511,0.011557333171367645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,1023,0.012741333494583765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,1023,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,2047,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,2047,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,4095,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,4095,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,8191,0.03915199885765711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,8191,0.031311998764673867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,16383,0.06018133461475372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,16383,0.053082664807637535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,32767,0.10130666693051656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,1,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,32767,0.08733333150545756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,1,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,3,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,3,0.01209066684047381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,7,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,7,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,15,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,15,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,31,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,63,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,63,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,127,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,127,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,255,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,255,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,65535,0.18578133980433145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,511,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,511,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,1023,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,1023,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,2047,0.02384000023206075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,65535,0.1565013329188029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,2047,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,4095,0.0401653324564298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,4095,0.03183466692765554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,8191,0.062319998939832054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,8191,0.05216533442338308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,1,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,3,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,1,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,16383,0.10563733180363973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,3,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,7,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,16383,0.08922132849693298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,7,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,15,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,15,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,31,0.014757333944241205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,63,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,31,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,63,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,127,0.014250667144854864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,127,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,255,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,255,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,511,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,511,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,1023,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,1023,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,2047,0.04084266722202301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,2047,0.03136533250411352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,4095,0.0629066675901413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,4095,0.053541332483291626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,1,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,3,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,7,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,15,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,15,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,31,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,63,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,63,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,127,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,127,0.01156266654531161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,255,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,8191,0.10655466715494792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,8191,0.08891733487447102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,511,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,511,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,1023,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,1023,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,2047,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,2047,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,4095,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,4095,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,8191,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,8191,0.01757866640885671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,16383,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,16383,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,32767,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,32767,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,65535,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,65535,0.02385599911212921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,1,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,131071,0.04475200176239014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,1,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,131071,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,3,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,3,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,7,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,7,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,15,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,15,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,31,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,63,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,63,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,127,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,31,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,127,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,255,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,255,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,511,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,511,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,1023,0.03482666611671448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,1023,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,2047,0.05823466678460439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,2047,0.04987200101216634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,1,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,4095,0.09707199533780415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,1,0.023621333142121632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,4095,0.08109866579373677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,3,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,3,0.023573334018389385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,7,0.025333332518736523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,7,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,15,0.025120000044504803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,15,0.023685333629449207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,31,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,63,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,127,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,127,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,255,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,255,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,511,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,511,0.03180266668399175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,31,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,63,0.023706667125225067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,1023,0.056314667065938316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,1,0.009365333244204521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,1,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,3,0.009045333291093508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,7,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,1023,0.04929066697756449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,7,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,15,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,15,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,31,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,127,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,255,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,255,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,511,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,511,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,1023,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,1023,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,2047,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,2047,0.0978559950987498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,2047,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,4095,0.015967999895413715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,4095,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,8191,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,8191,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,16383,0.01979200045267741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,2047,0.08411199847857158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,16383,0.019589333484570186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,32767,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,32767,0.023717333873112995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,65535,0.041322665909926094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,65535,0.03136000037193298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,1,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,1,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,3,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,7,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,7,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,131071,0.06358399987220764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,131071,0.053583999474843345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,31,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,31,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,63,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,63,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,127,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,127,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,255,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,511,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,511,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,1023,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,1023,0.01219733307758967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,2047,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,2047,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,4095,0.01829333355029424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,4095,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,8191,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,8191,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,16383,0.039781334499518074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,16383,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,32767,0.0624533345301946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,32767,0.0400693342089653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,65535,0.10400533676147461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,65535,0.05906666815280914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,1,0.009088000282645226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,1,0.010298666854699453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,3,0.00901333304742972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,3,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,7,0.008954666554927826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,15,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,7,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,15,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,31,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,63,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,63,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,127,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,127,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,255,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,255,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,131071,0.19163199265797934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,511,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,1023,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,1023,0.011535999675591787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,2047,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,2047,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,4095,0.012693333129088083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,4095,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,8191,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,8191,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,16383,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,16383,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,131071,0.10219732920328777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,32767,0.027664000789324444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,32767,0.026848000784715016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,65535,0.031845333675543465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,1,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,65535,0.03175999969244003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,3,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,3,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,7,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,7,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,15,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,31,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,31,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,63,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,131071,0.03523733218510946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,131071,0.03540800015131632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,127,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,255,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,255,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,511,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,511,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,1023,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,2047,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,2047,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,4095,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,4095,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,8191,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,8191,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,16383,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,16383,0.019567999988794327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,32767,0.02409599969784419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,32767,0.02404800057411194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,65535,0.02554133286078771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,65535,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,1,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,3,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,3,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,131071,0.03126399964094162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,7,0.011551999797423681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,15,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,31,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,131071,0.027269333600997925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,63,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,31,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,63,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,255,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,511,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,511,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,1023,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,1023,0.012714666624863943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,2047,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,2047,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,4095,0.021514666577180225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,4095,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,8191,0.04057066639264425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,8191,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,16383,0.06159999966621399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,16383,0.0397119993964831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,32767,0.10511466860771179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,32767,0.05910933514436086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,65535,0.189903994401296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,1,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,1,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,65535,0.10089600086212158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,3,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,3,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,7,0.009354666496316591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,7,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,15,0.010319999729593595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,31,0.01020800011853377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,31,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,63,0.009349333122372627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,63,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,127,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,127,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,255,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,255,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,511,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,511,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,1023,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,1023,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,2047,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,2047,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,4095,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,4095,0.013354666531085968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,8191,0.018863999595244724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,8191,0.01777600000301997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,16383,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,16383,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,32767,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,32767,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,131071,0.36185065905253094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,131071,0.1830400029818217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,65535,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,65535,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,1,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,1,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,3,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,3,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,7,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,15,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,15,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,31,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,31,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,131071,0.04849599798520406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,63,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,63,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,131071,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,127,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,127,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,255,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,255,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,511,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,511,0.012181332955757776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,1023,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,1023,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,2047,0.02107200026512146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,2047,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,4095,0.03984000037113825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,4095,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,8191,0.03965333352486292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,8191,0.06136000156402588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,16383,0.10322133700052898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,16383,0.05964266757170359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,32767,0.09945600231488545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,32767,0.18779200315475464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,1,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,1,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,3,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,7,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,7,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,15,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,15,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,65535,0.3577440182367961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,31,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,31,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,63,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,63,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,127,0.012170666207869848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,127,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,65535,0.1787253419558207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,255,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,255,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,511,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,511,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,1023,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,1023,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,2047,0.03995733211437861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,2047,0.024341332415739696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,4095,0.039664000272750854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,4095,0.06172266602516174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,8191,0.10478933652242024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,8191,0.06192533175150553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,1,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,1,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,3,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,3,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,16383,0.10283199946085612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,16383,0.1890559991200765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,7,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,7,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,15,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,15,0.014688000082969666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,31,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,31,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,63,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,63,0.01471466695268949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,127,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,127,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,255,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,255,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,511,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,511,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,1023,0.035589332381884255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,1023,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,2047,0.03990933299064636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,2047,0.06238933404286703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,4095,0.10562666257222493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,4095,0.06233599781990051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,1,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,1,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,3,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,7,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,15,0.010154666379094124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,31,0.009861333295702934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,31,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,63,0.010298666854699453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,63,0.010405333091815313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,127,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,127,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,255,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,8191,0.105295995871226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,8191,0.1914400060971578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,511,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,511,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,1023,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,1023,0.011839999506870905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,2047,0.01431999976436297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,2047,0.013424000392357508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,4095,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,4095,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,8191,0.019440000255902607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,8191,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,16383,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,16383,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,32767,0.023567999402681988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,32767,0.02178666740655899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,65535,0.043162668744723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,65535,0.02553066611289978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,1,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,1,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,131071,0.06509333352247874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,131071,0.04366933306058248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,3,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,3,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,7,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,7,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,15,0.018853332847356796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,15,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,31,0.018911999960740406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,31,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,63,0.018778666853904724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,63,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,127,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,127,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,255,0.0189280000825723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,255,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,511,0.03442666679620743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,511,0.0216799999276797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,1023,0.05374933282534281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,1023,0.03506666670242945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,2047,0.0981226662794749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,2047,0.058143998185793556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,4095,0.17914666732152304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,1,0.0272533322374026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,4095,0.0960640013217926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,1,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,3,0.027808000644048054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,3,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,7,0.02779199928045273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,7,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,15,0.02802666773398717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,15,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,31,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,63,0.02759466568628947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,31,0.023717333873112995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,63,0.025279998779296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,127,0.02736533433198929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,127,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,255,0.033376000821590424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,255,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,511,0.05417599777380625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,511,0.03760000069936117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,1023,0.09289600451787312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,1,0.009759999811649323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,1023,0.05426133175690969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,1,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,3,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,3,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,7,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,15,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,15,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,31,0.009056000038981438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,31,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,63,0.010224000240365664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,63,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,127,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,255,0.009285333255926767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,255,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,511,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,511,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,1023,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,1023,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,2047,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,2047,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,2047,0.1774453322092692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,4095,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,2047,0.09788266817728679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,4095,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,8191,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,8191,0.018186666071414948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,16383,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,16383,0.021562665700912476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,32767,0.04283200204372406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,32767,0.02514133354028066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,65535,0.04214933514595032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,65535,0.062463998794555664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,131071,0.06274666885534923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,131071,0.10385599732398987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,1,0.046165332198143005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,3,0.045968001087506614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,3,0.04582933088143667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,3,0.03769599894682566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,3,0.03789333254098892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,7,0.04602666695912679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,7,0.04623466730117798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,7,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,7,0.03999999910593033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,15,0.047770669062932335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,15,0.04799466828505198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,15,0.041573333243529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,1,0.03810133288304011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,15,0.040933333337306976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,31,0.05589866638183594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,31,0.055914665261904396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,31,0.0497920016447703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,31,0.05002133548259735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,63,0.056362668673197426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,63,0.05641599992911021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,63,0.0499893327554067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,63,0.05013866722583771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,127,0.05632533133029938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,127,0.0580213318268458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,127,0.050160000721613564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,127,0.05017066498597463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,255,0.05635733405749003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,1,0.03793066740036011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,255,0.06670400003592174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,255,0.0497920016447703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,255,0.06035733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,511,0.05665599803129832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,511,0.09716799855232239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,511,0.04987733562787374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,511,0.09117866555849712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,1023,0.05816000203291575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,1023,0.1595093309879303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,1023,0.050197333097457886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,1023,0.1543359955151876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,2047,0.05817066629727682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,2047,0.2858399947484334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,2047,0.05023466547330221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,4095,0.05787200232346853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,2047,0.27938665946324664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,4095,0.05008533100287119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,4095,0.5379360119501749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,1,0.0459199994802475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,4095,0.5293386777242025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,1,0.04562666515509287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,1,0.03766933331886927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,1,0.037621334195137024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,3,0.04580266773700714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,3,0.04610666632652283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,3,0.037589333951473236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,3,0.03797333439191183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,7,0.04614399870236715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,7,0.04597333570321401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,7,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,7,0.03953066716591517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,15,0.04794666667779287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,15,0.04797333478927612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,15,0.0413973331451416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,15,0.039594667653242745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,31,0.056202664971351624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,31,0.05622399846712748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,31,0.04977599779764811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,31,0.050293331344922386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,63,0.05596266686916351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,63,0.05648533503214518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,63,0.050026665131251015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,63,0.05004266897837321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,127,0.05622933308283488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,127,0.058101331194241844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,127,0.04983466863632202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,127,0.05006400247414907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,255,0.05628266433874766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,255,0.06651199857393901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,255,0.049866666396458946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,255,0.06027733286221822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,511,0.05793066819508871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,511,0.09734400113423665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,511,0.051258668303489685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,511,0.090938667456309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,1023,0.05807999769846598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,1023,0.04975466430187225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,1,0.04585599899291992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,1023,0.16040533781051636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,1023,0.15451733271280924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,2047,0.05804799993832906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,2047,0.05045866469542185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,2047,0.2797120014826457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,2047,0.2857973376909892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,4095,0.05809600154558817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,1,0.04595733185609182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,4095,0.05013333261013031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,1,0.045696000258127846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,1,0.03796799977620443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,4095,0.538319985071818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,1,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,3,0.045834665497144066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,4095,0.5293706655502319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,3,0.04630400240421295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,3,0.03779733429352442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,3,0.03740799923737844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,7,0.04604800045490265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,7,0.039749334255854286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,7,0.04614933331807455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,7,0.03959999978542328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,15,0.04810666541258494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,15,0.047839999198913574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,15,0.03985599925120672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,15,0.04139200101296107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,31,0.05622399846712748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,31,0.05628266433874766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,31,0.04991999765237173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,31,0.050293331344922386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,63,0.05595199763774872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,63,0.050144001841545105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,63,0.05625066657861074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,63,0.05036266644795736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,127,0.05620799958705902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,127,0.05622399846712748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,127,0.05012799799442291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,127,0.05030933519204458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,255,0.05705066521962484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,255,0.06656000018119812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,255,0.05014933149019877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,255,0.06044800082842509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,511,0.05782933533191681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,511,0.09718933701515198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,511,0.050293331344922386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,511,0.09170132875442505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,1023,0.058287998040517174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,1023,0.15889066457748413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,1023,0.05013333261013031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,1023,0.1525813341140747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,2047,0.05644799768924713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,2047,0.2868906656901042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,2047,0.05017599960168203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,2047,0.279039998849233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,4095,0.058143998185793556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,4095,0.5438079833984375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,4095,0.05002133548259735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,1,0.011733333269755045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,1,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,4095,0.5306666692097982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,1,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,1,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,3,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,3,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,3,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,3,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,7,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,7,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,7,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,7,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,15,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,15,0.011621333658695221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,15,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,15,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,31,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,31,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,31,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,31,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,63,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,63,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,63,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,63,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,127,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,127,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,127,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,127,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,255,0.011834666132926941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,255,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,255,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,255,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,511,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,511,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,511,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,511,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,1023,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,1023,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,1023,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,1023,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,2047,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,2047,0.025472000241279602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,2047,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,2047,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,4095,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,4095,0.04176533222198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,4095,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,1,0.009103999783595404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,4095,0.03535466641187668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,1,0.00895999992887179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,1,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,1,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,3,0.009029333169261614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,3,0.008949333180983862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,3,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,3,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,7,0.009349333122372627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,7,0.009189333145817121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,15,0.008778666456540426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,15,0.009072000160813332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,15,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,31,0.009066666786869368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,31,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,31,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,31,0.01009599988659223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,63,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,63,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,63,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,127,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,63,0.012149333953857422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,127,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,127,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,127,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,255,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,255,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,255,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,511,0.019727999965349834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,255,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,511,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,511,0.013381333400805792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,511,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,1023,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,1023,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,1023,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,1023,0.027189334233601887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,2047,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,2047,0.029818666477998097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,2047,0.02312533309062322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,2047,0.03127466638882955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,4095,0.03958933303753535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,4095,0.019578666736682255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,4095,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,4095,0.03977599988381068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,8191,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,8191,0.06214933097362518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,8191,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,8191,0.05861333509286245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,16383,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,16383,0.10531733433405559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,16383,0.023621333142121632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,1,0.009301333377758661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,1,0.009152000149091085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,1,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,16383,0.0953546663125356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,3,0.008853333070874214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,1,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,3,0.008709333216150602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,3,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,3,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,7,0.008879999940594038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,7,0.009029333169261614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,7,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,15,0.008885333314538002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,15,0.00916800027092298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,15,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,31,0.009418666362762451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,31,0.008896000062425932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,31,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,63,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,63,0.008943999807039896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,63,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,63,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,127,0.00903466654320558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,127,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,127,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,127,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,255,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,255,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,255,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,511,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,511,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,511,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,511,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,1023,0.019551999866962433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,1023,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,1023,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,1023,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,2047,0.0295413335164388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,2047,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,2047,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,2047,0.03001066545645396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,4095,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,4095,0.03997866561015447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,4095,0.023541333774725597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,4095,0.03984000037113825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,8191,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,8191,0.062122667829195656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,8191,0.024634666740894318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,8191,0.058693334460258484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,16383,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,16383,0.10583999752998352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,16383,0.023621333142121632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,1,0.008832000195980072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,16383,0.09500799576441447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,1,0.008901333436369896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,1,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,3,0.009216000015536943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,3,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,3,0.008922666932145754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,3,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,7,0.008687999720374743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,7,0.010490667074918747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,7,0.008778666456540426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,7,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,15,0.009226666763424873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,15,0.008912000184257826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,15,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,15,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,31,0.009194666519761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,31,0.008912000184257826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,31,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,31,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,63,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,63,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,63,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,63,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,127,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,127,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,127,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,255,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,255,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,255,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,511,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,255,0.013338666409254074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,511,0.0215786670645078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,511,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,511,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,1023,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,1023,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,1023,0.027461332579453785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,1023,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,2047,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,2047,0.029637334247430164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,2047,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,2047,0.03133866687615713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,4095,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,4095,0.03982933362325033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,4095,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,4095,0.04028266668319702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,8191,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,8191,0.062319998939832054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,8191,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,8191,0.05842133363087972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,16383,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,16383,0.10522666573524475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,16383,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,1,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,16383,0.09585066636403401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,1,0.009930666536092758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,1,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,3,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,3,0.011781333635250727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,3,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,7,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,7,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,15,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,15,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,15,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,15,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,31,0.011616000284751257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,31,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,63,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,31,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,63,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,63,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,63,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,127,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,127,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,255,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,255,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,255,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,511,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,511,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,511,0.012661332885424295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,1023,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,1023,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,1023,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,1023,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,2047,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,2047,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,2047,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,2047,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,4095,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,4095,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,4095,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,4095,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,8191,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,8191,0.016255999604860943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,8191,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,8191,0.015647999942302704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,16383,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,16383,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,16383,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,1,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,1,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,1,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,16383,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,1,0.00943999985853831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,3,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,3,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,3,0.009152000149091085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,7,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,7,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,15,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,7,0.009455999980370203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,15,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,15,0.013877333452304205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,15,0.00926399976015091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,31,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,31,0.013845333208640417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,31,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,63,0.015781333049138386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,63,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,63,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,127,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,63,0.012213333199421564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,127,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,127,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,255,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,127,0.011776000261306763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,255,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,255,0.012416000167528788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,255,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,511,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,511,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,511,0.012597333639860153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,511,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,1023,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,1023,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,1023,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,1023,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,2047,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,2047,0.032074667513370514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,2047,0.012666666259368261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,2047,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,4095,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,4095,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,4095,0.052069331208864846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,4095,0.04814933240413666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,8191,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,8191,0.09110400080680847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,8191,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,8191,0.08643200000127156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,16383,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,16383,0.16906134287516275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,16383,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,1,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,1,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,16383,0.16217066844304404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,1,0.009173333023985228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,1,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,3,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,3,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,3,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,7,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,7,0.013455999394257864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,3,0.009077333534757296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,7,0.009045333291093508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,7,0.01055466632048289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,15,0.013536000003417334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,15,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,15,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,15,0.009338666374484697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,31,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,31,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,31,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,31,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,63,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,63,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,63,0.012714666624863943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,63,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,127,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,127,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,127,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,127,0.013557333499193192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,255,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,255,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,255,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,255,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,511,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,511,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,511,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,511,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,1023,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,1023,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,1023,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,1023,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,2047,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,2047,0.031397332747777305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,2047,0.029658667743206024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,2047,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,4095,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,4095,0.052229334910710655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,4095,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,4095,0.048112000028292336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,8191,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,8191,0.012063999970753988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,8191,0.09135466814041138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,16383,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,8191,0.08738133311271667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,16383,0.01209066684047381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,16383,0.16897066434224448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,1,0.013455999394257864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,1,0.00895999992887179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,1,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,1,0.009152000149091085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,3,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,16383,0.16259732842445374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,3,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,3,0.009178666397929192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,3,0.008912000184257826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,7,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,7,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,7,0.009178666397929192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,7,0.010330666477481524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,15,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,15,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,15,0.010250666489203772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,15,0.009136000027259191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,31,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,31,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,31,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,31,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,63,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,63,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,63,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,63,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,127,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,127,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,127,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,127,0.012671999633312225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,255,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,255,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,255,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,255,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,511,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,511,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,511,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,511,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,1023,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,1023,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,1023,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,1023,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,2047,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,2047,0.031680000325044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,2047,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,2047,0.029253333806991577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,4095,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,4095,0.051962668697039284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,4095,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,4095,0.04821866750717163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,8191,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,8191,0.09104532996813457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,8191,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,8191,0.08684266606966655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,16383,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,16383,0.16911466916402182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,1,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,16383,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,1,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,16383,0.16284799575805664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,1,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,1,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,3,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,3,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,7,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,7,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,15,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,15,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,15,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,15,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,31,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,31,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,31,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,31,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,63,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,63,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,63,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,127,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,127,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,255,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,255,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,255,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,511,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,511,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,511,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,511,0.012576000144084295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,1023,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,1023,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,1023,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,1023,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,2047,0.013359999905029932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,2047,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,2047,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,4095,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,2047,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,4095,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,4095,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,4095,0.01595199977358182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,8191,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,8191,0.01756799966096878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,8191,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,8191,0.017781333376963932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,16383,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,16383,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,16383,0.011866666376590729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,16383,0.01974933346112569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,1,0.08281599978605907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,1,0.08513599634170532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,1,0.06877333422501881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,1,0.06865066786607106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,3,0.08501332998275757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,3,0.06851733227570851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,3,0.08360000451405843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,3,0.06888000170389812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,7,0.0869813362757365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,7,0.0867680013179779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,7,0.07242666681607564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,7,0.0718399981657664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,15,0.08701866865158081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,15,0.0888213316599528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,15,0.07469333211580913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,15,0.07435733576615651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,31,0.10558399558067322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,31,0.10521066188812256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,31,0.09316800038019817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,31,0.09309867024421692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,63,0.10545066992441814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,63,0.10564266641934712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,63,0.09455999732017517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,63,0.09303466478983562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,127,0.10727999607721965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,127,0.1076853374640147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,127,0.09497599800427754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,127,0.09309867024421692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,255,0.10780266920725505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,255,0.12588799993197122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,255,0.0941493312517802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,255,0.1136853297551473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,511,0.10744532942771912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,511,0.09493333101272583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,511,0.1889866590499878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,511,0.17509333292643228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,1023,0.10777067144711812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,1023,0.09483200311660767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,1023,0.3102666735649109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,1023,0.2987893422444661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,2047,0.10750400026639302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,2047,0.09494933485984802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,2047,0.5621866782506307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,1,0.08482133348782857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,2047,0.5459146499633789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,1,0.08449600140253703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,1,0.06869866450627644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,1,0.06824000179767609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,3,0.08470400174458821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,3,0.08470933636029561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,3,0.06825600067774455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,3,0.06830400228500366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,7,0.08664000034332275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,7,0.08701333403587341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,7,0.07182399928569794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,7,0.07306666672229767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,15,0.08888000249862671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,15,0.08820266524950664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,15,0.07458133498827617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,31,0.1056160032749176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,15,0.07504533231258392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,31,0.10541866223017375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,31,0.09337600072224934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,31,0.09307199716567993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,63,0.10739733775456746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,63,0.1058186690012614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,63,0.09408533573150635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,63,0.09303999940554301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,127,0.107424000898997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,127,0.1076586643854777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,127,0.09496000409126282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,127,0.09442133704821269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,255,0.10745599865913391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,255,0.12621333201726279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,255,0.09529067079226176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,255,0.11356799801190694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,511,0.1076639990011851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,511,0.09512000282605489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,511,0.18853867053985596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,511,0.17515732844670615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,1023,0.10748266180356343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,1023,0.09527466694513957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,1023,0.3102399905522664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,2047,0.10760000348091125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,1023,0.2982400059700012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,2047,0.09513599673906963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,1,0.08391466736793518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,1,0.0846506655216217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,2047,0.5620853503545126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,1,0.06869333485762279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,2047,0.5479733149210612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,1,0.06861866513888042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,3,0.08493866523106892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,3,0.08487466971079509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,3,0.06961599985758464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,3,0.06858666737874348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,7,0.08705066641171773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,7,0.08699733018875122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,7,0.07081600030263265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,7,0.07242133220036824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,15,0.08910399675369263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,15,0.08767466743787129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,15,0.07472000022729237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,15,0.075013334552447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,31,0.10547199845314026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,31,0.10539199908574422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,31,0.09306666254997253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,31,0.09310932954152425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,63,0.10669333736101787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,63,0.10744000474611919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,63,0.09306666254997253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,63,0.09342933694521587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,127,0.10781332850456238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,127,0.10757866501808167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,127,0.09431466460227966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,127,0.09496532877286275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,255,0.10749333103497823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,255,0.1257973313331604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,255,0.09496532877286275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,255,0.1136799951394399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,511,0.10803199807802837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,511,0.09539199868837993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,511,0.1893493334452311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,511,0.17522666851679483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,1023,0.1076853374640147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,1023,0.31035200754801434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,1023,0.09499200185139973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,1023,0.2980533242225647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,2047,0.10760533809661865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,2047,0.09513066212336223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,2047,0.5707093477249146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,1,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,2047,0.547541340192159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,1,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,1,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,1,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,3,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,3,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,3,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,3,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,7,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,7,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,7,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,7,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,15,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,15,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,15,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,15,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,31,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,31,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,31,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,31,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,63,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,63,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,63,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,63,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,127,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,127,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,127,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,127,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,255,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,255,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,255,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,255,0.015717333803574245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,511,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,511,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,511,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,511,0.017909333109855652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,1023,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,1023,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,1023,0.014688000082969666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,2047,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,1023,0.023578666150569916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,2047,0.04264533519744873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,2047,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,1,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,1,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,1,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,1,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,2047,0.03596800069014231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,3,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,3,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,3,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,3,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,7,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,7,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,7,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,7,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,15,0.013354666531085968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,15,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,15,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,15,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,31,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,31,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,31,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,31,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,63,0.013317332913478216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,63,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,63,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,63,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,127,0.013493333011865616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,127,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,127,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,127,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,255,0.01341333364446958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,255,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,255,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,255,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,511,0.014762666076421738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,511,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,511,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,511,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,1023,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,1023,0.031040000418821972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,1023,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,1023,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,2047,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,2047,0.05110933383305868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,2047,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,2047,0.05031466484069824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,4095,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,4095,0.09113599856694539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,4095,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,4095,0.0886346697807312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,8191,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,8191,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,8191,0.17041067282358804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,8191,0.16236799955368042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,16383,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,16383,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,16383,0.33533867200215656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,1,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,1,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,1,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,16383,0.3102560043334961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,1,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,3,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,3,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,3,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,3,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,7,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,7,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,7,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,7,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,15,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,15,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,15,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,15,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,31,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,31,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,31,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,31,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,63,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,63,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,63,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,63,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,127,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,127,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,127,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,127,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,255,0.014208000153303146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,255,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,255,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,255,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,511,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,511,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,511,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,511,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,1023,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,1023,0.02997333308060964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,1023,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,1023,0.031856000423431396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,2047,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,2047,0.050106664498647056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,2047,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,2047,0.05004799862702688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,4095,0.01331199953953425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,4095,0.09101866682370503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,4095,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,4095,0.08700799942016602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,8191,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,8191,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,8191,0.17095466454823813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,8191,0.16246933738390604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,16383,0.014815999815861383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,1,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,16383,0.019567999988794327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,1,0.014650666465361914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,1,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,16383,0.33581864833831787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,16383,0.31040000915527344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,1,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,3,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,3,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,3,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,3,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,7,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,7,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,7,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,7,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,15,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,15,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,15,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,15,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,31,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,31,0.0144213338692983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,31,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,31,0.015909332782030106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,63,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,63,0.013466666142145792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,63,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,63,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,127,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,127,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,127,0.019637333850065868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,127,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,255,0.01403733342885971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,255,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,255,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,255,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,511,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,511,0.02062400057911873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,511,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,511,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,1023,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,1023,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,1023,0.029717333614826202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,1023,0.03187733391920725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,2047,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,2047,0.050111999114354454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,2047,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,2047,0.050437331199645996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,4095,0.01331199953953425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,4095,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,4095,0.0912000040213267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,4095,0.08709866801897685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,8191,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,8191,0.17290133237838745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,8191,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,8191,0.16108799974123636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,16383,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,16383,0.01956266661485036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,16383,0.3391146659851074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,1,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,1,0.010437333335479101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,16383,0.31031467517217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,1,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,3,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,3,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,3,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,3,0.010527999450763067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,7,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,15,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,15,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,31,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,31,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,31,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,63,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,63,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,63,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,127,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,127,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,127,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,127,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,255,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,255,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,255,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,255,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,511,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,511,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,511,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,511,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,1023,0.011605333536863327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,1023,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,1023,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,1023,0.014511999984582266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,2047,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,2047,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,2047,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,2047,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,4095,0.013605333864688873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,4095,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,4095,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,4095,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,8191,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,8191,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,8191,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,8191,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,16383,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,16383,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,16383,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,16383,0.02362666775782903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,1,0.16054933269818625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,1,0.1604106624921163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,1,0.1302293340365092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,1,0.12991467118263245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,3,0.16060800353686014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,3,0.16107733050982156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,3,0.1299626628557841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,3,0.13038933277130127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,7,0.16660799582799277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,7,0.1646986703077952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,7,0.1360586682955424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,7,0.13612799843152365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,15,0.16876266400019327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,15,0.16873067617416382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,15,0.14218133687973022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,15,0.1420906682809194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,31,0.20384534200032553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,31,0.20389332373936972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,31,0.17923200130462646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,31,0.1795413295427958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,63,0.20582934220631918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,63,0.20572266976038614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,63,0.18107734123865762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,63,0.18129066626230875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,127,0.2079040010770162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,127,0.20779200394948324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,127,0.18157333135604858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,127,0.18126400311787924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,255,0.2080693244934082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,255,0.24518932898839316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,255,0.1812266707420349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,255,0.22002132733662924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,511,0.20999999841054282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,511,0.36766934394836426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,511,0.18125865856806436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,511,0.3439093430836995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,1023,0.20787733793258667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,1023,0.18279467026392618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,1,0.16053332885106406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,1023,0.6114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,1023,0.5894720156987509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,1,0.1300320029258728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,1,0.16085867087046304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,1,0.13020267089207968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,3,0.1606559952100118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,3,0.1606613298257192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,3,0.13198933005332947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,3,0.13013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,7,0.16505066553751627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,7,0.16479999820391336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,7,0.13648000359535217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,7,0.13614400227864584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,15,0.1688800056775411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,15,0.16865599155426025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,15,0.14257066448529562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,15,0.1421333352724711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,31,0.20390399297078451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,31,0.20374399423599243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,31,0.1795360048611959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,31,0.17939200003941855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,63,0.2057759960492452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,63,0.2059146761894226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,63,0.18134933710098267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,63,0.18116267522176108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,127,0.20812267065048218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,127,0.20785599946975708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,127,0.18343466520309448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,127,0.1812959909439087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,255,0.2087413271268209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,255,0.2449280023574829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,255,0.1832746664683024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,255,0.22046399116516113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,511,0.20964266856511435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,511,0.3676106532414754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,511,0.1832053263982137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,511,0.34329064687093097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,1023,0.20983999967575073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,1023,0.18315200010935465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,1,0.16103999813397726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,1023,0.611413319905599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,1,0.16075733304023743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,1,0.13185600439707437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,1023,0.5903893311818441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,1,0.1318880021572113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,3,0.160970667997996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,3,0.16090666254361471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,3,0.13196266690889993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,3,0.13199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,7,0.16495999693870544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,7,0.16666133205095926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,7,0.13619200388590494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,7,0.13619200388590494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,15,0.16910932461420694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,15,0.1688800056775411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,15,0.1422719955444336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,15,0.14221333463986716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,31,0.2037173310915629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,31,0.20399999618530273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,31,0.18047465880711874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,31,0.17941333850224814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,63,0.2059733271598816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,63,0.2061013380686442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,63,0.18127467234929404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,63,0.18098666270573935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,127,0.2079253395398458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,127,0.20786132415135702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,127,0.1832480033238729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,127,0.1832266648610433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,255,0.2078933318456014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,255,0.24503467480341592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,255,0.1831093430519104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,255,0.22004266579945883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,511,0.2095253268877665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,511,0.3681013186772664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,511,0.18121065696080527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,511,0.3433813254038493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,1023,0.20992000897725424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,1023,0.1829866568247477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,1023,0.6256213188171387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,1,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,1,0.0194560003777345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,1023,0.5890186627705892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,1,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,1,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,3,0.01998399943113327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,3,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,3,0.01952533299724261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,3,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,7,0.02004266654451688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,7,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,7,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,7,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,15,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,15,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,15,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,15,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,31,0.019717333217461903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,31,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,31,0.019530666371186573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,31,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,63,0.019573333362738293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,63,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,63,0.01958400011062622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,63,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,127,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,127,0.020319999506076176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,127,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,255,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,127,0.019920000185569126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,255,0.019600000232458115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,255,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,255,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,511,0.025589334468046825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,511,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,511,0.02035733312368393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,511,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,1023,0.022554665803909302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,1023,0.03862400104602178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,1023,0.03352533280849457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,1023,0.019573333362738293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,1,0.3122719923655192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,1,0.3125600020090739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,1,0.25280533234278363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,1,0.2532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,3,0.3126346667607625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,3,0.31244800488154095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,3,0.25304534037907916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,3,0.2531733314196269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,7,0.32411734263102215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,7,0.2632906635602315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,7,0.32436267534891766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,7,0.2637493411699931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,15,0.2767893274625142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,15,0.3307200074195862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,15,0.32868266105651855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,15,0.2758400042851766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,31,0.40035200119018555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,31,0.3511253197987874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,31,0.4001493453979492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,31,0.35157867272694904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,63,0.35501333077748615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,63,0.40510932604471844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,63,0.4045120080312093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,63,0.3553333282470703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,127,0.3574826717376709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,127,0.40861332416534424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,127,0.40883731842041016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,127,0.3575146595637004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,255,0.3575040102005005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,255,0.4103039900461833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,255,0.48286934693654376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,1,0.3123199939727783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,1,0.3142613371213277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,255,0.43355735143025714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,1,0.2542453408241272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,1,0.25491732358932495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,3,0.3144746621449788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,3,0.2550826668739319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,3,0.31251732508341473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,3,0.2547093431154887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,7,0.32289600372314453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,7,0.3243199984232585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,7,0.2650773326555888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,7,0.26493332783381146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,15,0.33085334300994873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,15,0.33104532957077026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,15,0.27562133471171063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,15,0.2770613431930542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,31,0.40066667397816974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,31,0.35155200958251953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,31,0.40037866433461505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,31,0.35202133655548096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,63,0.40516265233357746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,63,0.4046613375345866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,63,0.3553333282470703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,63,0.3550293445587158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,127,0.4090506633122762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,127,0.40993599096934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,127,0.35740800698598224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,127,0.3578186829884847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,255,0.4105759859085083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,255,0.3577066659927368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,255,0.43274132410685223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,1,0.31487999359766644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,255,0.4837813377380371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,1,0.31432533264160156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,1,0.25563732782999676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,1,0.25683732827504474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,3,0.31498666604359943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,3,0.3145013252894084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,3,0.2553973396619161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,3,0.25703465938568115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,7,0.3234879970550537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,7,0.2651626666386922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,7,0.32440000772476196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,7,0.26551467180252075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,15,0.27688533067703247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,15,0.2773759961128235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,15,0.33059199651082355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,15,0.33133333921432495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,31,0.4002666473388672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,31,0.35124798615773517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,31,0.40065598487854004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,31,0.35172800223032635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,63,0.3553813298543294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,63,0.40511465072631836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,63,0.4043946663538615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,63,0.3553333282470703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,127,0.40993066628774005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,127,0.35736000537872314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,127,0.4089920123418172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,127,0.3575040102005005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,255,0.35787200927734375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,255,0.4110720157623291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,255,0.4824479818344116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,255,0.43351467450459796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,1,0.029802667597929638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,1,0.02958933264017105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,1,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,1,0.029637334247430164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,3,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,3,0.029696000119050343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,3,0.029296000798543293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,3,0.029333333174387615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,7,0.029631999631722767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,7,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,7,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,15,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,7,0.029215998947620392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,15,0.029669334491093952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,15,0.029338667790095013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,15,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,31,0.02976000060637792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,31,0.029552000264326733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,31,0.029690665503342945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,31,0.029498666524887085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,63,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,63,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,63,0.029322666426499683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,63,0.02945599953333537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,127,0.0296426663796107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,127,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,127,0.029738667110602062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,127,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,255,0.031658666829268135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,255,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,255,0.029253333806991577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,255,0.02923733244339625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,1,0.6180266539255778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,1,0.4970986843109131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,1,0.6164373159408569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,1,0.4983359972635905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,3,0.6177226702372233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,3,0.6168906688690186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,3,0.4986879825592041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,3,0.4989653428395589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,7,0.6383466720581055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,7,0.6405066649119059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,7,0.5210666656494141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,7,0.521237333615621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,15,0.653221329053243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,15,0.5457280079523722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,15,0.6503893136978149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,15,0.5459253390630087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,31,0.7925386428833008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,31,0.7932159900665283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,31,0.6972106297810873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,31,0.6954027016957601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,63,0.8022133509318033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,63,0.7033546765645345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,63,0.8029546737670898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,63,0.7018293539683024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,127,0.8097173372904459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,127,0.8114666938781738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,127,0.707530657450358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,127,0.7072532971700033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,1,0.6185119946797689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,1,0.6194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,1,0.5011733373006185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,1,0.5008693138758341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,3,0.6185119946797689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,3,0.6195253531138102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,3,0.5008426507314047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,3,0.5009333292643229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,7,0.6385333140691122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,7,0.6397653420766195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,7,0.5217706759770712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,7,0.5210346778233846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,15,0.6528053283691406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,15,0.6537866592407227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,15,0.54585067431132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,15,0.544922669728597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,31,0.7936853567759196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,31,0.7935307025909424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,31,0.6957440376281738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,31,0.6952853202819824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,63,0.8034826914469401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,63,0.7031306425730387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,63,0.803978681564331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,63,0.7026293277740479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,127,0.8111680348714193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,127,0.7077226638793945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,127,0.8115466435750326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,127,0.7078773180643717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,1,0.6208586692810059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,1,0.5045066674550375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,1,0.6213226715723673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,1,0.5046453475952148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,3,0.6213493347167969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,3,0.5049866835276285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,3,0.6219360033671061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,3,0.5048373142878214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,7,0.6396693388621012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,7,0.5229066610336304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,7,0.6403626600901285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,7,0.5216159820556641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,15,0.652239998181661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,15,0.5459146499633789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,15,0.652453343073527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,15,0.5458879868189493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,31,0.793450673421224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,31,0.6970293521881104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,31,0.7944586277008057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,31,0.6955946286519369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,63,0.8034613132476807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,63,0.7039039929707845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,63,0.8033599853515625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,63,0.7031306425730387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,127,0.8115573724110922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,127,0.7080480257670084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,127,0.8120480378468832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,127,0.7083786328633627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,1,0.0497920016447703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,1,0.050255998969078064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,1,0.048351998130480446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,1,0.04771199822425842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,3,0.04997866849104563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,3,0.04829333225886027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,3,0.05008000135421753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,3,0.04829333225886027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,7,0.05036266644795736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,7,0.05016533533732096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,7,0.04770133395989736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,7,0.04822400212287903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,15,0.05031999945640564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,15,0.049882665276527405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,15,0.04790399968624115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,15,0.04780266682306925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,31,0.05004799862702688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,31,0.05000533163547516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,31,0.04861866434415182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,63,0.050016000866889954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,31,0.04773866633574168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,63,0.05012266834576925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,63,0.0481279989083608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,63,0.047797332207361855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,127,0.05068266888459524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,127,0.05009066561857859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,127,0.0481279989083608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,127,0.04788800080617269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,1,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,1,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,1,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,1,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,3,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,3,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,3,0.014725333700577417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,3,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,7,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,7,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,7,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,7,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,15,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,15,0.01775466650724411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,15,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,15,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,31,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,31,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,31,0.017610666652520496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,31,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,63,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,63,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,63,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,63,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,127,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,127,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,127,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,127,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,255,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,255,0.02161066730817159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,255,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,255,0.020666666328907013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,511,0.019727999965349834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,511,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,511,0.03057066599527995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,511,0.029658667743206024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,1023,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,1023,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,1023,0.04772266745567322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,1023,0.04554666578769684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,2047,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,2047,0.08076799909273784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,2047,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,2047,0.08046400050322215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,4095,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,4095,0.017583999782800674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,4095,0.1485973298549652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,4095,0.14629866679509482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,8191,0.019632000476121902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,8191,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,8191,0.2836266756057739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,8191,0.28381866216659546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,16383,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,16383,0.017594666530688603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,16383,0.5849653482437134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,1,0.017637333522240322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,1,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,1,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,16383,0.5929173231124878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,1,0.014032000054915747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,3,0.017583999782800674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,3,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,3,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,3,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,7,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,7,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,7,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,15,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,15,0.018485333770513535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,7,0.015728000551462173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,15,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,15,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,31,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,31,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,31,0.01964266722400983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,31,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,63,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,63,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,63,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,63,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,127,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,127,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,127,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,127,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,255,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,255,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,255,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,511,0.019754666835069656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,255,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,511,0.03054400036732356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,511,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,511,0.02921066681543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,1023,0.019754666835069656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,1023,0.04799466828505198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,1023,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,1023,0.04593066871166229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,2047,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,2047,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,2047,0.08090133468310039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,2047,0.07888533174991608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,4095,0.019546666493018467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,4095,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,4095,0.14860799908638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,4095,0.1471680005391439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,8191,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,8191,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,8191,0.2836959958076477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,8191,0.28246400753657025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,16383,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,16383,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,16383,0.5852160056432089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,1,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,1,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,1,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,1,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,16383,0.5930560032526652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,3,0.018800000349680584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,3,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,3,0.014064000298579534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,3,0.01479999969402949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,7,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,7,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,7,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,7,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,15,0.018170667191346485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,15,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,15,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,15,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,31,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,31,0.018917333334684372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,31,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,31,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,63,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,63,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,63,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,63,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,127,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,127,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,127,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,127,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,255,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,255,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,255,0.017530667285124462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,255,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,511,0.030773334205150604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,511,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,511,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,511,0.029130667448043823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,1023,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,1023,0.0476800004641215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,1023,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,1023,0.04628799855709076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,2047,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,2047,0.08092266817887624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,2047,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,2047,0.07885333398977916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,4095,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,4095,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,4095,0.14960533380508423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,4095,0.14659200112024942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,8191,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,8191,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,8191,0.2855093280474345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,8191,0.281605343023936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,16383,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,1,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,16383,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,1,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,1,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,1,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,16383,0.6152746677398682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,3,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,3,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,3,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,7,0.011530666301647821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,16383,0.5952959855397543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,7,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,7,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,15,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,15,0.012400000045696894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,15,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,31,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,31,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,31,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,63,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,63,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,63,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,63,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,127,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,255,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,255,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,255,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,255,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,511,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,511,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,511,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,1023,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,1023,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,1023,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,1023,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,2047,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,2047,0.01647466669480006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,2047,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,2047,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,4095,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,4095,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,4095,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,4095,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,8191,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,8191,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,8191,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,8191,0.023221333821614582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,16383,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,16383,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,16383,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,16383,0.03379733363787333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,1,1.2258293628692627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,1,0.9856266975402832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,1,1.2252373695373535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,1,0.9854826927185059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,3,1.226965347925822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,3,1.2253066698710124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,3,0.9852960109710693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,7,1.2722293535868328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,7,1.2699946562449138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,3,0.9846719900767008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,7,1.034821351369222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,7,1.035871982574463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,15,1.294965346654256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,15,1.2955573399861653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,15,1.0827733675638835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,15,1.0843573411305745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,31,1.5784586270650227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,31,1.5796000162760417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,31,1.3842239379882812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,31,1.3859200477600098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,63,1.598090648651123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,63,1.5981440544128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,63,1.3971039454142253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,63,1.3989280064900715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,1,1.2298346360524495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,1,0.9936853249867758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,1,1.2293972969055176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,1,0.9964959621429443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,3,1.229375998179118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,3,1.2305280367533367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,3,0.9937280019124349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,3,0.9961547056833903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,7,1.2708053588867188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,7,1.035749355951945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,7,1.2711466948191326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,7,1.0373653570810955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,15,1.2975573539733887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,15,1.0840213298797607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,15,1.084549347559611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,15,1.2975680033365886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,31,1.5792479515075684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,31,1.5793066024780273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,31,1.3849226633707683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,31,1.3846559524536133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,63,1.3991626103719075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,63,1.599397341410319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,63,1.3982240358988445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,63,1.5991733868916829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,1,1.001050631205241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,1,1.0005066394805908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,1,1.235653320948283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,1,1.234826644261678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,3,1.0037279923756917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,3,1.2344053586324055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,3,1.0023787021636963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,3,1.2345013618469238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,7,1.0374560356140137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,7,1.2735466957092285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,7,1.0372479756673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,7,1.2714880307515461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,15,1.085098663965861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,15,1.0850719610850017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,15,1.2978133360544841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,15,1.3002613385518391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,31,1.3854079246520996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,31,1.5817599296569824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,31,1.580399990081787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,31,1.3847626050313313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,63,1.616986592610677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,63,1.6024586359659831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,1,0.09141866366068523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,63,1.3987360000610352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,63,1.398863951365153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,1,0.09122666716575623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,1,0.08497599760691325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,1,0.0851146678129832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,3,0.09139733513196309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,3,0.09206933776537578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,3,0.08479467034339905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,3,0.08470933636029561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,7,0.09126399954160054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,7,0.09237866600354512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,7,0.08526933193206787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,7,0.08481599887212117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,15,0.09093333284060161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,15,0.09158933162689209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,15,0.08575466275215149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,15,0.08483733733495076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,31,0.09104532996813457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,31,0.09148800373077393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,31,0.08553066849708557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,31,0.08483200271924336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,63,0.0911253293355306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,63,0.08547199765841167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,63,0.09121066331863403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,63,0.08470933636029561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,1,2.4457972844441733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,1,1.9658239682515461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,1,1.9603625933329265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,1,2.4471093813578286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,3,1.9630187352498372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,3,1.9659786224365234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,3,2.4507573445638022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,3,2.460757255554199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,7,2.0700693130493164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,7,2.0642827351888022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,7,2.537567933400472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,7,2.541423956553141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,15,2.1625653902689614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,15,2.1653226216634116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,15,2.6097119649251304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,15,2.596405347188314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,31,2.774165471394857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,31,2.7652587890625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,31,3.1639200846354165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,31,3.1684373219807944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,1,1.9841814041137695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,1,1.9844640096028645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,1,2.469919999440511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,1,2.462565263112386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,3,1.997546672821045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,3,2.4617439905802407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,3,1.9904747009277344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,3,2.4619626998901367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,7,2.0713119506835938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,7,2.07041072845459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,7,2.569946606953939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,7,2.553290685017904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,15,2.1806987126668296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,15,2.1710294087727866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,15,2.63592529296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,15,2.6263786951700845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,31,2.7771307627360025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,31,2.777146657307943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,31,3.2158292134602866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,31,3.204501469930013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,1,2.0153279304504395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,1,2.00600528717041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,1,2.4755519231160483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,3,2.4944052696228027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,3,2.5084640185038247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,3,2.012416044871012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,7,2.615765412648519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,1,2.4733920097351074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,3,2.00708802541097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,7,2.0914880434672036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,7,2.077605406443278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,15,2.190122604370117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,15,2.7368907928466797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,15,2.742314656575521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,7,2.6022720336914062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,15,2.1894346872965493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,31,3.282421429951986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,31,3.2824694315592446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,1,0.17217065890630087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,1,0.17160000403722128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,31,2.8161439895629883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,31,2.818629264831543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,1,0.16120533148447672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,3,0.17178134123484293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,3,0.17140267292658487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,3,0.16098666191101074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,3,0.16109866897265115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,7,0.17222400506337485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,7,0.17092265685399374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,7,0.160863995552063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,7,0.16074132919311523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,15,0.1709386706352234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,15,0.17105066776275635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,1,0.16172266999880472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,15,0.1606666644414266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,15,0.160863995552063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,31,0.17139732837677002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,31,0.17083199818929037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,1,0.0271573339899381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,1,0.02714666724205017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,1,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,1,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,3,0.027258666853109997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,3,0.02784000088771184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,3,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,3,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,7,0.027376001079877216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,7,0.02736533433198929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,7,0.02359466751416524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,31,0.16105600198109946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,7,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,15,0.02754133443037669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,15,0.0272533322374026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,15,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,15,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,31,0.031386665999889374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,31,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,31,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,31,0.029189333319664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,63,0.0316746657093366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,63,0.031680000325044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,31,0.16189866264661154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,63,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,63,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,127,0.031871999303499855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,127,0.03173333406448364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,127,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,127,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,255,0.03158933420976003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,255,0.03749866783618927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,255,0.02926933268706004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,255,0.03372266640265783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,511,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,511,0.0539626677831014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,511,0.029525332152843475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,511,0.05006400247414907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,1023,0.031632001201311745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,1023,0.02938666691382726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,1023,0.0867680013179779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,1023,0.08275199929873149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,2047,0.029285334050655365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,2047,0.03158933420976003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,2047,0.15235199530919394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,2047,0.14863466223080954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,4095,0.0316746657093366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,4095,0.029648000995318096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,4095,0.28388800223668414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,4095,0.27948800722757977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,8191,0.03296533226966858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,1,0.02714666724205017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,8191,0.030016000072161358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,1,0.027114666998386383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,1,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,8191,0.5488479932149252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,1,0.023541333774725597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,3,0.025642665723959606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,3,0.0273333340883255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,3,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,8191,0.5442026853561401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,3,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,7,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,7,0.02741866558790207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,7,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,7,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,15,0.027237333357334137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,15,0.027450665831565857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,15,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,15,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,31,0.031712000568707786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,31,0.03154666721820831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,31,0.029285334050655365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,63,0.0315786674618721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,31,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,63,0.031930667658646904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,63,0.029296000798543293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,127,0.03160000095764796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,127,0.031856000423431396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,63,0.029605334003766377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,127,0.02937600016593933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,127,0.029487999776999157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,255,0.03242133309443792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,255,0.037471999724706016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,255,0.029487999776999157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,255,0.03401600072781245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,511,0.033189333975315094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,511,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,511,0.05392000079154968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,511,0.050240000089009605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,1023,0.033226666351159416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,1023,0.08572266499201457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,1023,0.029482667644818623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,1023,0.08276799817879994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,2047,0.03189333279927572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,2047,0.15249066551526388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,2047,0.029520000020662945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,2047,0.14851199587186178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,4095,0.03187733391920725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,4095,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,4095,0.2845120032628377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,4095,0.27962666749954224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,8191,0.03146133323510488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,8191,0.029792000850041706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,8191,0.5502933263778687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,1,0.027162666122118633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,1,0.027615999182065327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,1,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,8191,0.5443520148595175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,1,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,3,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,3,0.026863999664783478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,3,0.02350933353106181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,3,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,7,0.027274665733178455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,7,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,7,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,7,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,15,0.02734400083621343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,15,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,15,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,15,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,31,0.03148799886306127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,31,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,31,0.03190933416287104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,31,0.02923733244339625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,63,0.03181866556406021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,63,0.03158933420976003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,63,0.029530666768550873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,63,0.029338667790095013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,127,0.031557333966096245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,127,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,127,0.02945599953333537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,127,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,255,0.031701333820819855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,255,0.037632000943024956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,255,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,255,0.033674667278925575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,511,0.03160533308982849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,511,0.05407466491063436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,511,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,511,0.050245334704717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,1023,0.031530665854612984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,1023,0.08653333783149719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,1023,0.02905600021282832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,1023,0.08313600222269694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,2047,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,2047,0.15250133474667868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,2047,0.02959466725587845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,2047,0.14846932888031006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,4095,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,4095,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,4095,0.2850240071614583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,4095,0.28066666920979816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,8191,0.03177600105603536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,8191,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,1,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,8191,0.5519413153330485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,1,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,8191,0.5437813202540079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,1,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,1,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,3,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,3,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,7,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,7,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,7,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,15,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,15,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,15,0.011765333513418833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,31,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,31,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,31,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,63,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,31,0.0120319997270902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,63,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,63,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,63,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,127,0.011776000261306763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,127,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,127,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,127,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,255,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,255,0.011482667177915573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,255,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,255,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,511,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,511,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,511,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,511,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,1023,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,1023,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,1023,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,1023,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,2047,0.01332266628742218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,2047,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,2047,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,2047,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,4095,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,4095,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,4095,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,4095,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,8191,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,8191,0.04083733260631561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,8191,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,8191,0.033546666304270424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,3,0.04562133550643921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,7,0.046069333950678505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,7,0.03992533435424169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,15,0.05606399973233541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,15,0.04983466863632202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,31,0.056159997979799904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,31,0.05042133231957754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,63,0.05629866818586985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,63,0.05004799862702688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,1,0.04419200122356415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,127,0.06435733536879222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,127,0.060405333836873375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,255,0.09711999694506328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,255,0.09085866808891296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,511,0.1585599978764852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,511,0.15246400237083435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,1023,0.28377066055933636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,1023,0.27321066459019977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,2047,0.5342186689376831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,1,0.03752533346414566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,2047,0.5156053304672241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,1,0.04393066465854645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,1,0.037808001041412354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,3,0.04585599899291992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,3,0.039813332259655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,7,0.0461706668138504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,4095,1.0374560356140137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,4095,0.9988906383514404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,7,0.039919999738534294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,15,0.05609600245952606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,15,0.04978133241335551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,31,0.05606933434804281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,31,0.050026665131251015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,63,0.05596800148487091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,63,0.05156266689300537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,127,0.06462400158246358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,127,0.06010133524735769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,255,0.096778670946757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,255,0.09090133508046468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,511,0.15869866808255514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,511,0.15223466356595358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,1023,0.28358399868011475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,3,0.03949866692225138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,1023,0.27331199248631793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,2047,0.5355413357416788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,2047,0.5150560140609741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,1,0.044122666120529175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,1,0.03764266769091288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,3,0.04605866471926371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,3,0.039605334401130676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,7,0.045994664231936135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,4095,0.9986186822255453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,4095,1.0499146779378254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,7,0.039813332259655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,15,0.05607999861240387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,15,0.0499946673711141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,31,0.04956800242265066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,31,0.05603733162085215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,63,0.056176001826922096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,63,0.050069332122802734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,127,0.06442133088906606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,127,0.060090666015942894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,255,0.0969493289788564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,255,0.09105066458384196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,511,0.1586133340994517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,511,0.1514079968134562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,1023,0.2855573296546936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,1023,0.2733386754989624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,2047,0.5372373263041178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,2047,0.5165599981943766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,1,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,1,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,3,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,3,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,7,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,7,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,4095,1.0158720016479492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,4095,1.063589334487915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,15,0.013690666606028875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,15,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,31,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,31,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,63,0.01332266628742218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,63,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,127,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,127,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,255,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,255,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,511,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,511,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,1023,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,1023,0.018357332795858383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,2047,0.03959999978542328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,2047,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,4095,0.0620000014702479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,1,0.009253333633144697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,4095,0.04155733436346054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,1,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,3,0.008879999940594038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,7,0.009317333499590555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,15,0.009045333291093508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,31,0.00915733352303505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,31,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,63,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,63,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,127,0.009973333527644476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,127,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,255,0.020351999749739964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,255,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,511,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,511,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,1023,0.03127466638882955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,1023,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,2047,0.039642666776975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,2047,0.040250666439533234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,4095,0.062463998794555664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,4095,0.05799466868241628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,8191,0.10707733035087585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,8191,0.09526399771372478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,1,0.009039999917149544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,16383,0.19350399573644003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,1,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,3,0.008842666943868002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,16383,0.16900267203648886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,3,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,7,0.00879466657837232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,15,0.00916800027092298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,15,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,31,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,63,0.009103999783595404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,63,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,127,0.009706666693091393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,127,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,255,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,255,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,511,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,511,0.02734400083621343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,1023,0.029125332832336426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,1023,0.031199999153614044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,2047,0.03988266736268997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,2047,0.040181333820025124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,4095,0.06247466802597046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,4095,0.058778668443361916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,8191,0.10724799831708272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,8191,0.09528000156084697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,1,0.009114666531483332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,16383,0.19341333707173666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,1,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,16383,0.16896533966064453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,3,0.009269333134094873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,7,0.009098666409651438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,7,0.010426666587591171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,15,0.008762666955590248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,15,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,31,0.009290666629870733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,63,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,63,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,127,0.01002133327225844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,127,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,255,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,255,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,511,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,511,0.027029333015282948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,1023,0.029205332199732464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,1023,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,2047,0.04020266731580099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,2047,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,4095,0.06229333579540253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,4095,0.05842133363087972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,8191,0.10545066992441814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,8191,0.09551999966303508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,1,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,16383,0.19342933098475137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,1,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,16383,0.16687466700871786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,3,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,3,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,7,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,15,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,15,0.011973333855470022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,127,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,63,0.011877333124478659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,127,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,255,0.01146666705608368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,511,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,511,0.012432000289360682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,1023,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,1023,0.011973333855470022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,2047,0.012442667037248611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,2047,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,4095,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,4095,0.015770666301250458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,8191,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,8191,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,1,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,16383,0.020773333807786305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,1,0.009183999771873156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,16383,0.01883200059334437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,3,0.013471999516089758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,3,0.009045333291093508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,7,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,15,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,7,0.009466666728258133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,31,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,31,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,63,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,63,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,127,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,127,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,255,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,255,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,511,0.021514666577180225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,511,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,1023,0.0315733328461647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,1023,0.028624000648657482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,2047,0.0518506666024526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,2047,0.04781333108743032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,4095,0.09107200304667155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,4095,0.08482666810353597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,8191,0.16902933518091837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,8191,0.16061333815256754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,1,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,1,0.009045333291093508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,16383,0.3254400094350179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,3,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,3,0.00915733352303505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,16383,0.30955733855565387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,7,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,7,0.009103999783595404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,15,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,15,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,31,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,31,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,63,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,63,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,127,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,127,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,255,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,255,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,511,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,511,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,1023,0.029717333614826202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,1023,0.03145600110292435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,2047,0.05184000233809153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,2047,0.04821333289146423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,4095,0.08492799599965413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,4095,0.09118400017420451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,8191,0.1690453290939331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,8191,0.16078399618466696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,1,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,16383,0.3267199993133545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,1,0.00914666677514712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,3,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,3,0.00949866697192192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,16383,0.31074132521947223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,7,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,7,0.009141333401203156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,15,0.013376000026861826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,31,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,15,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,31,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,63,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,63,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,127,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,127,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,255,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,255,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,511,0.022810667753219604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,511,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,1023,0.03202133377393087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,1023,0.029445332785447437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,2047,0.05209066470464071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,2047,0.048207998275756836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,4095,0.09097066521644592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,4095,0.0846506655216217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,8191,0.16100800037384033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,8191,0.1695093313852946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,1,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,16383,0.32654400666554767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,1,0.011509332805871964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,3,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,16383,0.31539199749628705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,3,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,15,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,15,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,31,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,63,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,31,0.011626667032639185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,127,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,127,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,255,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,511,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,511,0.011760000139474869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,1023,0.012223999947309494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,1023,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,2047,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,2047,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,4095,0.017594666530688603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,4095,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,8191,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,8191,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,16383,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,16383,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,1,0.08282666901747386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,1,0.07021333277225494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,3,0.08476799726486206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,3,0.07240533332029979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,7,0.08689600229263306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,7,0.07452799876530965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,15,0.1042133371035258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,15,0.09297600388526917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,31,0.10335466265678406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,31,0.09309333562850952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,63,0.10529067118962605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,63,0.09295466542243958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,127,0.12355732917785645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,127,0.11348799864451091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,255,0.18524799744288126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,255,0.17294400930404663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,511,0.30829334259033203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,511,0.2938773234685262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,1023,0.5584906737009684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,1023,0.5342933336893717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,1,0.08329600095748901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,1,0.0708426684141159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,2047,1.0559840202331543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,3,0.08490133285522461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,3,0.07254933317502339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,2047,1.0132746696472168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,7,0.08676266670227051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,7,0.07436800003051758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,15,0.10520533720652263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,31,0.10518399874369304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,31,0.09322133660316467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,15,0.0937546690305074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,63,0.10560533404350281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,63,0.0935093363126119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,127,0.12595733006795248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,255,0.18553600708643594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,127,0.11412800351778667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,255,0.17486933867136636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,511,0.30825066566467285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,511,0.2956639925638835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,1023,0.5591893196105957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,1023,0.5342453320821127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,1,0.07025599976380666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,1,0.0831573357184728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,3,0.08506133159001668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,3,0.07303999861081441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,2047,1.0660853385925293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,7,0.08714666962623596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,2047,1.013317346572876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,15,0.10517332951227824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,7,0.07522666454315186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,15,0.09322667121887207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,31,0.10523200035095215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,31,0.09315199653307597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,63,0.09358933568000793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,63,0.10562666257222493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,127,0.12390933434168498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,127,0.11335466305414836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,255,0.1855306625366211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,255,0.17545600732167563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,511,0.30852266152699787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,511,0.2942826747894287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,1023,0.5686773459116617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,1023,0.5335359970728556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,1,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,1,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,2047,1.0786346594492595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,3,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,3,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,7,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,2047,1.0319466590881348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,7,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,15,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,15,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,31,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,31,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,63,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,63,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,127,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,127,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,255,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,255,0.016794666647911072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,511,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,511,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,1023,0.03766400118668874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,1023,0.02552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,2047,0.06202666461467743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,1,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,2047,0.043653334180514015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,3,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,1,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,3,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,7,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,7,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,15,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,15,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,31,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,31,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,63,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,63,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,127,0.015706667055686314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,127,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,255,0.019674666225910187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,255,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,511,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,511,0.03178666780392329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,1023,0.05029866596062978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,1023,0.05006400247414907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,2047,0.09107733766237895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,2047,0.08704533179601033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,4095,0.16927466789881387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,4095,0.16099733114242554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,8191,0.3287466764450073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,8191,0.30710933605829877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,1,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,1,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,16383,0.6548053423563639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,3,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,3,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,7,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,16383,0.5987786849339803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,15,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,7,0.01479999969402949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,15,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,31,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,31,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,63,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,63,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,127,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,127,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,255,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,255,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,511,0.029648000995318096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,1023,0.0499839981396993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,1023,0.05036800106366476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,511,0.0328053335348765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,2047,0.08984000484148662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,2047,0.08692800005276997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,4095,0.17046932379404703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,4095,0.16171733538309732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,8191,0.3323413332303365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,8191,0.3070080081621806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,1,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,1,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,3,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,3,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,7,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,7,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,15,0.013568000247081121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,16383,0.6650186777114868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,16383,0.599402666091919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,15,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,31,0.013562666873137156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,31,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,63,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,127,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,63,0.01351999988158544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,127,0.019727999965349834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,255,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,255,0.023045333723227184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,511,0.029685333371162415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,511,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,1023,0.04975999891757965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,1023,0.05016533533732096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,2047,0.09101866682370503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,2047,0.08695466319719951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,4095,0.1691360076268514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,4095,0.1611786683400472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,8191,0.33088000615437824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,8191,0.30633066097895306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,1,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,1,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,16383,0.6691733201344808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,3,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,3,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,16383,0.5990613301595052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,7,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,7,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,15,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,15,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,31,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,31,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,63,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,127,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,127,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,255,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,255,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,511,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,511,0.012602667013804117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,1023,0.012725333372751871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,2047,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,2047,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,4095,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,4095,0.016837333639462788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,8191,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,8191,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,16383,0.039264000952243805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,16383,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,1,0.1604266663392385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,1,0.13209066788355509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,3,0.136272003253301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,3,0.1628320018450419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,7,0.16522666811943054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,7,0.142794668674469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,15,0.20171199242273966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,15,0.1786080002784729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,31,0.2021440068880717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,31,0.17899733781814575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,63,0.20351467529932657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,63,0.181002676486969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,127,0.23875200748443604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,127,0.21814932425816855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,255,0.36418131987253827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,255,0.339413324991862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,511,0.6090240081151327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,511,0.5809333324432373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,1,0.1588053305943807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,1023,1.1068480014801025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,1,0.13266133268674216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,3,0.16384533047676086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,3,0.13581333557764688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,1023,1.0543039639790852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,7,0.14242133498191833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,7,0.16694400707880655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,15,0.20162665843963623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,15,0.17918932437896729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,31,0.20169599850972494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,31,0.179530660311381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,63,0.20374399423599243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,63,0.181167999903361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,127,0.2404266595840454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,127,0.22043200333913168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,255,0.3632693290710449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,255,0.34092267354329425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,511,0.6090879837671915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,511,0.5806506474812826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,1,0.1585919956366221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,1,0.13212266564369202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,1023,1.1218079725901287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,3,0.16330132881800333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,1023,1.0560746987660725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,3,0.13706666231155396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,7,0.14229333400726318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,7,0.16702934106191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,15,0.20195200045903525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,15,0.1792746583620707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,31,0.20166399081548056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,31,0.1795626680056254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,63,0.203658660252889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,63,0.1813653310139974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,127,0.2405973275502523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,127,0.22020266453425089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,255,0.36400532722473145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,255,0.3410559892654419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,511,0.5806666612625122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,511,0.6249173482259115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,1,0.02107200026512146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,1,0.02327999969323476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,3,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,1023,1.0781706968943279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,1023,1.1316800117492676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,3,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,7,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,7,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,15,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,15,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,31,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,31,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,63,0.02365333338578542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,63,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,127,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,127,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,255,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,255,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,511,0.03845333307981491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,511,0.02923733244339625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,1023,0.05834666887919108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,1023,0.04048000027736028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,1,0.3116106589635213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,1,0.254746675491333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,3,0.32052799065907794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,3,0.265669325987498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,7,0.3246346712112427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,7,0.27771200736363727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,15,0.39819733301798504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,15,0.34940799077351886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,31,0.3981599807739258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,31,0.3517013390858968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,63,0.4003946781158447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,63,0.35333867867787677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,127,0.4723413387934367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,127,0.43093868096669513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,255,0.7202026844024658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,1,0.3103040059407552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,255,0.6724906762441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,1,0.2566986680030823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,3,0.3205866614977519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,3,0.26543466250101727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,7,0.3272266586621602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,7,0.277349332968394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,15,0.3982400099436442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,15,0.3513760169347127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,31,0.3984053134918213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,31,0.35153599580128986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,63,0.4002666473388672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,63,0.3534880081812541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,127,0.47350935141245526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,127,0.43134931723276776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,1,0.2568320035934448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,255,0.7196586926778158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,1,0.30990399916966754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,255,0.6728160381317139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,3,0.26702932516733807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,3,0.3205920060475667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,7,0.3272533416748047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,7,0.2775839964548747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,15,0.34987199306488037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,15,0.3980426788330078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,31,0.398357351620992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,31,0.35142401854197186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,63,0.35362664858500165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,63,0.4002293348312378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,127,0.4740000168482463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,127,0.4312800168991089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,1,0.03631466627120972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,1,0.03347733368476232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,3,0.03755199909210205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,255,0.6726133028666178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,255,0.7445226510365804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,3,0.03362133353948593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,7,0.03751999884843826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,7,0.033514666060606636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,15,0.035775999228159584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,15,0.033733333150545754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,31,0.03754666695992152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,31,0.03369600077470144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,63,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,63,0.03363733241955439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,127,0.03799466788768768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,127,0.03342399994532267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,255,0.043381333351135254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,255,0.03396799912055334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,1,0.5007359981536865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,1,0.6150079965591431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,3,0.6338026523590088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,3,0.523690660794576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,7,0.549450675646464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,7,0.6424906651178995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,15,0.7880853017171224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,15,0.6934133370717367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,31,0.6953279972076416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,31,0.7889333566029867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,63,0.793071985244751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,63,0.6991840203603109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,127,0.855504035949707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,1,0.5028906663258871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,127,0.9368533293406168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,1,0.6124320030212402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,3,0.5239520072937012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,3,0.636026660601298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,7,0.6478773355484009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,7,0.5487146774927775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,15,0.6936799685160319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,15,0.7897866566975912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,31,0.7880213260650635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,31,0.696458657582601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,63,0.7941919962565104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,63,0.7005333105723063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,127,0.9442400137583414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,127,0.8556000391642252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,1,0.6114559968312582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,1,0.504858652750651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,3,0.6358666817347208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,3,0.5251359939575195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,7,0.6484320163726807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,7,0.5504479805628458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,15,0.7879733244578043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,31,0.697429339090983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,31,0.7891039848327637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,15,0.6941493352254232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,63,0.7980053424835205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,63,0.7004799842834473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,127,0.980954647064209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,127,0.8619093100229899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,1,0.06258133550484975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,1,0.0562666654586792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,3,0.06359999875227611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,3,0.05648000041643778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,7,0.06266666452089946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,7,0.05579733351866404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,15,0.06342400113741557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,31,0.06247466802597046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,15,0.05625600119431814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,31,0.05610666672388712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,63,0.06424533327420552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,1,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,63,0.05629333357016245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,127,0.05819733440876007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,127,0.06391466657320659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,1,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,3,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,3,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,7,0.017722666263580322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,7,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,15,0.0721013347307841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,31,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,31,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,15,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,63,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,63,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,127,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,127,0.02022933339079221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,255,0.029487999776999157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,255,0.029535998900731403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,511,0.04612799982229868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,511,0.04588800172011057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,1023,0.08085866769154866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,1023,0.07901866734027863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,2047,0.14643733700116476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,2047,0.1451359987258911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,4095,0.28148800134658813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,4095,0.2775413393974304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,8191,0.5503146648406982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,1,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,8191,0.5434079964955648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,1,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,3,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,3,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,7,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,7,0.017711999515692394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,15,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,15,0.017840000490347546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,16383,1.1552213033040364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,31,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,31,0.01756799966096878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,16383,1.2966933250427246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,63,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,63,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,127,0.021573332448800404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,127,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,255,0.029701332251230877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,255,0.02757866680622101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,511,0.04643733302752177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,511,0.04609066744645437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,1023,0.08064533273379008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,1023,0.07866666714350383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,2047,0.14601066708564758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,2047,0.1487573285897573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,4095,0.277621328830719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,4095,0.2834080060323079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,8191,0.556384007136027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,8191,0.5456159909566244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,1,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,1,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,3,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,7,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,3,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,7,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,15,0.019610666980346043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,16383,1.184175968170166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,15,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,16383,1.2971466382344563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,31,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,31,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,63,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,63,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,127,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,127,0.02149333308140437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,255,0.029648000995318096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,255,0.029343999922275543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,511,0.046351999044418335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,511,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,1023,0.08057066798210144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,1023,0.0790880024433136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,2047,0.14794133106867471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,2047,0.14628799756368002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,4095,0.2820853392283122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,4095,0.2775413393974304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,8191,0.5531306664148966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,8191,0.5438826481501261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,1,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,1,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,3,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,16383,1.344645341237386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,7,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,7,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,15,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,16383,1.3136746883392334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,15,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,31,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,31,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,63,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,63,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,127,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,127,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,255,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,255,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,511,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,511,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,1023,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,1023,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,2047,0.018464000274737675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,2047,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,4095,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,4095,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,8191,0.039701332648595176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,8191,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,16383,0.060736000537872314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,16383,0.04160533348719279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,1,1.2211466630299885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,1,0.9965120156606039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,3,1.2620906829833984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,3,1.0436320304870605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,7,1.2819573084513347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,7,1.094330628712972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,15,1.5725280443827312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,15,1.3809547424316406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,31,1.5735626220703125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,31,1.3876585960388184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,63,1.5933173497517903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,63,1.397098700205485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,1,1.2161706288655598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,1,1.0000426769256592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,3,1.2649973233540852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,3,1.0463519891103108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,7,1.2950186729431152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,7,1.096506675084432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,15,1.5774025917053223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,15,1.384933312733968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,31,1.395674705505371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,31,1.5903573036193848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,63,1.6280639966328938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,63,1.416437307993571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,1,1.0073493321736653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,1,1.22269868850708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,3,1.268997351328532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,3,1.0484533309936523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,7,1.1089866956075032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,7,1.3071946303049724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,15,1.595263957977295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,15,1.3899893760681152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,31,1.4138347307840984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,31,1.619599978129069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,1,0.11369066437085469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,1,0.10325866937637329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,63,1.6392159461975098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,63,1.4535840352376301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,3,0.10371200243631999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,3,0.11371733744939168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,7,0.11540800333023071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,7,0.10311999917030334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,15,0.11507733662923177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,15,0.10322133700052898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,31,0.11499200264612834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,31,0.10226666927337646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,63,0.1155413289864858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,63,0.10342400272687276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,1,2.148805300394694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,1,2.5937867164611816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,3,2.6739679972330728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,3,2.191263993581136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,7,2.7102187474568686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,7,2.279205322265625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,15,3.213893254597982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,15,2.8330774307250977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,31,3.210869471232096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,31,2.8388853073120117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,1,2.6231199900309243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,1,2.172933260599772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,3,2.2098347345987954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,3,2.6969547271728516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,7,2.3020853996276855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,7,2.739034652709961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,15,3.2274080912272134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,15,2.852149327596029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,31,2.8549814224243164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,31,3.2279411951700845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,1,2.215456008911133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,1,2.6771039962768555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,3,2.249039967854818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,3,2.73140811920166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,7,2.3306454022725425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,7,2.7783679962158203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,15,2.8588692347208657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,15,3.2414560317993164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,31,2.8724053700764975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,31,3.2448107401529946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,1,0.2172266642252604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,1,0.19338667392730713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,3,0.21783999601999918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,3,0.19300800561904907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,7,0.19211200873057047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,7,0.2169119914372762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,15,0.21763734022776285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,15,0.1923253337542216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,1,0.025733334322770435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,1,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,3,0.0271519993742307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,3,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,7,0.027098665634791057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,7,0.023647998770078022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,15,0.03161066770553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,15,0.028463999430338543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,31,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,31,0.02921066681543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,63,0.03164266546567281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,63,0.029343999922275543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,127,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,127,0.033743999898433685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,31,0.21634133656819662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,31,0.19265600045522055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,255,0.05248000224431356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,255,0.0499839981396993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,511,0.08515733480453491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,511,0.0828906645377477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,1023,0.15197333693504333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,1023,0.14642133315404257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,2047,0.2740746736526489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,2047,0.28197334210077923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,4095,0.5459680159886678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,4095,0.5292533238728842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,1,0.026005332668622334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,1,0.023071999351183575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,3,0.0272533322374026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,3,0.02390933285156886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,7,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,7,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,15,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,15,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,8191,1.041759967803955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,8191,1.0713706811269124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,31,0.03155199935038885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,31,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,63,0.02926933268706004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,127,0.035887998839219414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,63,0.0316746657093366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,127,0.03381866713364919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,255,0.05202133456865946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,255,0.050341332952181496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,511,0.08474133412043254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,511,0.08305599788824718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,1023,0.15059199929237366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,1023,0.14672533671061197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,2047,0.28177066644032794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,2047,0.2741439938545227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,4095,0.5470879872639974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,4095,0.5294613440831503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,1,0.02555199960867564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,1,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,3,0.027349332968393963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,3,0.0236160010099411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,7,0.02736533433198929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,7,0.023567999402681988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,8191,1.0900426705678303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,8191,1.0436800320943196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,15,0.031285333136717476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,15,0.029813334345817566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,31,0.031370667119820915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,31,0.02921066681543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,63,0.031845333675543465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,63,0.029285334050655365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,127,0.03565333286921183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,127,0.033914667864640556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,255,0.05208000044027964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,255,0.05008000135421753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,511,0.08554133772850037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,511,0.08252266546090443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,1023,0.15069333712259927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,1023,0.14646400014559427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,2047,0.28197866678237915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,2047,0.2755146622657776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,4095,0.5537600119908651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,4095,0.5297653277715048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,1,0.011429333438475927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,1,0.0116799995303154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,3,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,8191,1.102170705795288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,3,0.01259200026591619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,8191,1.049221356709798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,7,0.011994666109482447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,15,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,15,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,31,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,31,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,63,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,63,0.011493333925803503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,127,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,255,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,511,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,511,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,1023,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,1023,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,2047,0.020879998803138733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,2047,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,4095,0.025077333052953083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,4095,0.038506666819254555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,8191,0.05932799975077311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,1,0.035989334185918175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,8191,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,1,0.03584533433119456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,1,0.03127466638882955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,1,0.03159466634194056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,3,0.03543466577927271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,3,0.03555733213822047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,3,0.031397332747777305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,3,0.03140799949566523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,7,0.03700799991687139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,7,0.0360000009338061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,7,0.03160533308982849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,7,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,15,0.03752533346414566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,15,0.03790933390458425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,15,0.033359999457995095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,15,0.03330666571855545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,31,0.04419200122356415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,31,0.04398400088151296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,31,0.03965866565704346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,31,0.040074666341145836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,63,0.04372799893220266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,63,0.04394666850566864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,63,0.03994666785001755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,63,0.03958400090535482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,127,0.0459146648645401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,127,0.04561600089073181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,127,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,127,0.03993066648642222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,255,0.04560000201066335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,255,0.05203199883302053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,255,0.03990400085846583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,255,0.048010667165120445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,511,0.04387733340263367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,511,0.07664533456166585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,511,0.039877332746982574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,511,0.07242666681607564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,1023,0.04008533308903376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,1023,0.0450186679760615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,1023,0.12567466497421265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,1023,0.11975466211636861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,2047,0.045514668027559914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,2047,0.039919999738534294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,2047,0.2231946587562561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,2047,0.21822933355967203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,4095,0.045696000258127846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,1,0.03570133447647095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,4095,0.040591999888420105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,4095,0.4205706516901652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,1,0.03590933233499527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,1,0.03169599920511246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,4095,0.41358399391174316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,1,0.03182933231194814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,3,0.03577066709597906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,3,0.035946667194366455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,3,0.03123733401298523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,3,0.03145600110292435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,7,0.037578667203585304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,7,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,7,0.03764266769091288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,7,0.03175999969244003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,15,0.03779733429352442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,15,0.037461332976818085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,15,0.03352533280849457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,15,0.0334346666932106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,31,0.04379733403523763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,31,0.04402133325735728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,31,0.03958400090535482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,31,0.040106666584809623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,63,0.044069334864616394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,63,0.044079999128977455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,63,0.03975466638803482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,63,0.039706667264302574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,127,0.04524266719818115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,127,0.04566933214664459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,127,0.04081599911053976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,127,0.03956799954175949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,255,0.04555733501911163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,255,0.05218133330345154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,255,0.039674667020638786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,255,0.047983999053637184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,511,0.04572266836961111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,511,0.07671999931335449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,511,0.04011733333269755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,511,0.07259200016657512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,1023,0.04573333263397217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,1023,0.12455999851226807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,1023,0.04048533240954081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,1023,0.12009066343307495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,2047,0.04606399933497111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,2047,0.2241706649462382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,2047,0.039861333866914116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,2047,0.21805334091186523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,4095,0.04603200157483419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,4095,0.040074666341145836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,4095,0.42641599973042804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,1,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,1,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,4095,0.41439998149871826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,1,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,1,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,3,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,3,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,3,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,7,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,3,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,7,0.01219733307758967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,7,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,7,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,15,0.013898666948080063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,15,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,15,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,15,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,31,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,31,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,31,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,31,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,63,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,63,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,63,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,63,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,127,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,127,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,127,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,127,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,255,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,255,0.012400000045696894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,255,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,255,0.013376000026861826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,511,0.012778667112191519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,511,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,511,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,511,0.014432000617186228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,1023,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,1023,0.017658667018016178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,1023,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,1023,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,2047,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,2047,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,2047,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,2047,0.025978667040665943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,4095,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,4095,0.04177600145339966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,4095,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,1,0.00902399979531765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,1,0.009279999881982803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,4095,0.033770665526390076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,1,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,1,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,3,0.009296000003814697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,3,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,3,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,3,0.009343999748428663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,7,0.009232000137368837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,7,0.009056000038981438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,7,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,7,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,15,0.009226666763424873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,15,0.008767999708652496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,15,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,31,0.009103999783595404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,31,0.009248000259200731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,31,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,31,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,63,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,63,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,63,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,63,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,127,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,127,0.013317332913478216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,255,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,255,0.012655999511480331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,255,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,511,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,511,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,511,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,511,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,1023,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,1023,0.023061332603295643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,1023,0.023002666731675465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,1023,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,2047,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,2047,0.029146666328112285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,2047,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,2047,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,4095,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,4095,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,4095,0.039781334499518074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,4095,0.03972800076007843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,8191,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,8191,0.06228266656398773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,8191,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,8191,0.05820266902446747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,16383,0.018922666708628338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,16383,0.10533866286277771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,16383,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,1,0.008938666433095932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,16383,0.09497599800427754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,1,0.008767999708652496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,1,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,1,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,3,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,3,0.00919999989370505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,3,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,7,0.008992000172535578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,7,0.009109333157539368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,15,0.009098666409651438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,15,0.008874666566650072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,15,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,31,0.00879466657837232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,31,0.009429333110650381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,31,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,31,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,63,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,63,0.010480000327030817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,63,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,127,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,127,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,255,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,127,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,255,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,255,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,255,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,511,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,511,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,511,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,511,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,1023,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,1023,0.022954667607943218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,1023,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,1023,0.02613866577545802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,2047,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,2047,0.02940266579389572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,2047,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,2047,0.03129599988460541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,4095,0.03967999915281931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,4095,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,4095,0.03999999910593033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,8191,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,8191,0.062314664324124656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,8191,0.02293333411216736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,8191,0.05816000203291575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,16383,0.01871466636657715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,16383,0.1053706705570221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,16383,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,1,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,16383,0.09512533744176228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,1,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,1,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,1,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,3,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,3,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,3,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,7,0.011605333536863327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,7,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,7,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,15,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,15,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,31,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,15,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,31,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,31,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,63,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,63,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,127,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,63,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,127,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,127,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,255,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,255,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,255,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,255,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,511,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,511,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,511,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,1023,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,1023,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,1023,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,2047,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,2047,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,2047,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,2047,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,4095,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,4095,0.013466666142145792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,4095,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,4095,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,8191,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,8191,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,8191,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,8191,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,16383,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,16383,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,16383,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,1,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,16383,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,1,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,1,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,1,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,3,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,3,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,3,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,3,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,7,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,7,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,7,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,7,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,15,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,15,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,15,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,15,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,31,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,31,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,31,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,31,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,63,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,63,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,63,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,63,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,127,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,127,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,127,0.02048533285657565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,127,0.02089600016673406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,255,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,255,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,255,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,255,0.020842666427294414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,511,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,511,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,511,0.02060266708334287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,511,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,1023,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,1023,0.02164799968401591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,1023,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,1023,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,2047,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,2047,0.020938667158285778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,2047,0.03163733333349228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,2047,0.03333866596221924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,4095,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,4095,0.05220800141493479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,4095,0.020997333029905956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,4095,0.05189866820971171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,8191,0.015775999675194424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,8191,0.09105066458384196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,8191,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,8191,0.08780266841252644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,16383,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,16383,0.16872000694274902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,1,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,16383,0.021498667697111767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,16383,0.15868266423543295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,1,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,1,0.015882667154073715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,3,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,1,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,3,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,3,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,3,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,7,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,7,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,7,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,7,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,15,0.01351999988158544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,15,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,15,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,31,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,15,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,31,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,31,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,31,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,63,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,63,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,63,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,63,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,127,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,127,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,127,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,127,0.01979200045267741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,255,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,255,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,255,0.020202666521072388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,255,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,511,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,511,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,511,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,511,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,1023,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,1023,0.023013333479563396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,1023,0.02094399929046631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,1023,0.02362666775782903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,2047,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,2047,0.032032000521818794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,2047,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,2047,0.03368533402681351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,4095,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,4095,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,4095,0.05235200126965841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,4095,0.05222400029500326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,8191,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,8191,0.09120532870292664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,8191,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,8191,0.08692800005276997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,16383,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,16383,0.16910932461420694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,16383,0.020938667158285778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,1,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,1,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,1,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,16383,0.15868266423543295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,3,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,1,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,3,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,7,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,7,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,7,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,15,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,15,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,15,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,31,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,31,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,31,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,31,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,63,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,63,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,63,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,127,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,127,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,127,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,255,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,255,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,255,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,511,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,255,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,511,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,511,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,511,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,1023,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,1023,0.012042666474978128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,1023,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,1023,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,2047,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,2047,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,2047,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,2047,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,4095,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,4095,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,4095,0.01581866666674614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,4095,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,8191,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,8191,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,8191,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,8191,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,16383,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,16383,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,16383,0.012165332833925882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,16383,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,1,0.06450133522351582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,1,0.06451199948787689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,1,0.05442133545875549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,3,0.06439466774463654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,1,0.05418666700522105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,3,0.05394133428732554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,3,0.05407466491063436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,3,0.06434666613737743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,7,0.06651199857393901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,7,0.056330665946006775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,7,0.0664160003264745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,7,0.05624533196290334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,15,0.06849599877993266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,15,0.0682773341735204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,15,0.057904000083605446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,15,0.05834133426348368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,31,0.08096533517042796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,31,0.08078399797280629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,31,0.07237866520881653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,31,0.07251733541488647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,63,0.08084799846013387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,63,0.08077333370844524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,63,0.07247466842333476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,63,0.07246933380762736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,127,0.08268266419569652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,127,0.08291199803352356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,127,0.07287466526031494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,127,0.07233599821726482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,255,0.08292266726493835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,255,0.09702400366465251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,255,0.07243200143178304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,255,0.08730133374532063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,511,0.08311466872692108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,511,0.14272000392278036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,511,0.07277333239714305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,511,0.1346399982770284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,1023,0.0827946662902832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,1023,0.23672000567118326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,1023,0.07287466526031494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,1023,0.2283786733945211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,2047,0.08286933104197185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,2047,0.07266133526961009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,2047,0.42770131429036456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,1,0.06440000236034393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,1,0.06443733473618825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,2047,0.4171946843465169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,1,0.05409599840641022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,1,0.05435200035572052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,3,0.06444266438484192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,3,0.0643039991458257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,3,0.05407466491063436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,3,0.054586668809254967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,7,0.06657066444555919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,7,0.06659199794133504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,7,0.055573334296544395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,7,0.056421334544817604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,15,0.0683786670366923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,15,0.06846933563550313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,15,0.05820799867312113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,31,0.08042133351167043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,31,0.0809440016746521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,15,0.058490668733914696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,31,0.07252799967924754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,31,0.07237866520881653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,63,0.08104533453782399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,63,0.07260799904664357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,63,0.08311999837557475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,63,0.07226133346557617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,127,0.08272533118724823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,127,0.08272533118724823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,127,0.07337066531181335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,127,0.07236266632874806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,255,0.08292266726493835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,255,0.09723732868830363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,255,0.07313600182533264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,255,0.08686932921409607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,511,0.08285866677761078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,511,0.14418133099873862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,511,0.07294933497905731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,511,0.1341813306013743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,1023,0.08231999973456065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,1023,0.23733866214752197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,1023,0.07276266813278198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,1023,0.22801067431767783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,2047,0.0832586685816447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,2047,0.0727893312772115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,2047,0.43666664759318036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,1,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,1,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,1,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,2047,0.41886401176452637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,1,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,3,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,3,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,3,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,3,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,7,0.01575999955336253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,7,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,7,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,7,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,15,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,15,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,15,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,15,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,31,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,31,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,31,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,63,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,31,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,63,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,63,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,63,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,127,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,127,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,127,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,127,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,255,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,255,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,255,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,255,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,511,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,511,0.01848000039656957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,511,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,511,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,1023,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,1023,0.02384000023206075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,1023,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,1023,0.023061332603295643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,2047,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,2047,0.041877334316571556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,2047,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,1,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,2047,0.03578133384386698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,1,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,1,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,1,0.014682666709025701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,3,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,3,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,3,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,3,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,7,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,7,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,7,0.014282666146755219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,7,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,15,0.01762666677435239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,15,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,15,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,15,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,31,0.018826667219400406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,31,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,31,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,31,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,63,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,63,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,63,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,63,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,127,0.02089600016673406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,127,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,127,0.02178666740655899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,127,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,255,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,255,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,255,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,255,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,511,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,511,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,511,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,511,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,1023,0.020981334149837494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,1023,0.031386665999889374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,1023,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,1023,0.025946666797002155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,2047,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,2047,0.04614399870236715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,2047,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,2047,0.040405333042144775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,4095,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,4095,0.07638399799664815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,4095,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,4095,0.06841599941253662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,8191,0.020970667401949566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,8191,0.13402666648228964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,8191,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,8191,0.12379200259844463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,16383,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,16383,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,16383,0.25303999582926434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,1,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,1,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,1,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,1,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,16383,0.23561600844065347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,3,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,3,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,3,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,3,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,7,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,7,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,7,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,7,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,15,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,15,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,15,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,15,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,31,0.01878400022784869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,31,0.01882133384545644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,31,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,31,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,63,0.020901332298914593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,63,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,63,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,63,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,127,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,127,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,127,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,255,0.021370666722456615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,127,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,255,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,255,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,255,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,511,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,511,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,511,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,511,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,1023,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,1023,0.03148266673088074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,1023,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,1023,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,2047,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,2047,0.04576533536116282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,2047,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,2047,0.040063999593257904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,4095,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,4095,0.07600533465544383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,4095,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,4095,0.06856533388296764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,8191,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,8191,0.13424000144004822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,8191,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,8191,0.12408000230789185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,16383,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,16383,0.25224000215530396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,16383,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,1,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,1,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,16383,0.23612799247105917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,3,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,3,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,3,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,3,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,7,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,7,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,7,0.010527999450763067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,7,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,15,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,15,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,15,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,15,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,31,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,31,0.010527999450763067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,31,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,31,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,63,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,63,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,63,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,127,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,63,0.011498666057984034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,127,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,127,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,127,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,255,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,255,0.010255999863147736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,255,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,255,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,511,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,511,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,511,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,511,0.012549333274364471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,1023,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,1023,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,1023,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,1023,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,2047,0.013770667215188345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,2047,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,2047,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,2047,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,4095,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,4095,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,4095,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,4095,0.013536000003417334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,8191,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,8191,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,8191,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,8191,0.018079999834299088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,16383,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,16383,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,16383,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,1,0.12192533413569133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,16383,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,1,0.12180266777674358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,1,0.09941333532333374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,1,0.09969600041707356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,3,0.12185066938400269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,3,0.0995146632194519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,3,0.12206400434176128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,3,0.09941333532333374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,7,0.1262399951616923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,7,0.12595733006795248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,7,0.10333866874376933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,7,0.10342933734258015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,15,0.12809600432713827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,15,0.12812266747156778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,15,0.10770133137702942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,15,0.10752532879511516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,31,0.1546933352947235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,31,0.1546880006790161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,31,0.13617600003878275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,31,0.1362879971663157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,63,0.1564959983030955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,63,0.15659733613332114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,63,0.13806933164596558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,63,0.13829867045084634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,127,0.15660799543062845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,127,0.15758400162061056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,127,0.13824533422787985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,127,0.13820800185203552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,255,0.15826666355133057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,255,0.18553600708643594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,255,0.1381760040918986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,255,0.1669173240661621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,511,0.15869866808255514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,511,0.27749866247177124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,511,0.13821333646774292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,511,0.2611146569252014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,1023,0.1586666703224182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,1023,0.13832533359527588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,1,0.12180266777674358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,1023,0.46184531847635907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,1,0.123690664768219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,1,0.10117333134015401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,1,0.10135466853777568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,1023,0.44439999262491864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,3,0.12383466958999634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,3,0.1218826671441396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,3,0.10148800412813823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,3,0.10151466727256775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,7,0.12607466181119284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,7,0.126202662785848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,7,0.1033066709836324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,7,0.10390933354695638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,15,0.12982933719952902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,15,0.12811733285586038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,15,0.10752000411351521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,15,0.10972266395886739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,31,0.15471466382344565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,31,0.15455999970436096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,31,0.13623467087745667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,63,0.1566986640294393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,31,0.1366986632347107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,63,0.15650666753451029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,63,0.1381493310133616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,63,0.13795199990272522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,127,0.15849066774050394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,127,0.15661866466204324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,127,0.13824533422787985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,127,0.1383840044339498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,255,0.1586720049381256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,255,0.1852746605873108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,255,0.1383093297481537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,255,0.16708266735076904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,511,0.15866133570671082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,511,0.13809067010879517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,511,0.27752532561620075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,511,0.2609440088272095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,1023,0.15852800011634827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,1023,0.13822399576505026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,1023,0.47520001729329425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,1,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,1,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,1023,0.44378666083017987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,1,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,1,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,3,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,3,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,3,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,7,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,3,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,7,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,7,0.018778666853904724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,7,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,15,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,15,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,15,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,15,0.019573333362738293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,31,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,31,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,31,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,31,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,63,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,63,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,63,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,63,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,127,0.019573333362738293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,127,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,127,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,127,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,255,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,255,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,255,0.018816000471512478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,255,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,511,0.021530665457248688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,511,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,511,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,511,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,1023,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,1023,0.037615999579429626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,1023,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,1023,0.03219733387231827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,1,0.23641065756479898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,1,0.236842672030131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,1,0.19154665867487589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,1,0.19153066476186117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,3,0.2367146611213684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,3,0.23663467168807983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,3,0.19130132595698038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,3,0.19177067279815674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,7,0.24456000328063965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,7,0.2446720004081726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,7,0.19981332619984946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,7,0.19986132780710855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,15,0.2490666707356771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,15,0.20849599440892538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,15,0.2096266746520996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,15,0.251418670018514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,31,0.3020266691843669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,31,0.3022453387578328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,31,0.26545600096384686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,31,0.26576000452041626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,63,0.30587200323740643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,63,0.306277334690094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,63,0.26748265822728473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,63,0.26947200298309326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,127,0.3081706762313843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,127,0.2695733308792114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,127,0.3081600069999695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,127,0.2699306607246399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,255,0.30826133489608765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,255,0.26942400137583417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,255,0.36398935317993164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,1,0.2384213407834371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,255,0.32684266567230225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,1,0.19393066565195718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,1,0.2382026712099711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,1,0.1936053236325582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,3,0.2388746738433838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,3,0.19369600216547647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,3,0.23841599623362222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,3,0.1934773325920105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,7,0.2460319995880127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,7,0.20158400138219199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,7,0.24491200844446817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,7,0.20060799519220987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,15,0.2508853276570638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,15,0.2508106629053752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,15,0.2100106676419576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,15,0.20985066890716553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,31,0.3031040032704671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,31,0.2659146587053935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,31,0.30217599868774414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,31,0.2653866608937581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,63,0.30645867188771564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,63,0.269269327322642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,63,0.30619200070699054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,63,0.2683733304341634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,127,0.31019200881322223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,127,0.3084160089492798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,127,0.26977066198984784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,127,0.27111999193827313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,255,0.3096746603647868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,255,0.36423468589782715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,255,0.27132266759872437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,1,0.029493334392706554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,255,0.3265013297398885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,1,0.029690665503342945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,1,0.027744000156720478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,1,0.029088000456492107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,3,0.02976000060637792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,3,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,3,0.02922133356332779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,3,0.029370665550231934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,7,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,7,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,7,0.027978666126728058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,7,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,15,0.029322666426499683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,15,0.02932800104220708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,15,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,15,0.02757866680622101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,31,0.03062933435042699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,31,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,31,0.02739733209212621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,31,0.027690666417280834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,63,0.02977599948644638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,63,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,63,0.0276053324341774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,63,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,127,0.02916266769170761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,127,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,127,0.02770666778087616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,127,0.027589333554108936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,255,0.02996266633272171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,255,0.029722665747006733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,255,0.02736533433198929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,255,0.027749332288901012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,1,0.4659786621729533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,1,0.46614933013916016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,1,0.3760586579640706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,1,0.37609068552652997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,3,0.4660693407058716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,3,0.375765323638916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,3,0.46588265895843506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,3,0.37588798999786377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,7,0.48257601261138916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,7,0.4832746585210164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,7,0.3929866552352905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,7,0.3942399819691976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,15,0.49253865083058673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,15,0.49302931626637775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,15,0.4110026756922404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,15,0.4109333356221517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,31,0.5974239905675253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,31,0.5988373359044393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,31,0.5238613287607828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,31,0.5236426591873169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,63,0.6051520109176636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,63,0.6049600044886271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,63,0.529642661412557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,63,0.5298986832300822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,127,0.6103946765263876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,127,0.60971732934316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,127,0.5333493153254191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,127,0.5331893364588419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,1,0.46987199783325195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,1,0.4681386550267537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,1,0.38020265102386475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,1,0.3800106843312581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,3,0.4698400100072225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,3,0.4684319893519084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,3,0.37992533047993976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,3,0.37994666894276935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,7,0.483733336130778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,7,0.4825013478597005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,7,0.39445332686106366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,7,0.3943146864573161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,15,0.49287466208140057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,15,0.4121546745300293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,15,0.49270399411519367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,15,0.41281068325042725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,31,0.5984426736831665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,31,0.5975146691004435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,31,0.5251146554946899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,31,0.5249919891357422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,63,0.6054933468500773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,63,0.5293279886245728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,63,0.6047199964523315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,63,0.529642661412557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,127,0.5330400069554647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,127,0.6110080083211263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,127,0.6095626751581827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,127,0.5330666700998942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,1,0.04801600178082784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,1,0.04798933366934458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,1,0.046015997727712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,1,0.04560000201066335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,3,0.048245335618654885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,3,0.04805333415667216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,3,0.04615999758243561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,3,0.0459146648645401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,7,0.04789333542188009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,7,0.048058668772379555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,7,0.04567466676235199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,7,0.04590400060017904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,15,0.04789333542188009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,15,0.0459146648645401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,15,0.047968000173568726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,15,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,31,0.04852266609668732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,31,0.048309331138928734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,31,0.04570133487383524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,31,0.046053335070610046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,63,0.048207998275756836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,63,0.0476693312327067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,63,0.04572266836961111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,63,0.045941332976023354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,127,0.047839999198913574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,127,0.048010667165120445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,127,0.045754666129748024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,127,0.04594666759173075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,1,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,1,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,1,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,1,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,3,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,3,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,3,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,3,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,7,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,7,0.013653332988421122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,7,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,7,0.013440000514189402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,15,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,15,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,15,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,15,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,31,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,31,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,31,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,31,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,63,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,63,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,63,0.015664000064134598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,63,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,127,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,127,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,127,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,127,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,255,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,255,0.019648000597953796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,255,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,255,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,511,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,511,0.026762666801611584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,511,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,511,0.02571733295917511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,1023,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,1023,0.01562133307258288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,1023,0.04152533411979675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,1023,0.03972266614437103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,2047,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,2047,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,2047,0.07043733199437459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,2047,0.06849599877993266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,4095,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,4095,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,4095,0.12784533699353537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,4095,0.1269653340180715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,8191,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,8191,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,8191,0.2445759971936544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,8191,0.2428213357925415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,16383,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,16383,0.016000000139077503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,1,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,1,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,1,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,16383,0.493834654490153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,1,0.01414399966597557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,3,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,3,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,16383,0.5013386805852255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,3,0.014538666854302088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,3,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,7,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,7,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,7,0.014618666221698126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,7,0.013616000612576803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,15,0.013381333400805792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,15,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,15,0.014432000617186228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,31,0.016352000335852306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,15,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,31,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,31,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,31,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,63,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,63,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,63,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,63,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,127,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,127,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,127,0.016783999900023144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,127,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,255,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,255,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,255,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,255,0.018842666099468868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,511,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,511,0.016656000167131424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,511,0.02565866708755493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,511,0.025498665869235992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,1023,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,1023,0.04167999823888143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,1023,0.01666133354107539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,1023,0.039893334110577904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,2047,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,2047,0.07085333267847697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,2047,0.01661866654952367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,2047,0.06858133276303609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,4095,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,4095,0.1296266714731852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,4095,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,4095,0.1279093325138092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,8191,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,8191,0.24672534068425497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,8191,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,8191,0.2434026598930359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,16383,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,16383,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,16383,0.5008906523386637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,1,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,1,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,1,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,3,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,16383,0.5040853420893351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,3,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,3,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,3,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,7,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,7,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,7,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,15,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,15,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,31,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,31,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,31,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,63,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,63,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,63,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,63,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,127,0.012213333199421564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,127,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,127,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,255,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,255,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,255,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,255,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,511,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,511,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,511,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,511,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,1023,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,1023,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,1023,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,1023,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,2047,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,2047,0.016704000532627106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,2047,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,2047,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,4095,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,4095,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,4095,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,4095,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,8191,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,8191,0.025450666745503742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,8191,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,8191,0.02369600037733714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,16383,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,16383,0.0415786678592364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,16383,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,16383,0.03382399926582972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,1,0.7419466972351074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,1,0.9233120282491049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,1,0.9255786736806234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,1,0.7421066761016846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,3,0.7428106466929117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,3,0.9243306318918864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,3,0.9238826433817545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,3,0.7427306969960531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,7,0.957856019337972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,7,0.9570346673329672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,7,0.7792212963104248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,7,0.7810826301574707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,15,0.97707200050354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,15,0.9768959681193033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,15,0.815994660059611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,15,0.8145120143890381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,31,1.1877813339233398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,31,1.1892213026682537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,31,1.0408586661020915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,31,1.0414133071899414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,63,1.2034026781717937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,63,1.2019840081532795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,63,1.0517280101776123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,63,1.0518186887105305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,1,0.9304107030232748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,1,0.9295146465301514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,1,0.7530826727549235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,1,0.7533439795176188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,3,0.9311733245849609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,3,0.9299466609954834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,3,0.7535359859466553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,3,0.7532959779103597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,7,0.9589440027872721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,7,0.9578986962636312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,7,0.7813279628753662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,7,0.7811786333719889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,15,0.8164479732513428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,15,0.9792160193125407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,15,0.8167893091837565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,15,0.9776586691538492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,31,1.0419360001881917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,31,1.1897013187408447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,31,1.0417226950327556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,31,1.1883573532104492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,63,1.0529013474782307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,63,1.052181323369344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,63,1.201690673828125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,63,1.2052213350931804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,1,0.08849599957466125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,1,0.08087466657161713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,1,0.08096533517042796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,1,0.086709330479304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,3,0.08733866612116496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,3,0.0813973347345988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,3,0.0885759989420573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,3,0.08078399797280629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,7,0.08725333213806152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,7,0.08885332942008972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,7,0.08089600006739299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,7,0.0806879997253418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,15,0.08698667089144389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,15,0.08097599943478902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,15,0.08910399675369263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,15,0.0806933343410492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,31,0.08886933326721191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,31,0.08099199831485748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,31,0.08780800302823384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,31,0.08066666622956593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,63,0.08891200025876363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,63,0.08916266759236653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,63,0.08190399905045827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,63,0.08097599943478902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,1,1.4739359219868977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,1,1.4734667142232258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,1,1.840453306833903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,1,1.8424213727315266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,3,1.4754506746927898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,3,1.4774880409240723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,3,1.8402719497680664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,3,1.8404107093811035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,7,1.5512107213338215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,7,1.5510080655415852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,7,1.9052213033040364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,7,1.9083147048950195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,15,1.625450611114502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,15,1.6231892903645833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,15,1.9466026624043782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,15,1.9459412892659504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,31,2.0735413233439126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,31,2.074650605519613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,31,2.3707946141560874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,31,2.367546717325846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,fp8,1,1.501423994700114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,fp8,1,1.498751958211263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,1,1.8525813420613606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,1,1.8517759641011555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,fp8,3,1.4998879432678223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,fp8,3,1.5007200241088867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,3,1.855829397837321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,3,1.8519199689229329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,fp8,7,1.5573280652364094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,fp8,7,1.554693380991618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,7,1.9128586451212566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,7,1.9106027285257976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,fp8,15,1.6282560030619304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,15,1.9836266835530598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,15,1.9681599934895833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,fp8,15,1.6272106170654297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,fp8,31,2.0794240633646646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,31,2.443354606628418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,31,2.446085294087728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,1,0.16691199938456217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,1,0.16660799582799277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,fp8,31,2.0797173182169595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,1,0.15611732999483743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,1,0.1548799971739451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,3,0.15504533052444458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,3,0.16602133711179098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,7,0.1665013333161672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,7,0.16580800215403238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,7,0.1549013356367747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,15,0.16666133205095926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,3,0.16591466466585794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,15,0.1651946703592936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,3,0.15686399737993875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,15,0.15479466319084167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,15,0.15450132886568704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,31,0.16704533497492471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,7,0.1551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,31,0.16671466827392578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,1,0.02178666740655899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,1,0.02178666740655899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,31,0.1546986699104309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,1,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,1,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,3,0.021546666820844013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,3,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,31,0.15494400262832642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,3,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,3,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,7,0.0216799999276797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,7,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,7,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,7,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,15,0.022266666094462078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,15,0.022954667607943218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,15,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,15,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,31,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,31,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,31,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,31,0.02364266663789749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,63,0.025642665723959606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,63,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,63,0.023733332753181458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,63,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,127,0.02569066733121872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,127,0.02571200082699458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,127,0.02366400013367335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,127,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,255,0.025701334079106648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,255,0.029722665747006733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,255,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,255,0.027765333652496338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,511,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,511,0.04375466704368591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,511,0.023786666492621105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,511,0.04164800047874451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,1023,0.027072000006834667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,1023,0.06862399975458781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,1023,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,1023,0.06671466430028279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,2047,0.027301333844661713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,2047,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,2047,0.1204853355884552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,2047,0.11948266625404358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,4095,0.027301333844661713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,4095,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,4095,0.22601600488026938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,8191,0.027263998985290527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,4095,0.22246932983398438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,8191,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,1,0.02130666623512904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,1,0.021546666820844013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,1,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,8191,0.4347626765569051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,1,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,3,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,8191,0.4314560095469157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,3,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,3,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,3,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,7,0.0220320001244545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,7,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,7,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,7,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,15,0.023018665611743927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,15,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,15,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,15,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,31,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,31,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,31,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,31,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,63,0.025706666211287182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,63,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,63,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,63,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,127,0.02553066611289978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,127,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,127,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,127,0.023546665906906128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,255,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,255,0.029482667644818623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,255,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,255,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,511,0.02573866645495097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,511,0.043653334180514015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,511,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,511,0.04167999823888143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,1023,0.026741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,1023,0.06863466898600261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,1023,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,1023,0.06645333270231883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,2047,0.025594666600227356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,2047,0.12154666582743327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,2047,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,2047,0.11896000305811565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,4095,0.027024000883102417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,4095,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,4095,0.2262399991353353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,4095,0.22265599171320596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,8191,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,8191,0.024282666544119518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,8191,0.4370559851328532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,1,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,1,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,1,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,8191,0.4310773213704427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,1,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,3,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,3,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,3,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,3,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,7,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,7,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,15,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,15,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,15,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,15,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,31,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,31,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,31,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,31,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,63,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,63,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,63,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,127,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,127,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,127,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,255,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,255,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,255,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,511,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,511,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,511,0.011722666521867117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,511,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,1023,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,1023,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,1023,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,1023,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,2047,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,2047,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,2047,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,2047,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,4095,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,4095,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,4095,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,4095,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,8191,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,8191,0.0400693342089653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,8191,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,8191,0.03358400116364161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,1,0.03562133262554804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,1,0.03183999905983607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,3,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,3,0.03183466692765554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,7,0.03578133384386698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,7,0.03339733431736628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,15,0.043605332573254905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,15,0.0395359992980957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,31,0.04381866753101349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,31,0.03979733337958654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,63,0.04385066529115041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,63,0.04091199984153112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,127,0.05199466645717621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,127,0.047728002071380615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,255,0.07482666770617168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,511,0.12382933497428894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,511,0.1199679970741272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,255,0.07179200152556102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,1023,0.22219733397165933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,1023,0.21390400330225626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,2047,0.41703466574350995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,2047,0.40256532033284503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,1,0.03533333291610082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,1,0.03146133323510488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,4095,0.8084212938944498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,3,0.035887998839219414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,3,0.03179199993610382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,7,0.03584533433119456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,4095,0.7803413073221842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,7,0.03335466732581457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,15,0.03982399900754293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,15,0.04358933369318644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,31,0.04367466767628988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,31,0.03991466760635376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,63,0.044165333112080894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,63,0.04005866746107737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,127,0.051557332277297974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,127,0.04780800143877665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,255,0.07673066854476929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,255,0.07064533233642578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,511,0.12382933497428894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,511,0.11983999609947205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,1023,0.222271998723348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,1023,0.213919997215271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,2047,0.42587733268737793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,2047,0.4025973478953044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,1,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,1,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,4095,0.8315786520640055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,3,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,4095,0.7893973191579183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,3,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,7,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,7,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,15,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,15,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,31,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,31,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,63,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,63,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,127,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,127,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,255,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,255,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,511,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,511,0.014709333578745524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,1023,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,1023,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,2047,0.04012800008058548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,2047,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,4095,0.061936000982920326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,1,0.00878399983048439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,4095,0.04154666761557261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,1,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,3,0.00898133342464765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,3,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,7,0.00874133345981439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,7,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,15,0.009141333401203156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,15,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,31,0.008912000184257826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,31,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,63,0.008837333569924036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,63,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,127,0.009039999917149544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,127,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,255,0.01951466624935468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,255,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,511,0.023056000471115112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,511,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,1023,0.029253333806991577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,1023,0.02958400050799052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,2047,0.039701332648595176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,2047,0.03999999910593033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,4095,0.06251733501752217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,4095,0.05834133426348368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,8191,0.10539199908574422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,8191,0.09485333164532979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,1,0.009082666908701261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,3,0.009312000125646591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,3,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,16383,0.16691199938456217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,16383,0.1935840050379435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,7,0.009119999905427298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,7,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,15,0.008816000074148178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,31,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,15,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,63,0.009039999917149544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,63,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,127,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,127,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,255,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,255,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,511,0.02306666721900304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,511,0.025941332181294758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,1023,0.03145600110292435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,1023,0.028368001182874043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,2047,0.03956799954175949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,2047,0.040149333576361336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,4095,0.062261333068211876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,4095,0.05820799867312113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,8191,0.10558933019638062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,8191,0.09510933359464009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,1,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,16383,0.19330666462580362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,1,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,3,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,16383,0.16774400075276694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,3,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,7,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,7,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,15,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,15,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,63,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,63,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,127,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,255,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,511,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,511,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,1023,0.011551999797423681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,1023,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,2047,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,2047,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,4095,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,4095,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,8191,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,8191,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,1,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,16383,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,16383,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,1,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,3,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,3,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,7,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,7,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,15,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,15,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,31,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,31,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,63,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,63,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,127,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,127,0.021013334393501282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,255,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,255,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,511,0.021221332252025604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,511,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,1023,0.034245334565639496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,1023,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,2047,0.051957334081331887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,2047,0.0498933345079422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,4095,0.0909493366877238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,4095,0.08682666222254436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,8191,0.16885866721471152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,8191,0.1565013329188029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,1,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,1,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,16383,0.324565331141154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,3,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,3,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,16383,0.2958880066871643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,7,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,7,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,15,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,15,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,31,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,31,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,63,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,63,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,127,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,127,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,255,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,255,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,511,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,511,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,1023,0.03142400085926056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,1023,0.03384533276160558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,2047,0.05211733281612396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,2047,0.050240000089009605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,4095,0.09121599793434143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,4095,0.08534933129946391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,8191,0.16870933771133423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,8191,0.1565600037574768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,1,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,16383,0.32470399141311646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,1,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,16383,0.29639466603597003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,3,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,7,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,3,0.011567999919255575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,7,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,15,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,15,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,31,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,63,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,31,0.011744000017642975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,63,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,127,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,127,0.011519999553759893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,255,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,511,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,511,0.011861333002646765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,1023,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,1023,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,2047,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,2047,0.01595199977358182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,4095,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,4095,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,8191,0.019802667200565338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,8191,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,16383,0.02279466638962428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,1,0.06434666613737743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,16383,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,1,0.05433600147565206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,3,0.06467733283837636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,3,0.05634133517742157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,7,0.06666666766007741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,7,0.05805333455403646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,15,0.07097599903742473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,31,0.07868266602357228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,15,0.08021866778532664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,31,0.0722453345855077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,63,0.0804746647675832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,63,0.07271466652552287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,127,0.09494933485984802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,127,0.0870293378829956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,255,0.14235732952753702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,255,0.1333013375600179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,511,0.234442671140035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,511,0.22612800200780234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,1023,0.42508800824483234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,1023,0.4066133499145508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,1,0.06406933565934499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,1,0.054272000988324486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,2047,0.803546667098999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,3,0.06494399905204773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,3,0.05643733342488607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,2047,0.7716639836629232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,7,0.06646400193373363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,7,0.058506667613983154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,15,0.0805920014778773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,15,0.07076799869537354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,31,0.07291199763615926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,31,0.07896000146865845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,63,0.0804906686147054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,63,0.07260266443093617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,127,0.09499733646710713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,127,0.08689600229263306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,255,0.1423733333746592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,255,0.13402666648228964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,511,0.23499733209609985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,511,0.2262079914410909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,1023,0.42957866191864014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,1023,0.4064799944559733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,1,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,3,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,1,0.01635733370979627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,2047,0.8262399832407633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,3,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,7,0.017680000513792038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,7,0.015962666521469753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,2047,0.7806133429209391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,15,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,31,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,31,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,15,0.01599466676513354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,63,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,63,0.016261332978804905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,127,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,255,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,127,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,255,0.016330666840076447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,511,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,511,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,1023,0.037087999284267426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,1023,0.025098666548728943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,1,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,2047,0.06284800171852112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,2047,0.04302933315436045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,1,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,3,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,3,0.014661333213249842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,7,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,7,0.013653332988421122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,15,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,15,0.01762666677435239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,31,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,31,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,63,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,63,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,127,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,127,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,255,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,255,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,511,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,511,0.025594666600227356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,1023,0.045968001087506614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,1023,0.03982399900754293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,2047,0.0747519979874293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,2047,0.06843199829260509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,4095,0.13425599535306296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,4095,0.12384000420570374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,8191,0.2507893244425456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,8191,0.23683732748031616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,1,0.017690667261679966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,1,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,16383,0.48629868030548096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,3,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,3,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,16383,0.45923201243082684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,7,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,7,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,15,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,15,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,31,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,31,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,63,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,63,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,127,0.020037333170572918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,127,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,255,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,255,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,511,0.03151999910672506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,511,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,1023,0.045824001232783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,1023,0.03982399900754293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,2047,0.07474666833877563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,2047,0.06855466465155284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,4095,0.1342026690642039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,4095,0.12395733594894409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,8191,0.2510666648546855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,8191,0.23762667179107666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,1,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,16383,0.4914506673812866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,1,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,3,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,3,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,16383,0.46050135294596356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,7,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,7,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,15,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,31,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,15,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,31,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,127,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,255,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,255,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,511,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,511,0.012527999778588613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,1023,0.012682666381200155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,1023,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,2047,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,2047,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,4095,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,4095,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,8191,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,8191,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,16383,0.039690665900707245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,16383,0.025583999852339428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,1,0.12171199917793274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,1,0.10117333134015401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,3,0.12401599685351054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,3,0.10342400272687276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,7,0.12626133362452188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,7,0.10960533221562703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,15,0.1525759994983673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,15,0.1360586682955424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,31,0.1532426675160726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,31,0.13610133528709412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,63,0.15440533558527628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,127,0.18147732814153036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,127,0.1665173371632894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,63,0.13710400462150574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,255,0.27535466353098553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,255,0.2571840087572734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,511,0.45770665009816486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,511,0.43829333782196045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,1,0.12156266967455547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,1,0.10142399867375691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,3,0.12381333112716675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,1023,0.8345920244852701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,1023,0.7939199606577555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,3,0.10419733325640361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,7,0.12584533294041952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,7,0.10945066809654236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,15,0.1544426679611206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,15,0.13647466897964478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,31,0.15244799852371216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,31,0.1362773378690084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,63,0.1546346644560496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,63,0.1368000010649363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,127,0.181386669476827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,127,0.16672533750534058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,255,0.25704532861709595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,255,0.2757546703020732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,511,0.4699733257293701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,511,0.4371306498845418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,1,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,1023,0.860368013381958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,1,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,1023,0.8085546493530273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,3,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,3,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,7,0.02292266736427943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,15,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,7,0.02107200026512146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,15,0.02130666623512904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,31,0.023071999351183575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,31,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,63,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,63,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,127,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,127,0.020799999435742695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,255,0.02309866746266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,255,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,511,0.03782399992148081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,511,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,1023,0.05881600081920624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,1023,0.03944533318281174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,1,0.23589332898457846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,1,0.19367466370264688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,3,0.2424160043398539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,3,0.20164267222086588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,7,0.20998400449752808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,7,0.24655999739964804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,15,0.29977599779764813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,15,0.26497600475947064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,31,0.2998613317807515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,31,0.26543466250101727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,63,0.30214399099349976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,63,0.26732800404230755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,127,0.3553440173467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,127,0.3248266577720642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,255,0.5419520139694214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,1,0.1955733299255371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,1,0.23583465814590454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,255,0.506880005200704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,3,0.2425653338432312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,3,0.20151466131210327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,7,0.20997865994771323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,7,0.24762133757273355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,15,0.3001333276430766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,15,0.2652053236961365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,31,0.30000533660252887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,31,0.2670666575431824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,63,0.30219199260075885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,63,0.26714134216308594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,127,0.3569706678390503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,127,0.32681065797805786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,1,0.033615998923778534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,255,0.5650346676508585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,255,0.5068693161010742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,1,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,3,0.03366933266321818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,3,0.03150933235883713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,7,0.035589332381884255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,7,0.03146666785081228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,15,0.03442666679620743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,31,0.03537066777547201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,15,0.03139200061559677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,31,0.03187733391920725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,63,0.035461333890755974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,63,0.03148266673088074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,127,0.03472000112136205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,127,0.0317546675602595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,255,0.039850667119026184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,255,0.03156266609827677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,1,0.3797866503397624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,1,0.46403733889261883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,3,0.47806934515635174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,3,0.39485331376393634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,7,0.4142186641693115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,7,0.487552007039388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,15,0.5930399894714355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,15,0.5221333503723145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,31,0.5255200068155924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,31,0.5931306680043539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,63,0.5970933437347412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,63,0.5270400047302246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,127,0.6440639893213908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,127,0.7070933183034261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,1,0.38179198900858563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,1,0.4618719816207886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,3,0.39665599664052326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,3,0.47838934262593585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,7,0.4144373337427775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,7,0.4882133404413859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,15,0.5948906739552816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,15,0.5225119988123575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,31,0.5250133275985718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,31,0.592906673749288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,63,0.5278026660283407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,63,0.5982293287913004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,127,0.7495253086090088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,1,0.06035733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,1,0.05179733534653982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,127,0.643882671991984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,3,0.060720001657803856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,3,0.05233600238958994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,7,0.060133333007494606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,7,0.054154664278030396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,15,0.060405333836873375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,15,0.05230399966239929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,31,0.060496002435684204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,31,0.05273599922657013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,63,0.06028800209363302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,63,0.052245333790779114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,1,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,127,0.06144000093142191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,1,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,3,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,127,0.05410666763782501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,3,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,7,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,7,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,15,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,15,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,31,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,31,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,63,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,63,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,127,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,127,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,255,0.025642665723959606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,255,0.02552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,511,0.039664000272750854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,511,0.03995199998219808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,1023,0.06980800131956737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,1023,0.06840000053246816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,2047,0.12761066357294717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,2047,0.1260426640510559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,4095,0.24266133705774942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,4095,0.2385653257369995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,8191,0.4742559989293416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,8191,0.466480016708374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,1,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,3,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,1,0.013749333719412485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,3,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,7,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,16383,0.9688213666280111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,7,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,15,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,15,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,31,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,31,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,63,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,63,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,16383,1.0418986479441326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,127,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,127,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,255,0.02554133286078771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,255,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,511,0.039919999738534294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,511,0.039674667020638786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,1023,0.06862399975458781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,1023,0.06840533514817555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,2047,0.1283680001894633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,2047,0.12571733196576437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,4095,0.2430186669031779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,4095,0.23916266361872354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,8191,0.4764639933904012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,8191,0.4678613344828288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,1,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,1,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,3,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,3,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,7,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,15,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,15,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,16383,1.0661919911702473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,16383,1.0541333357493083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,31,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,31,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,63,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,63,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,127,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,127,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,255,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,255,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,511,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,511,0.01221866657336553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,1023,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,1023,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,2047,0.01878400022784869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,2047,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,4095,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,4095,0.02006400004029274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,8191,0.040037333965301514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,8191,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,16383,0.06100266675154368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,16383,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,1,0.921557346979777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,1,0.7505280176798502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,3,0.9502027034759521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,3,0.7820906639099121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,7,0.9681119918823242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,7,0.8203946749369303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,15,1.0373760064442952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,15,1.179696003595988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,31,1.1793440183003743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,31,1.0417866706848145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,63,1.1886719862620037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,63,1.0470879872639973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,1,0.9172000090281168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,1,0.7567199865976969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,3,0.7849600315093994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,3,0.9517280260721842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,7,0.8229546546936035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,7,0.9724746545155843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,15,1.0377120176951091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,15,1.187391996383667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,31,1.0512693723042805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,31,1.209781328837077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,63,1.0817173322041829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,63,1.2392586867014568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,1,0.10968533158302307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,1,0.09729066491127014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,3,0.10928533474604289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,3,0.09726933638254802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,7,0.10939199725786845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,7,0.0981066624323527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,15,0.10968533158302307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,15,0.09735999504725139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,31,0.10997866590817769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,31,0.09787733356157939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,63,0.10951466361681621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,63,0.09751466910044353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,1,1.5945653915405273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,1,1.9418293635050456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,3,1.6365493138631184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,3,2.0012319882710776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,7,1.70360533396403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,7,2.0439732869466147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,15,2.1261866887410483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,15,2.410618623097738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,31,2.137829303741455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,31,2.4141386349995932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,fp8,1,1.6545066833496094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,1,2.034506638844808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,fp8,3,1.6840906143188477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,3,2.0782507260640464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,fp8,7,1.7502293586730957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,7,2.112175941467285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,fp8,15,2.1513867378234863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,15,2.443941275278727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,fp8,31,2.160026709238688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,31,2.4501333236694336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,1,0.20800000429153442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,1,0.18130133549372354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,3,0.18107199668884277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,3,0.2060746749242147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,7,0.20769067605336508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,7,0.1817013422648112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,15,0.20643200476964316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,15,0.1813760002454122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,1,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,1,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,3,0.02162133405605952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,3,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,7,0.021525333325068157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,7,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,15,0.025605333348115284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,15,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,31,0.025573333104451496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,31,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,31,0.20775999625523886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,63,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,63,0.02327999969323476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,31,0.18152000506718954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,127,0.029701332251230877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,127,0.027493332823117573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,255,0.04188799858093262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,255,0.041402667760849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,511,0.06833600004514058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,511,0.06644799808661143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,1023,0.11979200442632039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,1023,0.1176479955514272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,2047,0.2243679960568746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,2047,0.21818133195241293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,4095,0.4311893383661906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,4095,0.42046932379404706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,1,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,1,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,3,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,3,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,7,0.021503999829292297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,7,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,8191,0.8271466890970866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,8191,0.8468533356984457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,15,0.025146665672461193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,15,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,31,0.025397333006064098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,31,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,63,0.025333332518736523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,63,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,127,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,127,0.02770666778087616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,255,0.04208533465862274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,255,0.03989866624275843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,511,0.06864533325036366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,511,0.0668213317791621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,1023,0.11973866820335388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,1023,0.1176639993985494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,2047,0.22445867458979288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,2047,0.21821333964665732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,4095,0.43295466899871826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,4095,0.4214080174763997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,1,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,1,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,3,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,8191,0.8614559968312582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,3,0.01145600030819575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,8191,0.8279413382212321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,7,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,7,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,15,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,31,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,31,0.011727999895811081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,63,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,63,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,127,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,127,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,255,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,255,0.012319999436537424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,511,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,511,0.012096000214417776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,1023,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,1023,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,2047,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,2047,0.019674666225910187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,4095,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,4095,0.03797333439191183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,1,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,8191,0.059808000922203064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,8191,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,1,0.025616000096003216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,1,0.023413332800070446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,1,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,3,0.026373334228992462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,3,0.026602665583292644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,3,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,7,0.02720000098148982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,3,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,7,0.027834666272004444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,7,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,7,0.02359466751416524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,15,0.027493332823117573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,15,0.02741866558790207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,15,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,15,0.02350933353106181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,31,0.03177600105603536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,31,0.03160000095764796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,31,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,31,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,63,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,63,0.03164800008138021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,63,0.029701332251230877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,63,0.029717333614826202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,127,0.03154666721820831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,127,0.031514666974544525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,127,0.029274667302767437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,127,0.02923733244339625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,255,0.03204799940188726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,255,0.0376800000667572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,255,0.029274667302767437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,255,0.03386666625738144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,511,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,511,0.05421866476535797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,511,0.029626667499542236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,1023,0.031983998914559685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,511,0.05081599950790405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,1023,0.029279999434947968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,1023,0.08701866865158081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,1023,0.08307733138402303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,2047,0.03143466760714849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,2047,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,2047,0.15267733732859293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,2047,0.14843199650446573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,4095,0.03163733333349228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,4095,0.029535998900731403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,4095,0.2850400010744731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,8191,0.03176533430814743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,4095,0.2796906630198161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,8191,0.0295413335164388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,1,0.026634665826956432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,1,0.027317332724730175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,1,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,8191,0.5499200026194254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,1,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,3,0.02611200014750163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,8191,0.5437279939651489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,3,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,3,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,3,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,7,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,7,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,7,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,7,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,15,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,15,0.02737066646416982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,15,0.02362666775782903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,15,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,31,0.03134933362404505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,31,0.02922133356332779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,31,0.03148266673088074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,31,0.0295413335164388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,63,0.03169066707293192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,63,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,63,0.02937600016593933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,63,0.029296000798543293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,127,0.031632001201311745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,127,0.03143466760714849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,127,0.02935466667016347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,127,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,255,0.031744000812371574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,255,0.03756800045569738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,255,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,255,0.03367999941110611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,511,0.03163733333349228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,511,0.053904001911481224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,511,0.029370665550231934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,511,0.050250664353370667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,1023,0.031727999448776245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,1023,0.0867733359336853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,1023,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,1023,0.08265066643555959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,2047,0.031701333820819855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,2047,0.1525866687297821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,2047,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,2047,0.14877333243687949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,4095,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,4095,0.029824001093705494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,4095,0.28379732370376587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,4095,0.2816426753997803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,8191,0.031717332700888314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,8191,0.029792000850041706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,8191,0.5557813247044882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,1,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,1,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,1,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,8191,0.5442026853561401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,3,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,1,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,3,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,3,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,3,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,7,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,7,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,7,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,15,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,15,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,15,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,15,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,31,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,31,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,31,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,31,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,63,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,63,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,63,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,63,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,127,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,127,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,127,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,255,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,255,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,255,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,511,0.012746666868527731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,511,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,511,0.011930666863918304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,511,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,1023,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,1023,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,1023,0.011968000481526056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,1023,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,2047,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,2047,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,2047,0.018122666825850803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,2047,0.014277332772811254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,4095,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,4095,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,4095,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,4095,0.023749334116776783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,8191,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,8191,0.04004266609748205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,8191,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,1,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,1,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,1,0.012026666353146235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,8191,0.03339733431736628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,1,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,3,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,3,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,3,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,3,0.012389333297808966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,7,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,7,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,7,0.012250666817029318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,7,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,15,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,15,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,31,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,15,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,31,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,31,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,31,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,63,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,63,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,127,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,127,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,127,0.011621333658695221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,127,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,255,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,255,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,255,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,255,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,511,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,511,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,511,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,511,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,1023,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,1023,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,1023,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,1023,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,2047,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,2047,0.02369066576162974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,2047,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,2047,0.02183466653029124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,4095,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,4095,0.03886399914820989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,4095,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,4095,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,8191,0.014783999572197596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,8191,0.06074133515357971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,8191,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,1,0.009306666751702627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,1,0.009103999783595404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,1,0.009610666582981745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,8191,0.05198400219281515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,1,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,3,0.009296000003814697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,3,0.00878399983048439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,3,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,3,0.009423999736706415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,7,0.00891733355820179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,7,0.009312000125646591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,7,0.010191999996701876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,7,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,15,0.009375999992092451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,15,0.009103999783595404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,15,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,31,0.009119999905427298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,31,0.008725333337982496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,31,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,31,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,63,0.009535999968647957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,63,0.009994666402538618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,63,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,127,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,127,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,127,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,255,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,255,0.009232000137368837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,255,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,255,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,511,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,511,0.013461332768201828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,511,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,1023,0.017893332988023758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,1023,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,511,0.013594667116800943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,1023,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,1023,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,2047,0.018805333723624546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,2047,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,2047,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,2047,0.027269333600997925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,4095,0.018751999984184902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,4095,0.03136533250411352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,4095,0.022064000368118286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,4095,0.03580799947182337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,8191,0.017903999735911686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,8191,0.04827199876308441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,8191,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,8191,0.04695466657479604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,16383,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,16383,0.07653333246707916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,16383,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,16383,0.07291199763615926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,32767,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,32767,0.13377066453297934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,1,0.008709333216150602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,32767,0.021664001047611237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,1,0.008709333216150602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,1,0.010133333504199982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,1,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,32767,0.11990400155385335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,3,0.008938666433095932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,3,0.009008000294367472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,3,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,3,0.010415999839703241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,7,0.009152000149091085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,7,0.00915733352303505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,7,0.010288000106811523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,15,0.00903466654320558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,15,0.008912000184257826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,15,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,15,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,31,0.008736000085870424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,31,0.009152000149091085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,63,0.00916800027092298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,63,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,63,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,63,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,127,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,127,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,127,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,127,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,255,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,255,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,255,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,255,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,511,0.016741332908471424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,511,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,511,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,511,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,1023,0.018805333723624546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,1023,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,1023,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,1023,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,2047,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,2047,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,2047,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,2047,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,4095,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,4095,0.03169066707293192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,4095,0.02107200026512146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,4095,0.035775999228159584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,8191,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,8191,0.04663466910521189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,8191,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,8191,0.047968000173568726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,16383,0.017903999735911686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,16383,0.07654933134714763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,16383,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,16383,0.07122666637102763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,32767,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,32767,0.13424533605575562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,32767,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,1,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,1,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,1,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,1,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,3,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,32767,0.11982933680216472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,3,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,3,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,7,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,7,0.01032533310353756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,15,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,15,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,15,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,15,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,31,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,31,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,31,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,31,0.011525332927703857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,63,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,127,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,127,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,127,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,127,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,255,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,255,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,255,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,511,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,511,0.01179733375708262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,511,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,511,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,1023,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,1023,0.01156266654531161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,1023,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,1023,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,2047,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,2047,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,2047,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,2047,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,4095,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,4095,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,4095,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,4095,0.01246400053302447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,8191,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,8191,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,8191,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,8191,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,16383,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,16383,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,16383,0.01883200059334437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,16383,0.018383999665578205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,32767,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,32767,0.01964266722400983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,32767,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,1,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,1,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,1,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,1,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,3,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,32767,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,3,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,3,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,7,0.010250666489203772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,7,0.009578666960199675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,7,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,15,0.009408000235756239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,15,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,15,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,31,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,31,0.00892800030608972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,31,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,31,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,63,0.009226666763424873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,63,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,63,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,127,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,127,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,127,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,255,0.01191466674208641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,255,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,255,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,255,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,511,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,511,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,511,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,511,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,1023,0.00955200009047985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,1023,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,1023,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,1023,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,2047,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,2047,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,2047,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,2047,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,4095,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,4095,0.01009599988659223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,4095,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,4095,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,8191,0.01044800008336703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,8191,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,8191,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,8191,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,16383,0.010522666076819101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,16383,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,16383,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,16383,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,32767,0.011727999895811081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,32767,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,32767,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,1,0.008789333204428354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,1,0.009541333342591921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,32767,0.01882133384545644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,3,0.009365333244204521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,3,0.008901333436369896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,3,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,7,0.009301333377758661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,3,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,7,0.008890666688481966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,7,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,7,0.011621333658695221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,15,0.008890666688481966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,15,0.009050666665037474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,15,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,15,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,31,0.00922133338948091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,31,0.009381333366036415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,31,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,63,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,63,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,127,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,127,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,127,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,127,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,255,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,255,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,255,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,255,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,511,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,511,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,511,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,511,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,1023,0.01960533360640208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,1023,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,1023,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,1023,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,2047,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,2047,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,2047,0.031685332457224526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,2047,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,4095,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,4095,0.04019733270009359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,4095,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,4095,0.03999999910593033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,8191,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,8191,0.062421331803003945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,8191,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,8191,0.058490668733914696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,16383,0.0216799999276797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,16383,0.10557333628336589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,16383,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,16383,0.0965119997660319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,32767,0.019546666493018467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,32767,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,32767,0.19353065888086954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,1,0.009119999905427298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,1,0.00879466657837232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,1,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,1,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,32767,0.16876266400019327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,3,0.009050666665037474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,3,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,7,0.00895999992887179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,7,0.009125333279371262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,7,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,15,0.009312000125646591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,15,0.009072000160813332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,15,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,15,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,31,0.009061333412925402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,31,0.009258666386206945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,31,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,63,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,63,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,127,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,127,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,127,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,127,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,255,0.010480000327030817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,255,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,255,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,255,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,511,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,511,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,511,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,511,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,1023,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,1023,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,1023,0.023621333142121632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,1023,0.027232001225153606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,2047,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,2047,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,2047,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,4095,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,2047,0.031712000568707786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,4095,0.03956799954175949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,4095,0.025013332565625507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,4095,0.040448000033696495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,8191,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,8191,0.06241600215435028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,8191,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,8191,0.05820799867312113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,16383,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,16383,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,16383,0.10567466417948405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,16383,0.0953653355439504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,32767,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,32767,0.1934986710548401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,32767,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,1,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,1,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,1,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,32767,0.16874667008717856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,3,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,3,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,7,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,7,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,15,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,15,0.010469333579142889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,15,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,31,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,31,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,31,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,63,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,63,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,63,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,63,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,127,0.01179733375708262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,127,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,255,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,255,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,255,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,511,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,511,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,511,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,511,0.012719999998807907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,1023,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,1023,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,1023,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,1023,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,2047,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,2047,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,2047,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,2047,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,4095,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,4095,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,4095,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,4095,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,8191,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,8191,0.01578666642308235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,8191,0.011567999919255575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,8191,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,16383,0.011663999408483505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,16383,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,16383,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,16383,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,32767,0.011519999553759893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,32767,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,1,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,32767,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,1,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,1,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,1,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,32767,0.02070933332045873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,3,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,3,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,3,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,7,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,15,0.00914666677514712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,7,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,15,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,15,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,31,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,31,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,31,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,31,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,63,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,63,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,63,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,63,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,127,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,127,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,255,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,255,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,255,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,511,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,511,0.011957333733638128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,511,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,511,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,1023,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,1023,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,1023,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,1023,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,2047,0.011530666301647821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,2047,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,2047,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,2047,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,4095,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,4095,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,4095,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,4095,0.011488000551859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,8191,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,8191,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,8191,0.011626667032639185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,8191,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,16383,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,16383,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,16383,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,16383,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,32767,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,32767,0.02359466751416524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,1,0.04596266647179922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,32767,0.013999999811251959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,1,0.04573333263397217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,1,0.03771200031042099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,32767,0.02202133337656657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,1,0.03777066618204117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,3,0.04558933277924856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,3,0.04610666632652283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,3,0.03774400055408478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,3,0.03792533278465271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,7,0.04607999821503957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,7,0.04594666759173075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,7,0.03976000100374222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,7,0.03972800076007843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,15,0.04799466828505198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,15,0.048170665899912514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,15,0.03995199998219808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,15,0.04014399896065394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,31,0.05618133147557577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,31,0.05613866448402405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,31,0.05032533407211304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,31,0.04984533290068308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,63,0.056474665800730385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,63,0.05659733215967814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,63,0.05011733373006185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,63,0.05021866659323374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,127,0.057909334699312844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,127,0.056287998954455055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,127,0.05013333261013031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,127,0.05030400057633718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,255,0.056032001972198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,255,0.06689600149790446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,255,0.050069332122802734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,255,0.06039999922116598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,511,0.05765333275000254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,511,0.04993066688378652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,511,0.09738133351008098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,511,0.09131200114885966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,1023,0.05864533285299937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,1023,0.1592693328857422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,1023,0.05012799799442291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,1023,0.1546026666959127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,2047,0.05789866546789805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,2047,0.2858826716740926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,2047,0.05097599824269613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,2047,0.2779360016187032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,4095,0.0580320010582606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,1,0.045893331368764244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,4095,0.05065600077311198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,1,0.046021332343419395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,4095,0.5391519864400228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,1,0.038032000263532005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,1,0.03755199909210205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,4095,0.5295999844868978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,3,0.04577066500981649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,3,0.03788800040880839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,3,0.046021332343419395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,3,0.03751999884843826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,7,0.045978665351867676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,7,0.04610666632652283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,7,0.03991466760635376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,7,0.039605334401130676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,15,0.04775999983151754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,15,0.04788800080617269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,15,0.039877332746982574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,15,0.03992533435424169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,31,0.056101332108179726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,31,0.056032001972198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,31,0.05017066498597463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,31,0.04996266464392344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,63,0.05628266433874766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,63,0.056602666775385536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,63,0.050341332952181496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,63,0.05041599770387014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,127,0.056218668818473816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,127,0.05609600245952606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,127,0.050154666105906166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,127,0.05000533163547516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,255,0.057258665561676025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,255,0.0666186660528183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,255,0.050197333097457886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,255,0.06020799775918325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,511,0.057946667075157166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,511,0.09847467144330342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,511,0.05009600023428599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,511,0.09126399954160054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,1023,0.056373332937558494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,1023,0.05035733183224996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,1023,0.15892266233762106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,1023,0.1525759994983673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,2047,0.05821333328882853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,2047,0.28590933481852215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,2047,0.05017599960168203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,2047,0.2793440024058024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,4095,0.05787200232346853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,4095,0.05028266708056132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,4095,0.5459786653518677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,1,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,1,0.012223999947309494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,4095,0.5309173266092936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,1,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,1,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,3,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,3,0.012703999876976013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,3,0.012768000364303589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,3,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,7,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,7,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,7,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,7,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,15,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,15,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,15,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,15,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,31,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,31,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,31,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,31,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,63,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,63,0.012170666207869848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,63,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,63,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,127,0.01250133290886879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,127,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,127,0.01267733300725619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,127,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,255,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,255,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,255,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,255,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,511,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,511,0.014762666076421738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,511,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,511,0.014762666076421738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,1023,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,1023,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,1023,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,1023,0.018079999834299088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,2047,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,2047,0.02554133286078771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,2047,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,4095,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,2047,0.02492266645034154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,4095,0.042080000042915344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,4095,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,1,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,4095,0.03541333228349686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,1,0.01381333296497663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,1,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,1,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,3,0.014175999909639359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,3,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,3,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,3,0.01573866605758667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,7,0.013946666071812311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,7,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,7,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,15,0.014533333480358124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,15,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,15,0.014378666877746582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,7,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,15,0.013957332819700241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,31,0.01413333291808764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,31,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,31,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,31,0.01370666672786077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,63,0.013471999516089758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,63,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,63,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,63,0.0141546664138635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,127,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,127,0.01332266628742218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,127,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,127,0.014175999909639359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,255,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,255,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,255,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,255,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,511,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,511,0.014767999450365702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,511,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,511,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,1023,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,1023,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,1023,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,1023,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,2047,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,2047,0.039674667020638786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,2047,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,2047,0.03141866624355316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,4095,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,4095,0.06112533311049143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,1,0.013418667018413544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,4095,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,1,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,4095,0.052570665876070656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,1,0.008816000074148178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,1,0.00927466650803884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,3,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,3,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,3,0.009312000125646591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,3,0.00915733352303505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,7,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,7,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,7,0.009136000027259191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,15,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,7,0.009573333586255709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,15,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,15,0.009349333122372627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,15,0.00996800015370051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,31,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,31,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,31,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,63,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,63,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,63,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,63,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,127,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,127,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,127,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,127,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,255,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,255,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,255,0.01184533288081487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,255,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,511,0.015626666446526844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,511,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,511,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,511,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,1023,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,1023,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,1023,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,1023,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,2047,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,2047,0.033471999069054924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,2047,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,2047,0.02773866554101308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,4095,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,4095,0.051914667089780174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,4095,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,4095,0.048026666045188904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,8191,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,8191,0.09116266171137492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,8191,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,8191,0.08684266606966655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,16383,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,16383,0.17082667350769043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,16383,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,16383,0.1627786656220754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,32767,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,1,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,32767,0.3269600073496501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,1,0.01332266628742218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,32767,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,1,0.009178666397929192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,1,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,3,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,3,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,3,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,3,0.009322666873534521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,7,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,32767,0.3296426733334859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,7,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,7,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,7,0.009861333295702934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,15,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,15,0.013477332890033722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,15,0.009610666582981745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,15,0.009402666861812273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,31,0.013631999492645264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,31,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,63,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,63,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,63,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,63,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,127,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,127,0.01571200042963028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,127,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,127,0.01413333291808764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,255,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,255,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,255,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,255,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,511,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,511,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,511,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,511,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,1023,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,1023,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,1023,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,1023,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,2047,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,2047,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,2047,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,2047,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,4095,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,4095,0.05203733344872793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,4095,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,4095,0.04823466638724009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,8191,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,8191,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,8191,0.09140800436337788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,8191,0.08502933382987976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,16383,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,16383,0.1700800061225891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,16383,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,16383,0.16261866688728333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,32767,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,32767,0.3266826669375102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,32767,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,1,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,1,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,1,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,1,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,32767,0.32974400122960407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,3,0.010490667074918747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,3,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,3,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,7,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,7,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,15,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,15,0.0099093330403169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,15,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,15,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,31,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,31,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,31,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,63,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,63,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,63,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,127,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,127,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,127,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,127,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,255,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,255,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,255,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,511,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,511,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,511,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,511,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,1023,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,1023,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,1023,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,1023,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,2047,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,2047,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,2047,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,2047,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,4095,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,4095,0.015925332903862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,4095,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,4095,0.01642666632930438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,8191,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,8191,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,8191,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,8191,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,16383,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,16383,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,16383,0.012671999633312225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,16383,0.020917333662509918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,32767,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,32767,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,1,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,32767,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,1,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,1,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,1,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,3,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,32767,0.025146665672461193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,3,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,3,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,7,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,7,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,15,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,15,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,15,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,31,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,31,0.009253333633144697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,31,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,63,0.009557333464423815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,63,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,127,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,127,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,255,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,255,0.009050666665037474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,255,0.010490667074918747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,511,0.010399999717871347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,511,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,511,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,511,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,1023,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,1023,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,1023,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,1023,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,2047,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,2047,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,2047,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,2047,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,4095,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,4095,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,4095,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,4095,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,8191,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,8191,0.017685333887736004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,8191,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,8191,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,16383,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,16383,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,16383,0.012389333297808966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,16383,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,32767,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,32767,0.037615999579429626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,32767,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,1,0.08472533027331035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,1,0.08478933572769165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,32767,0.029701332251230877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,1,0.06874666611353557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,1,0.06826133529345195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,3,0.08468266328175862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,3,0.08473066488901775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,3,0.06843733290831248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,3,0.06915733218193054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,7,0.08707732955614726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,7,0.08676266670227051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,7,0.07258666555086772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,7,0.07260266443093617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,15,0.08867733677228291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,15,0.08874666690826416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,15,0.07444266478220622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,31,0.1055519978205363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,31,0.10559999942779541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,15,0.07503999769687653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,31,0.09293867150942485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,31,0.09316266576449077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,63,0.10729599992434184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,63,0.09479999542236328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,63,0.10714133580525716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,63,0.09492799639701843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,127,0.10756799578666687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,127,0.10750400026639302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,127,0.0952959954738617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,127,0.09486400087674458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,255,0.10759466886520386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,255,0.12618666887283325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,255,0.0953546663125356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,255,0.11359999577204387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,511,0.10754666725794475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,511,0.18768000602722168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,511,0.0944106678167979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,511,0.17517866690953574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,1023,0.1076586643854777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,1023,0.3102026581764221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,1023,0.09533333778381348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,1023,0.29841599861780804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,2047,0.10759466886520386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,2047,0.09499733646710713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,2047,0.5620746612548828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,1,0.08481066425641377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,1,0.08473599950472514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,2047,0.5471306641896566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,1,0.07050133248170216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,1,0.07042666773001353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,3,0.08477866649627686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,3,0.08504533767700195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,3,0.07053333520889282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,3,0.06853333115577698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,7,0.08758399883906047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,7,0.08730666836102803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,7,0.07262399792671204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,7,0.07253866891066234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,15,0.08910399675369263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,15,0.08876267075538635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,15,0.07478400071461995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,15,0.07446399827798207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,31,0.10541866223017375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,31,0.10569066802660625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,31,0.09303466478983562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,31,0.0935040016969045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,63,0.10750400026639302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,63,0.10731200377146403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,63,0.09501333038012187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,63,0.09316266576449077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,127,0.10771200060844421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,127,0.10781866312026978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,127,0.09479467074076335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,127,0.09496000409126282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,255,0.10760533809661865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,255,0.12588266531626383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,255,0.09529067079226176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,255,0.1139306624730428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,511,0.10750933488210042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,511,0.1874879995981852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,511,0.09506133198738098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,511,0.17494400342305502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,1023,0.10771733522415161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,1023,0.09498133261998494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,1023,0.31038399537404376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,1023,0.29803733030955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,2047,0.09500267108281453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,2047,0.10832533240318298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,1,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,2047,0.568069338798523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,1,0.01581866666674614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,1,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,1,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,2047,0.5473920106887817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,3,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,3,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,3,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,3,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,7,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,7,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,7,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,7,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,15,0.014815999815861383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,15,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,15,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,15,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,31,0.01479999969402949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,31,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,31,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,31,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,63,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,63,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,63,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,63,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,127,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,127,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,127,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,127,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,255,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,255,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,255,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,255,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,511,0.016021333634853363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,511,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,511,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,511,0.016773333152135212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,1023,0.01570133368174235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,1023,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,1023,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,1023,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,2047,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,2047,0.04165333261092504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,2047,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,2047,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,1,0.01876266673207283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,1,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,1,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,1,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,3,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,3,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,3,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,3,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,7,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,7,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,7,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,7,0.017840000490347546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,15,0.017808000246683758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,15,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,15,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,15,0.01800000046690305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,31,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,31,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,31,0.017680000513792038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,31,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,63,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,63,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,63,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,63,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,127,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,127,0.01825599993268649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,127,0.01757866640885671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,127,0.018383999665578205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,255,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,255,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,255,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,255,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,511,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,511,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,511,0.0183999997874101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,511,0.021583999196688335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,1023,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,1023,0.03572800010442734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,1023,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,1023,0.029167999823888142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,2047,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,2047,0.05938666562239329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,2047,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,2047,0.0499893327554067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,1,0.16110400358835855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,1,0.16106667121251425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,1,0.13091199596722922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,1,0.13210666179656982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,3,0.16179200013478598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,3,0.16059199968973795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,3,0.13198399543762207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,3,0.13205867012341818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,7,0.16671466827392578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,7,0.16648000478744507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,7,0.13638933499654135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,7,0.13617600003878275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,15,0.169487992922465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,15,0.16936000188191733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,15,0.14214932918548584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,15,0.142384002606074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,31,0.20460800329844156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,31,0.20433066288630167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,31,0.17907732725143433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,31,0.17941866318384805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,63,0.2061226765314738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,63,0.18133334318796793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,63,0.20645334323247275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,63,0.1814240018526713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,127,0.20788800716400146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,127,0.20789867639541626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,127,0.18209065993626913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,127,0.18175466855367026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,255,0.20833067099253336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,255,0.24457067251205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,255,0.1813919941584269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,255,0.22032000621159872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,511,0.2097973426183065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,511,0.18125333388646445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,511,0.36766934394836426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,511,0.3431146542231242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,1,0.16085867087046304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,1,0.1606559952100118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,1,0.1316159963607788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,1,0.13194132844607034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,3,0.1611786683400472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,3,0.16078933080037436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,3,0.13195199767748514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,3,0.13184533516565958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,7,0.16726400454839072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,7,0.166810671488444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,7,0.1359946628411611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,7,0.13615999619166055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,15,0.1690559983253479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,15,0.16904000441233316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,15,0.1422826647758484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,15,0.1421280006567637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,31,0.2051466703414917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,31,0.2039573391278585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,31,0.1792693336804708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,31,0.179258664449056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,63,0.20713067054748535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,63,0.20595733324686685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,63,0.18125865856806436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,63,0.18147732814153036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,127,0.20797866582870483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,127,0.20773333311080933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,127,0.1833440065383911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,127,0.1820853352546692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,255,0.20778133471806845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,255,0.24507200717926025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,255,0.18236800034840903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,255,0.22020800908406576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,511,0.20983999967575073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,511,0.18141865730285645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,511,0.367792010307312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,1,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,1,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,511,0.34355731805165607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,1,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,1,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,3,0.020031999796628952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,3,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,3,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,3,0.019589333484570186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,7,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,7,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,7,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,7,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,15,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,15,0.019738666713237762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,15,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,15,0.020768000433842342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,31,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,31,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,31,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,31,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,63,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,63,0.01951466624935468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,63,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,63,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,127,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,127,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,127,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,127,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,255,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,255,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,255,0.018800000349680584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,255,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,511,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,511,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,511,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,511,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,1,0.0271573339899381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,1,0.02737066646416982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,1,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,1,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,3,0.027215999861558277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,3,0.027424000203609467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,3,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,3,0.02590399980545044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,7,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,7,0.025802666942278545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,7,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,7,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,15,0.025781333446502686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,15,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,15,0.02554133286078771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,31,0.025759999950726826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,15,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,31,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,31,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,31,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,63,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,63,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,63,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,63,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,127,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,127,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,127,0.025045332809289295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,127,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,255,0.026026666164398193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,255,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,255,0.025759999950726826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,255,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,511,0.02664000044266383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,511,0.03765333443880081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,511,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,511,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,1,0.31460267305374146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,1,0.25489066044489544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,1,0.31460799773534137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,1,0.2547786633173625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,3,0.314303994178772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,3,0.25464000304539997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,3,0.31481067339579266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,3,0.25524266560872394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,7,0.3245706756909688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,7,0.26528000831604004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,7,0.3245866696039836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,7,0.2653546730677287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,15,0.3310239911079407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,15,0.27717334032058716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,15,0.3309546709060669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,15,0.2773546576499939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,31,0.4018933375676473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,31,0.4007466634114583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,31,0.3530240058898926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,31,0.3526666561762492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,63,0.40653332074483234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,63,0.4062933524449666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,63,0.355679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,63,0.35573867956797284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,127,0.40861864884694415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,127,0.4087680180867513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,127,0.3576586643854777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,127,0.35731732845306396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,255,0.41020798683166504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,255,0.4822666645050049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,255,0.35760001341501874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,1,0.3148053288459778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,255,0.43197333812713623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,1,0.2566293279329936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,1,0.3155200084050496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,1,0.2564799984296163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,3,0.25522132714589435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,3,0.2568746606508891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,3,0.31600000460942584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,3,0.31645333766937256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,7,0.2654719948768616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,7,0.3246613343556722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,7,0.3266666730244954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,7,0.2664213379224141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,15,0.2773600021998088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,15,0.3307360013326009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,15,0.3306559920310974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,15,0.2779146631558736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,31,0.4023040135701497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,31,0.3522453308105469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,31,0.40037333965301514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,31,0.35312533378601074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,63,0.40615999698638916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,63,0.40617601076761883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,63,0.35542933146158856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,63,0.3571999867757161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,127,0.35749868551890057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,127,0.4087306658426921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,127,0.40859198570251465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,127,0.35813331604003906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,255,0.3574133316675822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,255,0.40987733999888104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,255,0.4824320077896118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,255,0.432917316754659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,1,0.029274667302767437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,1,0.029520000020662945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,1,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,1,0.029520000020662945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,3,0.029391999046007793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,3,0.02939733366171519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,3,0.02958933264017105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,3,0.029178666571776073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,7,0.029498666524887085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,7,0.029306667546431225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,7,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,7,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,15,0.0295413335164388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,15,0.029685333371162415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,15,0.029152000943819683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,15,0.02940800040960312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,31,0.02961066613594691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,31,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,31,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,31,0.029279999434947968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,63,0.02958400050799052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,63,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,63,0.029450667401154835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,63,0.029509333272775013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,127,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,127,0.029717333614826202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,127,0.02938666691382726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,127,0.029520000020662945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,255,0.031445334355036415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,255,0.029706666866938274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,255,0.029738667110602062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,255,0.02962133288383484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,1,0.04223466912905375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,1,0.042410666743914284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,1,0.040021332601706185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,1,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,3,0.041696002086003624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,3,0.04177600145339966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,3,0.03957866628964742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,3,0.03994133323431015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,7,0.041946664452552795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,7,0.04224533339341482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,7,0.03978666663169861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,7,0.039818666875362396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,15,0.04162666698296865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,15,0.043552001317342125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,15,0.03986666599909464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,15,0.04005333284536997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,31,0.04366933306058248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,31,0.04178666571776072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,31,0.03965333352486292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,31,0.039936001102129616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,63,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,63,0.04189866781234741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,63,0.03976533313592275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,63,0.039690665900707245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,127,0.04188266893227895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,127,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,127,0.03947199881076813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,127,0.03973866750796636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,255,0.04500266909599304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,255,0.043893332282702126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,1,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,255,0.04131733377774557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,1,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,1,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,1,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,3,0.013552000125249227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,255,0.0417546679576238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,3,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,3,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,7,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,3,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,7,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,7,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,7,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,15,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,15,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,15,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,15,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,31,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,31,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,31,0.016623999923467636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,31,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,63,0.013455999394257864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,63,0.013861333330472311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,63,0.019754666835069656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,63,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,127,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,127,0.01951466624935468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,127,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,127,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,255,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,255,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,255,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,255,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,511,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,511,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,511,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,511,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,1023,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,1023,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,1023,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,1023,0.03159466634194056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,2047,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,2047,0.050197333097457886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,2047,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,2047,0.050106664498647056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,4095,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,4095,0.09102400143941243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,4095,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,4095,0.08746666709582011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,8191,0.014778666198253632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,8191,0.17086933056513467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,8191,0.020288000504175823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,8191,0.1607039968172709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,16383,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,16383,0.3359466791152954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,16383,0.01982933282852173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,16383,0.3104479908943176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,32767,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,1,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,32767,0.019765333582957584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,1,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,1,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,1,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,3,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,3,0.013376000026861826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,3,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,3,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,32767,0.6489173173904419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,32767,0.8924533526102701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,7,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,7,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,7,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,7,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,15,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,15,0.013530666629473368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,15,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,31,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,15,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,31,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,31,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,31,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,63,0.01341333364446958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,63,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,63,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,63,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,127,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,127,0.013466666142145792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,127,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,127,0.019685332973798115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,255,0.014757333944241205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,255,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,255,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,255,0.019765333582957584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,511,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,511,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,511,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,511,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,1023,0.014757333944241205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,1023,0.03164266546567281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,1023,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,1023,0.03190933416287104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,2047,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,2047,0.05003733436266581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,2047,0.01951466624935468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,2047,0.05195199946562449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,4095,0.014720000326633453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,4095,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,4095,0.09110933542251587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,4095,0.08849066495895386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,8191,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,8191,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,8191,0.17099199692408243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,8191,0.16275200247764587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,16383,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,16383,0.3362826506296794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,16383,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,16383,0.3103253245353699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,32767,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,32767,0.019626667102177937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,1,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,32767,0.9128479957580566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,1,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,1,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,1,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,3,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,3,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,32767,0.6485386689503988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,3,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,3,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,7,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,15,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,15,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,7,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,15,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,15,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,31,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,31,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,31,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,31,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,63,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,63,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,127,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,127,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,127,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,127,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,255,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,255,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,255,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,255,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,511,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,511,0.011525332927703857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,1023,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,1023,0.012309333930412928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,1023,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,1023,0.013376000026861826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,2047,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,2047,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,2047,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,2047,0.015765332927306492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,4095,0.016229332735141117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,4095,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,4095,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,4095,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,8191,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,8191,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,8191,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,8191,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,16383,0.012703999876976013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,16383,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,16383,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,16383,0.02430933217207591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,32767,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,32767,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,32767,0.04176533222198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,32767,0.0335359995563825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,1,0.010159999753038088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,1,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,3,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,3,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,3,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,3,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,7,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,7,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,7,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,7,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,15,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,15,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,15,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,31,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,31,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,31,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,63,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,63,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,63,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,127,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,127,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,127,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,255,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,255,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,255,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,255,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,511,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,511,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,511,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,1023,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,1023,0.012703999876976013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,1023,0.012576000144084295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,2047,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,2047,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,2047,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,2047,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,4095,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,4095,0.012778667112191519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,4095,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,4095,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,8191,0.012671999633312225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,8191,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,8191,0.012762666990359625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,8191,0.023013333479563396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,16383,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,16383,0.03761066744724909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,16383,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,16383,0.03130666663249334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,32767,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,32767,0.05891199906667074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,32767,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,32767,0.05194666484991709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,1,0.6236266692479452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,1,0.6231040159861246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,1,0.49971731503804523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,1,0.5011039972305298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,3,0.6231306791305542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,3,0.6230186621348063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,3,0.5009119908014933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,3,0.5017973184585571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,7,0.6425546805063883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,7,0.6419680118560791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,7,0.5250240166982015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,7,0.5238080024719238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,15,0.6551573276519775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,15,0.6546453237533569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,15,0.5475519895553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,15,0.5474986632664999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,31,0.79585067431132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,31,0.7955146630605062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,31,0.6976532936096191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,31,0.6978240013122559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,63,0.8046133518218994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,63,0.8041706879933676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,63,0.7055573463439941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,63,0.7051200071970621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,127,0.8115253448486328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,127,0.81223464012146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,127,0.7088212966918945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,127,0.7077706654866537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,1,0.6234773397445679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,1,0.6232639948527018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,1,0.5049546559651693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,1,0.5044373273849487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,3,0.6237653493881226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,3,0.623738686243693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,3,0.5048799912134806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,3,0.5043253501256307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,7,0.6425120035807291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,7,0.644159992535909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,7,0.5252586603164673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,7,0.5250453154246012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,15,0.6546560128529867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,15,0.5477120081583658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,15,0.6553546587626139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,15,0.5476053158442179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,31,0.7958026727040609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,31,0.6976373195648193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,31,0.7976160049438477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,31,0.6977120240529379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,63,0.8044693470001221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,63,0.8060853481292725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,63,0.7053173383076986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,63,0.7053759892781576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,127,0.8125813007354736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,127,0.8108853499094645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,127,0.707909345626831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,127,0.7093386650085449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,1,0.049813335140546165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,1,0.05013866722583771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,1,0.04775466521581014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,1,0.04841066896915436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,3,0.050010666251182556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,3,0.04985600213209788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,3,0.048112000028292336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,3,0.04785599807898203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,7,0.04982399940490723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,7,0.0499893327554067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,7,0.048026666045188904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,7,0.04806933303674062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,15,0.050053333242734276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,15,0.05012799799442291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,15,0.04794666667779287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,15,0.04784533381462097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,31,0.050154666105906166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,31,0.048165331284205117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,31,0.05008533100287119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,31,0.048197334011395775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,63,0.050848002235094704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,63,0.04822400212287903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,63,0.050442665815353394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,63,0.04784533381462097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,127,0.051957334081331887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,127,0.0507893313964208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,127,0.04818666477998098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,1,0.07860266665617625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,1,0.07811200122038524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,127,0.047685335079828896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,1,0.07054933408896129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,1,0.07048533360163371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,3,0.07673066854476929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,3,0.07784000039100647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,3,0.07038400073846181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,3,0.07037333150704701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,7,0.07669333120187123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,7,0.07794666786988576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,7,0.07028266787528992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,7,0.07067200044790904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,15,0.07714133461316426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,15,0.07750399907430013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,15,0.07065066695213318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,15,0.07056533296902974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,31,0.07681066791216533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,31,0.07796266674995422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,31,0.07050666709740956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,31,0.07045333087444305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,63,0.07735466460386912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,63,0.07678933441638947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,63,0.07066133121649425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,63,0.07073600093523662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,127,0.07796266674995422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,127,0.07709333300590515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,127,0.07028266787528992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,127,0.0705866664648056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,1,0.9909546375274658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,1,1.2377653121948242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,1,1.238165299097697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,1,0.9934933185577393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,3,0.9922826290130615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,3,1.2377013365427654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,3,1.2388959725697835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,3,0.9945653279622396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,7,1.039690653483073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,7,1.2778240044911702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,7,1.2776319980621338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,7,1.04039470354716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,15,1.302890698115031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,15,1.0858826637268066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,15,1.3033173084259033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,15,1.0875786940256755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,31,1.3871893882751465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,31,1.583573341369629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,31,1.5843413670857747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,31,1.3879839579264324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,63,1.4039306640625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,63,1.4021706581115723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,63,1.6024160385131836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,63,1.6020533243815105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,1,0.9990932941436768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,1,0.9992746512095133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,1,1.2409706910451253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,1,1.237546682357788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,3,1.0010186831156414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,3,0.9992427031199137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,3,1.240069309870402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,3,1.2398239771525066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,7,1.0411413510640461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,7,1.2787840366363525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,7,1.0416106383005779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,15,1.3037280241648357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,15,1.0880320072174072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,15,1.0883893171946208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,7,1.282805363337199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,15,1.3027946949005127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,31,1.58570130666097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,31,1.5858240127563477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,31,1.3898293177286785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,31,1.3880693117777507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,63,1.4043466250101726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,63,1.60807466506958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,63,1.6122239430745442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,1,0.09296000003814697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,1,0.09285333752632141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,63,1.4031519889831543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,1,0.08721066514650981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,3,0.09314133723576863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,3,0.09305066863695781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,3,0.08669333656628926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,3,0.08685333530108134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,7,0.09291733304659526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,7,0.09303466478983562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,7,0.08689600229263306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,7,0.08578133583068848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,15,0.09355733791987102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,15,0.09309333562850952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,1,0.08719999591509502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,15,0.08563733100891113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,15,0.08698667089144389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,31,0.09311466415723164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,31,0.09312533338864644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,31,0.08619200189908345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,31,0.08695466319719951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,63,0.09317333499590556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,63,0.09310932954152425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,63,0.08552533388137817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,63,0.08687466382980347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,1,0.14436800281206766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,1,0.1443946659564972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,1,0.13215999801953635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,1,0.13223999738693237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,3,0.1444533367951711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,3,0.14474667112032572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,3,0.1320693294207255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,3,0.13199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,7,0.14442666371663412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,7,0.14411200086275736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,7,0.13218667109807333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,7,0.13218667109807333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,15,0.1442453364531199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,15,0.1442506710688273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,15,0.1329919993877411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,15,0.13198399543762207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,31,0.14421332875887552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,31,0.1441920002301534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,31,0.13199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,31,0.13196266690889993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,63,0.14416533708572388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,1,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,63,0.1442293326059977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,63,0.1321333348751068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,1,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,1,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,1,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,3,0.0185759998857975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,3,0.01759999990463257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,3,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,3,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,7,0.017818666994571686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,7,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,7,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,7,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,15,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,15,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,15,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,63,0.1321333348751068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,15,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,31,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,31,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,31,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,31,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,63,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,63,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,63,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,63,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,127,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,127,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,127,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,127,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,255,0.019551999866962433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,255,0.02203733225663503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,255,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,255,0.020629333953062694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,511,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,511,0.02998399982849757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,511,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,511,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,1023,0.019493332753578823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,1023,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,1023,0.04765866696834564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,1023,0.04611733555793762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,2047,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,2047,0.08053866525491078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,2047,0.017642666896184284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,2047,0.07890666524569194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,4095,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,4095,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,4095,0.14882133404413858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,4095,0.1467359960079193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,8191,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,8191,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,8191,0.28569066524505615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,8191,0.2816373308499654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,16383,0.019658666104078293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,16383,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,16383,0.5870826641718546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,1,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,1,0.0176959993938605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,1,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,1,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,16383,0.5934186776479086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,3,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,3,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,3,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,3,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,7,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,7,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,7,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,7,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,15,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,15,0.017717332889636356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,15,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,15,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,31,0.01978133370478948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,31,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,31,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,31,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,63,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,63,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,63,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,63,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,127,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,127,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,127,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,127,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,255,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,255,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,255,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,255,0.020949333906173706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,511,0.019834666202465694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,511,0.030752000709374745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,511,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,511,0.029535998900731403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,1023,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,1023,0.047839999198913574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,1023,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,1023,0.04587199787298838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,2047,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,2047,0.08125866452852885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,2047,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,2047,0.08035733302434285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,4095,0.019567999988794327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,4095,0.14865600069363913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,4095,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,4095,0.14818666378657022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,8191,0.019493332753578823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,8191,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,8191,0.2842453320821126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,8191,0.283077339331309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,16383,0.019578666736682255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,1,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,16383,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,1,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,16383,0.603551983833313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,1,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,1,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,3,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,3,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,3,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,3,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,7,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,16383,0.5958933432896932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,7,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,7,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,15,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,15,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,15,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,31,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,31,0.010490667074918747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,31,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,31,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,63,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,63,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,127,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,127,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,127,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,127,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,255,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,255,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,255,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,511,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,511,0.012693333129088083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,511,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,511,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,1023,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,1023,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,1023,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,1023,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,2047,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,2047,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,2047,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,2047,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,4095,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,4095,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,4095,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,4095,0.018911999960740406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,8191,0.012778667112191519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,8191,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,8191,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,8191,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,16383,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,16383,0.04164266586303711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,16383,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,1,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,1,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,1,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,16383,0.0352906659245491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,1,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,3,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,3,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,3,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,3,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,7,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,7,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,7,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,7,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,15,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,15,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,15,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,15,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,31,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,31,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,31,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,31,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,63,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,63,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,63,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,127,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,127,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,255,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,255,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,511,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,511,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,511,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,1023,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,1023,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,1023,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,1023,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,2047,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,2047,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,2047,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,2047,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,4095,0.01351999988158544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,4095,0.023050665855407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,4095,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,4095,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,8191,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,8191,0.03749866783618927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,8191,0.01357866699496905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,8191,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,16383,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,16383,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,16383,0.05829333265622457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,1,0.025616000096003216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,16383,0.05007466673851013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,1,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,3,0.02717866748571396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,3,0.023984000086784363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,7,0.026586666703224182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,7,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,15,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,31,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,31,0.029258665939172108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,15,0.028229333460330963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,63,0.031530665854612984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,63,0.029253333806991577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,127,0.03568000098069509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,255,0.05198400219281515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,127,0.03325333446264267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,255,0.050053333242734276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,511,0.08525866270065308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,511,0.08299200236797333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,1023,0.15189866224924722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,1023,0.1465226709842682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,2047,0.2818079988161723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,2047,0.2751413385073344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,4095,0.54585067431132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,4095,0.5296159982681274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,1,0.025648000339667004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,1,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,3,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,8191,1.076645294825236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,3,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,7,0.027162666122118633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,8191,1.0423093636830647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,7,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,15,0.031658666829268135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,15,0.029093332588672638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,31,0.031328000128269196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,31,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,63,0.03158933420976003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,63,0.02917333443959554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,127,0.036858665446440377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,127,0.03363733241955439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,255,0.05221866567929586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,255,0.049957334995269775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,511,0.0851200024286906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,511,0.08302400012811025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,1023,0.15251200397809347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,1023,0.1467413306236267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,2047,0.2824160059293111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,2047,0.27503466606140137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,4095,0.5510293245315552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,4095,0.5281493266423544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,1,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,1,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,8191,1.1049333413441975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,3,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,3,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,8191,1.0509599844614665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,15,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,7,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,15,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,31,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,31,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,63,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,127,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,63,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,127,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,255,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,255,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,511,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,511,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,1023,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,2047,0.020938667158285778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,2047,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,1023,0.013738666971524557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,4095,0.03910933434963226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,4095,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,1,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,8191,0.060309335589408875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,1,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,8191,0.0400693342089653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,3,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,7,0.013381333400805792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,3,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,15,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,7,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,15,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,31,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,31,0.011648000528415045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,63,0.013418667018413544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,63,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,127,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,127,0.011962667107582092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,255,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,255,0.01249066616098086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,511,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,511,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,1023,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,1023,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,2047,0.03905066599448522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,2047,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,4095,0.060592000683148704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,4095,0.039488000174363456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,1,0.008879999940594038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,8191,0.10331199566523235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,3,0.00897066667675972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,8191,0.059749335050582886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,3,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,7,0.008576000109314919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,7,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,15,0.00922133338948091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,15,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,31,0.009279999881982803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,31,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,63,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,63,0.012634667257467905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,127,0.009039999917149544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,127,0.012671999633312225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,255,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,255,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,511,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,511,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,1023,0.02516266703605652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,1023,0.027808000644048054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,2047,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,2047,0.03389866650104523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,4095,0.04798933366934458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,4095,0.04595733185609182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,8191,0.07659199833869934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,8191,0.0706826647122701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,16383,0.1341333289941152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,16383,0.11966933806737264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,1,0.009029333169261614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,32767,0.25286932786305744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,3,0.008869333192706108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,3,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,32767,0.21587733427683511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,7,0.008816000074148178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,15,0.009279999881982803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,31,0.010389333590865135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,31,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,63,0.009039999917149544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,63,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,127,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,127,0.012762666990359625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,255,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,255,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,511,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,511,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,1023,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,1023,0.027402666707833607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,2047,0.03365866591533025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,2047,0.03388266762097677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,4095,0.04739200075467428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,4095,0.04597333570321401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,8191,0.07666666805744171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,8191,0.07055999835332234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,16383,0.13517333070437113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,16383,0.1197653313477834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,1,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,1,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,32767,0.2511199911435445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,3,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,3,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,7,0.01044800008336703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,32767,0.21620800097783408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,7,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,15,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,31,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,15,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,63,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,127,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,127,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,255,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,255,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,511,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,511,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,1023,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,1023,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,2047,0.011952000359694162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,2047,0.012655999511480331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,4095,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,4095,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,8191,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,8191,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,16383,0.019914666811625164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,16383,0.019578666736682255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,1,0.010389333590865135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,32767,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,1,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,3,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,32767,0.021418665846188862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,3,0.010490667074918747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,7,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,7,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,15,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,31,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,31,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,63,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,63,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,127,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,255,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,511,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,255,0.010405333091815313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,511,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,1023,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,1023,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,2047,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,2047,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,4095,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,4095,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,8191,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,8191,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,16383,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,16383,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,1,0.00922133338948091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,32767,0.02309866746266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,1,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,3,0.009354666496316591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,32767,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,3,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,7,0.00933333362142245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,7,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,15,0.008832000195980072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,15,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,31,0.009408000235756239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,63,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,63,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,127,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,127,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,255,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,255,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,511,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,511,0.026816000541051228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,1023,0.031317333380381264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,1023,0.029530666768550873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,2047,0.03968533376852671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,2047,0.03976000100374222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,4095,0.06257066627343495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,4095,0.0581226646900177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,8191,0.10600533088048299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,8191,0.09542399644851685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,16383,0.19363733132680258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,16383,0.168938676516215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,1,0.009514666472872099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,1,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,32767,0.36994131406148273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,3,0.009162666896979014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,32767,0.312879999478658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,3,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,7,0.009103999783595404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,7,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,15,0.008767999708652496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,15,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,31,0.00916800027092298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,31,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,63,0.00903466654320558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,63,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,127,0.009045333291093508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,127,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,255,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,255,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,511,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,511,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,1023,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,1023,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,2047,0.039594667653242745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,2047,0.039706667264302574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,4095,0.0625600020090739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,4095,0.0584853341182073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,8191,0.10646933317184448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,8191,0.09517332911491394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,16383,0.19364267587661743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,16383,0.16870399316151938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,1,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,1,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,32767,0.3692320187886556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,3,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,3,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,7,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,32767,0.31225067377090454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,7,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,15,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,31,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,31,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,63,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,127,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,255,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,511,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,511,0.01166933278242747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,1023,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,1023,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,2047,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,4095,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,2047,0.013749333719412485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,4095,0.015615999698638916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,8191,0.018917333334684372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,8191,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,16383,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,16383,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,1,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,1,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,32767,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,32767,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,3,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,7,0.010485333700974783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,7,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,15,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,15,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,31,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,31,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,63,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,127,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,255,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,255,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,511,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,511,0.01240533341964086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,1023,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,1023,0.011578666667143503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,2047,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,2047,0.01458666721979777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,4095,0.01597333326935768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,4095,0.016336000214020412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,8191,0.017749333133300144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,8191,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,16383,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,16383,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,1,0.043738668163617454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,32767,0.038106667498747505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,32767,0.023621333142121632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,1,0.037861332297325134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,3,0.04586666822433472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,3,0.03973866750796636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,7,0.045994664231936135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,7,0.040031999349594116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,15,0.05643199880917867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,15,0.04975999891757965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,31,0.05638400216897329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,31,0.04994666576385498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,63,0.05601066847642263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,63,0.04991999765237173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,127,0.06463466584682465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,127,0.060229331254959106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,255,0.09697066744168599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,255,0.09124267101287842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,511,0.15879467129707336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,511,0.15245333313941956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,1023,0.28359999259312946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,1023,0.27275200684865314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,2047,0.5348159869511923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,2047,0.5147999922434489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,1,0.04428799947102865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,1,0.03794133414824804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,4095,1.0451146761576335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,3,0.04584533472855886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,3,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,4095,1.0007893244425456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,7,0.045893331368764244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,7,0.03985599925120672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,15,0.0561706672112147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,15,0.050197333097457886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,31,0.05596800148487091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,31,0.049882665276527405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,63,0.05589333176612854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,127,0.0644053320089976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,63,0.051039998730023704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,127,0.060266668597857155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,255,0.09701866904894511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,255,0.09103999535242717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,511,0.15228266517321268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,511,0.15891733765602112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,1023,0.28355199098587036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,1023,0.2714879910151164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,2047,0.5423786640167236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,2047,0.5154933134714762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,1,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,1,0.01360000049074491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,3,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,7,0.013429333766301474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,3,0.013653332988421122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,4095,1.0623359680175781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,4095,1.0061493714650471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,7,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,15,0.013450667262077332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,15,0.013424000392357508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,31,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,31,0.013568000247081121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,63,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,127,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,63,0.013424000392357508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,127,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,255,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,255,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,511,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,511,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,1023,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,1023,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,2047,0.04009599983692169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,2047,0.027664000789324444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,4095,0.061039999127388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,1,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,4095,0.04200000067551931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,1,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,3,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,3,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,7,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,7,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,15,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,15,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,31,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,31,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,63,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,63,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,127,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,127,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,255,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,255,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,511,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,511,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,1023,0.03716266651948293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,1023,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,2047,0.06066666543483734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,2047,0.041493333876132965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,4095,0.10406399766604106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,1,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,4095,0.061237335205078125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,3,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,1,0.009205333267649015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,3,0.009098666409651438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,7,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,7,0.00902399979531765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,15,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,15,0.01341333364446958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,31,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,31,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,63,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,127,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,127,0.012671999633312225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,255,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,255,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,511,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,511,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,1023,0.02849599967400233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,1023,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,2047,0.05180266499519348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,2047,0.04799999793370565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,4095,0.09114666779836018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,4095,0.08514666557312012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,8191,0.16888533035914102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,8191,0.1611786683400472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,16383,0.3265119989713033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,16383,0.310810665289561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,1,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,1,0.009248000259200731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,3,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,32767,0.640554666519165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,3,0.009392000113924345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,7,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,7,0.009152000149091085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,15,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,32767,0.695909341176351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,31,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,31,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,63,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,63,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,127,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,127,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,255,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,255,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,511,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,511,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,1023,0.031845333675543465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,1023,0.029498666524887085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,2047,0.05222400029500326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,2047,0.04788800080617269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,4095,0.09105066458384196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,4095,0.08518933256467183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,8191,0.1606773336728414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,8191,0.1697173317273458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,16383,0.32658666372299194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,16383,0.3110293348630269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,1,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,1,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,3,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,7,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,7,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,32767,0.6484693288803101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,15,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,31,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,32767,0.6963253021240234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,31,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,63,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,127,0.011503999431928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,255,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,127,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,255,0.011557333171367645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,511,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,511,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,1023,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,1023,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,2047,0.0161920003592968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,2047,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,4095,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,4095,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,8191,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,8191,0.018853332847356796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,16383,0.02359466751416524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,16383,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,1,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,32767,0.04081599911053976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,1,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,32767,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,3,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,3,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,7,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,31,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,63,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,63,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,127,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,127,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,255,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,511,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,511,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,1023,0.012096000214417776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,1023,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,2047,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,2047,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,4095,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,4095,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,8191,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,8191,0.018874666343132656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,16383,0.03839466720819473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,16383,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,1,0.08297066887219746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,32767,0.06057066718737284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,32767,0.037871999045213066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,1,0.0703359991312027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,3,0.08513066172599792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,3,0.07241066793600719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,7,0.08689066767692566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,7,0.07486933469772339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,15,0.10501866539319356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,15,0.09306666254997253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,31,0.10522666573524475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,31,0.09311999877293904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,63,0.1053653359413147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,63,0.09336533149083455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,127,0.12389866511027019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,127,0.11386133233706157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,255,0.18550399939219156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,255,0.17449599504470825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,511,0.30844799677530926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,511,0.29447466135025024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,1023,0.5581706762313843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,1023,0.5333760182062784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,1,0.08291199803352356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,1,0.07032000025113423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,3,0.08501866459846497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,2047,1.0660906632741292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,3,0.07258666555086772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,7,0.08690667152404785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,7,0.07469333211580913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,15,0.10556800166765849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,2047,1.0176053047180176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,15,0.09303999940554301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,31,0.10513066252072652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,31,0.09313066800435384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,63,0.09314666191736858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,63,0.10622400045394897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,127,0.12410133083661397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,127,0.11380267143249512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,255,0.18522665898005167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,255,0.17539199193318686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,511,0.3083999951680501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,511,0.2949440081914266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,1023,0.5662399927775065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,1023,0.5347093343734741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,1,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,1,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,3,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,2047,1.0792373021443684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,3,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,2047,1.0313599904378254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,7,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,7,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,15,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,15,0.01609066625436147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,31,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,31,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,63,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,63,0.016074666132529575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,127,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,127,0.015706667055686314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,255,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,255,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,511,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,511,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,1023,0.03736533224582672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,1023,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,2047,0.06262933214505513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,1,0.02102400114138921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,2047,0.04322666426499685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,1,0.01836799954374631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,3,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,3,0.01877333347996076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,7,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,7,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,15,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,15,0.017845333864291508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,31,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,31,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,63,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,63,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,127,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,127,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,255,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,255,0.017653333644072216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,511,0.035418666899204254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,511,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,1023,0.05654400090376536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,1023,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,2047,0.10013866424560547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,2047,0.059119999408721924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,1,0.16058133045832315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,1,0.13223466277122498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,3,0.16474666198094687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,3,0.1372106671333313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,7,0.1669600009918213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,7,0.1426293353239695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,15,0.20179200172424316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,15,0.17920533816019693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,31,0.20188266038894653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,31,0.17919999361038208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,63,0.20385066668192545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,63,0.18111467361450195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,127,0.24074133237202963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,127,0.22022400299708048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,255,0.3638933499654134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,255,0.3410986661911011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,1,0.13247999548912048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,511,0.6101706822713217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,1,0.15980266531308493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,511,0.5809226830800375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,3,0.16476800044377646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,3,0.13798399766286215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,7,0.16740800937016806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,7,0.1426346699396769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,15,0.20283200343449911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,15,0.17907732725143433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,31,0.1808533271153768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,31,0.20201067129770914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,63,0.20388267437616983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,63,0.1811093290646871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,127,0.24041599035263062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,127,0.22035199403762817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,255,0.36369601885477704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,255,0.34119999408721924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,1,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,1,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,511,0.6244213183720907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,511,0.5807146628697714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,3,0.023503998915354412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,3,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,7,0.02312533309062322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,7,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,15,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,15,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,31,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,63,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,31,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,63,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,127,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,127,0.022709332406520844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,255,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,255,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,511,0.037471999724706016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,1,0.029648000995318096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,511,0.027376001079877216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,1,0.02789866675933202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,3,0.031301334500312805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,3,0.0273333340883255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,7,0.031311998764673867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,7,0.027642667293548584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,15,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,15,0.027221334477265675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,31,0.0315733328461647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,31,0.02741333345572154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,63,0.03151999910672506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,63,0.0272533322374026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,127,0.03127466638882955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,127,0.027450665831565857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,255,0.03793599953254064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,255,0.027450665831565857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,511,0.058543999989827476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,511,0.0396373321612676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,1,0.31250667572021484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,1,0.2569546699523926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,3,0.3224746584892273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,3,0.2674773335456848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,7,0.3283573389053345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,7,0.27935999631881714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,15,0.3984533150990804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,15,0.3511893351872762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,31,0.398362676302592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,31,0.35307733217875165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,63,0.4007360140482585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,63,0.3550879955291748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,127,0.4741706848144531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,127,0.4322346846262614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,255,0.7194666862487793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,1,0.31251732508341473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,255,0.6739786465962728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,1,0.2569013237953186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,3,0.3224586645762126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,3,0.2672160069147746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,7,0.27958399057388306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,7,0.3290666739145915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,15,0.39846932888031006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,15,0.35133334000905353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,31,0.39787201086680096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,63,0.4005226691563924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,31,0.3535253206888835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,63,0.3552480141321818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,127,0.4740320046742757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,127,0.43145068486531574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,255,0.6744480133056641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,255,0.7437386512756348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,1,0.03612799942493439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,1,0.03364799916744232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,3,0.03738666574160258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,3,0.033386667569478355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,7,0.0360000009338061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,7,0.03359466542800268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,15,0.03752533346414566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,15,0.03363200028737386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,31,0.035989334185918175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,31,0.033546666304270424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,63,0.037605332831541695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,63,0.033573334415753685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,127,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,127,0.0374293327331543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,255,0.04375466704368591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,255,0.0341386670867602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,1,0.052015999952952065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,1,0.044480000933011375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,3,0.05231466889381409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,3,0.04578666885693868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,7,0.051776001850763954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,7,0.04420800010363261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,15,0.052416001756985985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,15,0.04582933088143667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,31,0.052015999952952065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,31,0.04453866680463155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,63,0.052298665046691895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,63,0.04569066564242045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,127,0.05514133473237356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,127,0.04587199787298838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,1,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,1,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,3,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,255,0.06447466711203258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,3,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,7,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,255,0.048453330993652344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,7,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,15,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,15,0.016810666769742966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,31,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,31,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,63,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,63,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,127,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,127,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,255,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,255,0.022991999983787537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,511,0.02975466599067052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,511,0.03188266605138779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,1023,0.050474668542544045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,1023,0.0510453333457311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,2047,0.08714133501052856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,2047,0.09109866619110107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,4095,0.16077333688735962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,4095,0.16990933815638223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,8191,0.3068959911664327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,8191,0.3288373351097107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,16383,0.5996426741282145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,16383,0.6544586817423502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,1,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,1,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,3,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,3,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,7,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,7,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,15,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,32767,1.3779786427815754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,15,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,31,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,31,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,32767,1.8657654126485188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,63,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,63,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,127,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,127,0.019679999599854153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,255,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,255,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,511,0.031541332602500916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,511,0.03138133386770884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,1023,0.04976533353328705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,1023,0.05106133222579956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,2047,0.08909866213798523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,2047,0.08694932858149211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,4095,0.17150932550430298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,4095,0.16083733240763345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,8191,0.33291200796763104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,8191,0.30828799804051715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,16383,0.6640746593475342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,16383,0.5991893212000529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,1,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,1,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,3,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,3,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,7,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,15,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,15,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,31,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,32767,1.4221332867940266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,32767,2.359802722930908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,63,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,127,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,127,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,255,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,511,0.012319999436537424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,511,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,1023,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,1023,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,2047,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,2047,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,4095,0.018138666947682697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,4095,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,8191,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,8191,0.019600000232458115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,16383,0.02548266698916753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,16383,0.04091733445723852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,1,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,1,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,3,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,32767,0.0606826643149058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,32767,0.041706666350364685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,7,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,3,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,15,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,7,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,15,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,31,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,31,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,63,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,127,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,127,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,255,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,255,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,511,0.012703999876976013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,511,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,1023,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,1023,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,2047,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,2047,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,4095,0.02163733293612798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,4095,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,8191,0.03965333352486292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,8191,0.02309866746266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,16383,0.060378665725390114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,16383,0.03826133410135905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,32767,0.10505066315333049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,32767,0.05843733251094818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,1,0.5047200123469034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,1,0.618559996287028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,3,0.5266826550165812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,3,0.6389653285344442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,7,0.5517333348592123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,7,0.6487306753794352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,15,0.6949546337127686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,15,0.7891786893208822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,31,0.7901973724365234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,31,0.6975146929423014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,63,0.794432004292806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,63,0.7021439870198568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,127,0.9435733159383138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,127,0.8564159870147705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,1,0.6165866851806641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,1,0.5066986481348673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,3,0.6395999987920126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,3,0.5272106726964315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,7,0.6505173444747925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,7,0.5511626799901327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,15,0.7902719974517822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,15,0.6955413023630778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,31,0.7891626358032227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,63,0.7997492949167887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,63,0.7013653119405111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,31,0.6981919606526693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,127,0.9824053446451823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,1,0.0645066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,1,0.05693866809209188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,127,0.8639252980550131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,3,0.06418666740258534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,3,0.057562669118245445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,7,0.06442133088906606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,7,0.056159997979799904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,15,0.06427733103434245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,15,0.05749333401521047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,31,0.06506666541099548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,31,0.05766933163007101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,63,0.0644053320089976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,63,0.057071998715400696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,127,0.06593066453933716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,127,0.05864533285299937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,1,0.09510933359464009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,1,0.082805335521698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,3,0.09541866183280945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,3,0.08309866487979889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,7,0.09517866373062134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,7,0.08309866487979889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,15,0.09539199868837993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,15,0.08379733562469482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,31,0.09526933232943217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,31,0.0830080012480418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,63,0.09522133072217305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,63,0.08339200417200725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,127,0.09676800171534221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,127,0.0831413318713506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,1,1.2247467041015625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,1,1.0083359877268474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,3,1.275333325068156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,3,1.0461493333180745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,7,1.1051466464996338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,7,1.304469347000122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,15,1.3852373758951824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,15,1.5787466367085774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,31,1.5941279729207356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,31,1.399829387664795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,63,1.4200959205627441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,63,1.6322132746378581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,1,1.0164373715718586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,1,1.2240373293558757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,3,1.0530133247375488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,3,1.280783971150716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,7,1.105669339497884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,7,1.3208373387654622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,15,1.39463472366333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,15,1.5988693237304688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,31,1.417093276977539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,31,1.6261332829793294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,63,1.6434987386067708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,63,1.4582719802856445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,1,0.116565336783727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,1,0.10569600264231364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,3,0.11547199885050456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,3,0.10451199611028035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,7,0.11584533254305522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,7,0.10512000322341919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,15,0.11643733580907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,15,0.1053013304869334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,31,0.1055573324362437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,31,0.1165706713994344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,63,0.11756267150243123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,63,0.10443199674288432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,1,0.17808000246683756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,1,0.15471466382344565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,3,0.17723733186721802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,3,0.1547040045261383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,7,0.17880000670750937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,7,0.1549493372440338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,15,0.1774079998334249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,15,0.15430933237075806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,31,0.1549066702524821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,1,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,1,0.01642666632930438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,3,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,3,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,7,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,7,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,15,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,63,0.1772586703300476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,63,0.1546026666959127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,15,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,31,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,31,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,63,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,63,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,127,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,127,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,255,0.02980799973011017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,255,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,511,0.04598399996757507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,511,0.04587199787298838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,1023,0.08072533210118611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,1023,0.07914666831493378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,2047,0.1471626659234365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,2047,0.1450399955113729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,4095,0.2818186680475871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,4095,0.2773653268814087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,8191,0.5506666501363119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,8191,0.5437920093536377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,16383,1.2261119683583577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,16383,1.296511967976888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,1,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,1,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,3,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,3,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,7,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,7,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,15,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,15,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,31,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,31,0.019589333484570186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,63,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,63,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,127,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,127,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,255,0.029194665451844532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,255,0.029525332152843475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,511,0.04788800080617269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,511,0.04600533346335093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,31,0.1786186695098877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,1023,0.0809440016746521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,1023,0.07890133559703827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,2047,0.1454080045223236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,2047,0.14873600006103516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,4095,0.2837439974149068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,4095,0.2781280080477397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,8191,0.5578453143437704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,8191,0.5447359879811605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,1,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,1,0.011776000261306763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,3,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,7,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,7,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,15,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,15,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,31,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,16383,1.3414559364318848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,63,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,63,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,16383,1.4097280502319336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,127,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,255,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,255,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,511,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,511,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,1023,0.01451733335852623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,1023,0.013541333377361298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,2047,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,2047,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,4095,0.021674667795499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,4095,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,8191,0.03978666663169861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,8191,0.026842666169007618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,1,0.011786667009194693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,1,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,16383,0.060133333007494606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,16383,0.04152533411979675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,3,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,7,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,7,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,15,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,15,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,31,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,31,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,63,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,127,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,255,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,511,0.012768000364303589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,511,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,1023,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,1023,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,2047,0.020928000410397846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,2047,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,4095,0.03772799919048945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,4095,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,8191,0.05983999868233999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,8191,0.03845333307981491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,1,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,16383,0.10273067156473796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,1,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,16383,0.05715199808279673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,1,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,1,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,3,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,3,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,3,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,3,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,7,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,7,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,7,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,7,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,15,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,15,0.017978666971127193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,15,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,15,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,31,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,31,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,31,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,31,0.017610666652520496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,63,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,63,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,63,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,63,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,127,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,127,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,127,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,127,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,255,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,255,0.021967999637126923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,255,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,255,0.02091199904680252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,511,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,511,0.029631999631722767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,511,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,511,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,1023,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,1023,0.04794133206208547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,1023,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,1023,0.045706664522488914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,2047,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,2047,0.08087466657161713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,2047,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,2047,0.07920533418655396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,4095,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,4095,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,4095,0.1483466625213623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,4095,0.14708800117174783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,8191,0.01961600035429001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,8191,0.2836479942003886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,8191,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,8191,0.28287466367085773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,16383,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,16383,0.017637333522240322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,16383,0.5930613279342651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,1,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,1,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,1,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,1,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,3,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,16383,0.5932106574376425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,3,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,7,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,7,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,7,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,15,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,15,0.011551999797423681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,15,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,31,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,15,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,31,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,31,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,31,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,63,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,63,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,127,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,127,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,255,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,255,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,255,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,255,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,511,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,511,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,511,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,511,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,1023,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,1023,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,1023,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,1023,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,2047,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,2047,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,2047,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,2047,0.016864000509182613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,4095,0.012602667013804117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,4095,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,4095,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,4095,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,8191,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,8191,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,8191,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,8191,0.02369066576162974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,16383,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,16383,0.042090664307276406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,16383,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,1,0.011882666498422623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,16383,0.03457066665093104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,1,0.011578666667143503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,3,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,7,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,7,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,15,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,15,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,15,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,15,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,31,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,31,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,31,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,63,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,63,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,127,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,127,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,255,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,255,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,255,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,511,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,511,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,511,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,511,0.0122079998254776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,1023,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,1023,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,1023,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,1023,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,2047,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,2047,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,2047,0.014202666779359182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,2047,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,4095,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,4095,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,4095,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,4095,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,8191,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,8191,0.03824000060558319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,8191,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,8191,0.031317333380381264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,16383,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,16383,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,16383,0.05841066439946493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,16383,0.0518453319867452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,1,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,1,0.012533333152532578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,1,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,3,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,3,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,3,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,7,0.012666666259368261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,3,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,7,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,7,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,7,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,15,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,15,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,15,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,15,0.011823999385039011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,31,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,31,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,31,0.011535999675591787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,63,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,63,0.012650666137536367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,63,0.01156266654531161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,63,0.012250666817029318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,127,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,127,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,127,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,127,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,255,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,255,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,255,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,255,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,511,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,511,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,511,0.012181332955757776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,511,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,1023,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,1023,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,1023,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,1023,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,2047,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,2047,0.023647998770078022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,2047,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,2047,0.02180800090233485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,4095,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,4095,0.03944533318281174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,4095,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,4095,0.031701333820819855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,8191,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,8191,0.06035733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,8191,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,8191,0.05207466582457224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,16383,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,16383,0.1033066709836324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,16383,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,1,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,1,0.009349333122372627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,1,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,3,0.008842666943868002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,16383,0.08701866865158081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,3,0.009290666629870733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,3,0.00926399976015091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,3,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,7,0.009317333499590555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,7,0.008810666700204214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,7,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,15,0.00878399983048439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,7,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,15,0.009642666826645533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,15,0.00891733355820179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,15,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,31,0.009519999846816063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,31,0.009061333412925402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,31,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,31,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,63,0.008869333192706108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,63,0.009418666362762451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,63,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,63,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,127,0.010277333358923594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,127,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,127,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,127,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,255,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,255,0.008912000184257826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,255,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,255,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,511,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,511,0.01331199953953425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,511,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,511,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,1023,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,1023,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,1023,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,1023,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,2047,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,2047,0.020960000654061634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,2047,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,2047,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,4095,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,4095,0.02717333287000656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,4095,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,4095,0.02922133356332779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,8191,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,8191,0.03576533248027166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,8191,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,8191,0.03758399933576584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,16383,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,16383,0.052799999713897705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,16383,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,16383,0.05454400181770325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,32767,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,32767,0.08694400389989217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,32767,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,32767,0.0811466674009959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,65535,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,65535,0.021520001192887623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,65535,0.156741331020991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,1,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,1,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,1,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,1,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,65535,0.1381226678689321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,3,0.012298667182525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,3,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,3,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,7,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,7,0.010490667074918747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,7,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,7,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,15,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,15,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,15,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,31,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,31,0.010480000327030817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,31,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,31,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,63,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,63,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,127,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,127,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,127,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,127,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,255,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,255,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,255,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,255,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,511,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,511,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,1023,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,1023,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,1023,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,1023,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,2047,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,2047,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,2047,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,2047,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,4095,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,4095,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,4095,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,4095,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,8191,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,8191,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,8191,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,8191,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,16383,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,16383,0.020725333442290623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,16383,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,16383,0.01969066634774208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,32767,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,32767,0.02253866692384084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,32767,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,32767,0.02290133386850357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,65535,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,65535,0.02457600086927414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,65535,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,1,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,1,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,1,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,3,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,65535,0.023413332800070446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,3,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,3,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,3,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,7,0.009178666397929192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,7,0.009930666536092758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,7,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,7,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,15,0.009674666449427605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,15,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,15,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,15,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,31,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,31,0.009050666665037474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,31,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,63,0.010005333150426546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,63,0.009178666397929192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,63,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,127,0.010255999863147736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,127,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,127,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,127,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,255,0.009546666716535887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,255,0.010458666831254959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,255,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,511,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,511,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,511,0.011909333368142446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,511,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,1023,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,1023,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,1023,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,1023,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,2047,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,2047,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,2047,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,2047,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,4095,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,4095,0.012703999876976013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,4095,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,4095,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,8191,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,8191,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,8191,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,8191,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,16383,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,16383,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,16383,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,16383,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,32767,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,32767,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,32767,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,32767,0.01889066646496455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,65535,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,65535,0.021583999196688335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,1,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,65535,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,1,0.010351999973257383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,1,0.010527999450763067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,1,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,3,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,3,0.010266666611035665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,65535,0.020474666108687718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,7,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,7,0.00915733352303505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,7,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,7,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,15,0.009072000160813332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,15,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,31,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,31,0.00983466642598311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,31,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,31,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,63,0.009786666681369146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,63,0.010458666831254959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,63,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,63,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,127,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,127,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,127,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,127,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,255,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,255,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,255,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,511,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,511,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,511,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,511,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,1023,0.0099093330403169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,1023,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,1023,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,1023,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,2047,0.01055466632048289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,2047,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,2047,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,2047,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,4095,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,4095,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,4095,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,4095,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,8191,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,8191,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,8191,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,8191,0.011663999408483505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,16383,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,16383,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,16383,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,16383,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,32767,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,32767,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,32767,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,32767,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,65535,0.011509332805871964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,65535,0.023658665517965954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,65535,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,1,0.009141333401203156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,1,0.009279999881982803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,65535,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,1,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,3,0.009535999968647957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,3,0.00891733355820179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,3,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,7,0.009136000027259191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,7,0.008789333204428354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,7,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,15,0.009114666531483332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,7,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,15,0.00891733355820179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,15,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,31,0.00901333304742972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,31,0.008826666822036108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,31,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,31,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,63,0.009098666409651438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,63,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,63,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,127,0.01027199998497963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,127,0.009173333023985228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,127,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,127,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,255,0.008826666822036108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,255,0.008997333546479544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,255,0.012608000387748083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,255,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,511,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,511,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,511,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,511,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,1023,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,1023,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,1023,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,1023,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,2047,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,2047,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,2047,0.02160000056028366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,2047,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,4095,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,4095,0.03188266605138779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,4095,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,4095,0.03601066768169403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,8191,0.018735999862353008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,8191,0.04595733185609182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,8191,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,8191,0.04782933493455251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,16383,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,16383,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,16383,0.07497600217660268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,16383,0.07133333384990692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,32767,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,32767,0.1341600020726522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,32767,0.02088533341884613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,32767,0.12177067001660664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,65535,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,65535,0.24888000885645548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,1,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,1,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,65535,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,1,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,3,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,3,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,65535,0.21821333964665732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,7,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,7,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,15,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,15,0.010453333457310995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,15,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,31,0.010005333150426546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,31,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,63,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,63,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,63,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,127,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,127,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,255,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,255,0.009029333169261614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,255,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,255,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,511,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,511,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,511,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,511,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,1023,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,1023,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,1023,0.012170666207869848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,1023,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,2047,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,2047,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,2047,0.01181866725285848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,2047,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,4095,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,4095,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,4095,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,4095,0.011477333803971609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,8191,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,8191,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,8191,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,8191,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,16383,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,16383,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,16383,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,16383,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,32767,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,32767,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,32767,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,32767,0.020506666352351505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,65535,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,65535,0.022543999056021374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,1,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,65535,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,1,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,1,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,1,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,3,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,65535,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,3,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,3,0.010415999839703241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,7,0.010474666953086853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,7,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,7,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,15,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,15,0.009061333412925402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,15,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,15,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,31,0.009472000102202097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,31,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,31,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,31,0.010138666878143946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,63,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,63,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,63,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,127,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,127,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,255,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,255,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,255,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,511,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,511,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,511,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,511,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,1023,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,1023,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,1023,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,2047,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,2047,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,2047,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,2047,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,4095,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,4095,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,4095,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,4095,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,8191,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,8191,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,8191,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,8191,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,16383,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,16383,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,16383,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,16383,0.011557333171367645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,32767,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,32767,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,32767,0.011429333438475927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,32767,0.01974933346112569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,65535,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,65535,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,65535,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,65535,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,1,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,1,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,3,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,3,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,3,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,3,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,7,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,7,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,15,0.009152000149091085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,15,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,15,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,15,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,31,0.009077333534757296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,31,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,31,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,31,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,63,0.009637333452701569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,63,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,63,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,127,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,127,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,127,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,127,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,255,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,255,0.009119999905427298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,255,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,255,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,511,0.008885333314538002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,511,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,511,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,511,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,1023,0.010064000263810158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,1023,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,1023,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,1023,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,2047,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,2047,0.013317332913478216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,2047,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,2047,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,4095,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,4095,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,4095,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,4095,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,8191,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,8191,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,8191,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,8191,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,16383,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,16383,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,16383,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,16383,0.0186666672428449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,32767,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,32767,0.023578666150569916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,32767,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,32767,0.021573332448800404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,65535,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,65535,0.03748800108830134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,1,0.025589334468046825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,65535,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,1,0.025663999219735462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,1,0.02309866746266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,1,0.0230880007147789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,65535,0.02959466725587845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,3,0.025813333690166473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,3,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,3,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,3,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,7,0.02736533433198929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,7,0.02741333345572154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,7,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,7,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,15,0.027434666951497395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,15,0.02717866748571396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,15,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,15,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,31,0.031199999153614044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,31,0.03161066770553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,31,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,31,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,63,0.03139200061559677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,63,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,63,0.029343999922275543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,63,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,127,0.03134933362404505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,127,0.031888000667095184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,127,0.029391999046007793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,127,0.029215998947620392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,255,0.03180266668399175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,255,0.037231999138991036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,255,0.029509333272775013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,255,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,511,0.05400000015894572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,511,0.03279466678698858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,511,0.029535998900731403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,511,0.05019199848175049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,1023,0.03162666658560435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,1023,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,1023,0.08295999964078267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,1023,0.08710933725039165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,2047,0.03163733333349228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,2047,0.15260799725850424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,2047,0.029493334392706554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,2047,0.15034133195877075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,4095,0.03177600105603536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,4095,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,4095,0.2844853401184082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,4095,0.2818560004234314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,8191,0.03148799886306127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,1,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,8191,0.029343999922275543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,1,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,8191,0.5569866498311361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,1,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,1,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,3,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,8191,0.545141339302063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,3,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,3,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,7,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,7,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,15,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,15,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,15,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,31,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,31,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,31,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,63,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,63,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,63,0.011546666423479715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,63,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,127,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,127,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,127,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,127,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,255,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,255,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,255,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,511,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,511,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,511,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,1023,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,1023,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,1023,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,1023,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,2047,0.013855999956528345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,2047,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,2047,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,2047,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,4095,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,4095,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,4095,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,4095,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,8191,0.014778666198253632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,8191,0.040149333576361336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,1,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,8191,0.01479999969402949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,1,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,8191,0.03373866776625315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,1,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,1,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,3,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,3,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,3,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,3,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,7,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,7,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,7,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,7,0.011829332758982977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,15,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,15,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,15,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,15,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,31,0.011514666179815928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,31,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,31,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,31,0.012042666474978128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,63,0.012629333883523941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,63,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,63,0.012191999703645706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,63,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,127,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,127,0.012042666474978128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,127,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,127,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,255,0.011802667131026586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,255,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,255,0.012335999558369318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,255,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,511,0.011834666132926941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,511,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,511,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,511,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,1023,0.01179733375708262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,1023,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,1023,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,1023,0.012053333222866058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,2047,0.014688000082969666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,2047,0.024480000138282776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,2047,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,2047,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,4095,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,4095,0.039621333281199135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,4095,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,4095,0.031850665807724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,8191,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,8191,0.06039466460545858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,1,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,8191,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,1,0.013466666142145792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,8191,0.05319466690222422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,1,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,1,0.013354666531085968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,3,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,3,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,3,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,3,0.014432000617186228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,7,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,7,0.014783999572197596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,7,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,7,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,15,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,15,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,15,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,15,0.014720000326633453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,31,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,31,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,31,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,31,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,63,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,63,0.013338666409254074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,63,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,63,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,127,0.01351999988158544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,127,0.013317332913478216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,127,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,127,0.013552000125249227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,255,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,255,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,255,0.01331199953953425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,255,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,511,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,511,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,511,0.014757333944241205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,511,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,1023,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,1023,0.022805333137512207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,1023,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,1023,0.020853333175182343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,2047,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,2047,0.04058666775623957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,2047,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,2047,0.031701333820819855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,4095,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,4095,0.06217599908510844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,4095,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,4095,0.0521919975678126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,8191,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,8191,0.10532800356547038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,8191,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,1,0.008943999807039896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,1,0.00933333362142245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,8191,0.08890666564305623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,1,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,1,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,3,0.009141333401203156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,3,0.009077333534757296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,3,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,7,0.009418666362762451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,7,0.00943999985853831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,7,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,7,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,15,0.009061333412925402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,15,0.008736000085870424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,15,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,31,0.008778666456540426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,31,0.009216000015536943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,31,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,31,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,63,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,63,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,63,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,127,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,127,0.008874666566650072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,127,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,127,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,255,0.009365333244204521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,255,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,255,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,255,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,511,0.01952533299724261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,511,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,511,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,1023,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,511,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,1023,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,1023,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,1023,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,2047,0.02962133288383484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,2047,0.022991999983787537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,2047,0.019610666980346043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,2047,0.03161066770553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,4095,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,4095,0.039488000174363456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,4095,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,4095,0.04081066697835922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,8191,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,8191,0.062047998110453285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,8191,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,8191,0.05834666887919108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,16383,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,16383,0.10579733053843181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,16383,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,16383,0.0955466628074646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,32767,0.019765333582957584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,32767,0.19335466623306274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,32767,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,32767,0.16899732748667398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,65535,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,65535,0.36791467666625977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,1,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,65535,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,1,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,1,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,1,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,3,0.010288000106811523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,3,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,65535,0.31466132402420044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,7,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,7,0.009658666948477427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,7,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,7,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,15,0.009301333377758661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,15,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,15,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,31,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,31,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,31,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,63,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,63,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,63,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,127,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,127,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,255,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,255,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,255,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,255,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,511,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,511,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,511,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,1023,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,1023,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,1023,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,1023,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,2047,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,2047,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,2047,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,2047,0.013909333695967993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,4095,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,4095,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,4095,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,4095,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,8191,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,8191,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,8191,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,8191,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,16383,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,16383,0.013354666531085968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,16383,0.0199946661790212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,16383,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,32767,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,32767,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,32767,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,32767,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,65535,0.01331199953953425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,65535,0.02722666660944621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,65535,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,65535,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,1,0.010405333091815313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,1,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,1,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,3,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,3,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,3,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,7,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,7,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,7,0.01055466632048289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,15,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,15,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,15,0.010442666709423065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,31,0.010474666953086853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,31,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,31,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,63,0.01055466632048289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,63,0.009194666519761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,127,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,127,0.00902399979531765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,127,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,255,0.009488000224033991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,255,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,255,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,511,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,511,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,511,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,511,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,1023,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,1023,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,1023,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,1023,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,2047,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,2047,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,2047,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,2047,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,4095,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,4095,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,4095,0.012245333443085352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,4095,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,8191,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,8191,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,8191,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,8191,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,16383,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,16383,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,16383,0.012245333443085352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,16383,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,32767,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,32767,0.024618667860825855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,32767,0.01192533348997434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,32767,0.023567999402681988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,65535,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,65535,0.03846933444341024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,1,0.010245333115259806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,65535,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,1,0.010480000327030817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,1,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,3,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,3,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,65535,0.031221332649389904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,3,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,3,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,7,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,7,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,7,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,15,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,15,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,31,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,31,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,31,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,63,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,63,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,63,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,127,0.009866666669646898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,127,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,255,0.009962666779756546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,255,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,255,0.012522666404644648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,255,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,511,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,511,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,511,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,1023,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,1023,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,1023,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,1023,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,2047,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,2047,0.01339200014869372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,2047,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,2047,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,4095,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,4095,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,4095,0.013424000392357508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,4095,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,8191,0.012106666962305704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,8191,0.01788266624013583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,8191,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,8191,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,16383,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,16383,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,16383,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,16383,0.021530665457248688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,32767,0.011898666620254517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,32767,0.037802666425704956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,32767,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,32767,0.029861333469549816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,65535,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,65535,0.0582239975531896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,1,0.04569066564242045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,65535,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,1,0.04588800172011057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,1,0.03774933268626531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,1,0.03764266769091288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,65535,0.051445335149765015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,3,0.045994664231936135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,3,0.04577599962552389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,3,0.037861332297325134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,3,0.03774933268626531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,7,0.0462666650613149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,7,0.045968001087506614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,7,0.0395413339138031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,7,0.039477333426475525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,15,0.04809600114822388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,15,0.04789333542188009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,15,0.040048000713189445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,15,0.04001066585381826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,31,0.05622933308283488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,31,0.05603733162085215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,31,0.050160000721613564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,31,0.05002133548259735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,63,0.056133334835370384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,63,0.056287998954455055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,63,0.050245334704717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,63,0.050154666105906166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,127,0.05618133147557577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,127,0.05650666852792104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,127,0.050111999114354454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,127,0.05041599770387014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,255,0.05624533196290334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,255,0.06623999774456024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,255,0.04994133114814758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,255,0.06035733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,511,0.05728533367315928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,511,0.09729599952697754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,511,0.0498986691236496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,511,0.09129066268603007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,1023,0.056688000758488975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,1023,0.1586186687151591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,1023,0.05022400120894114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,1023,0.15242666999499002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,2047,0.05668266614278158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,2047,0.28562132517496747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,2047,0.05097599824269613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,2047,0.27753599484761554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,4095,0.05691733459631602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,1,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,4095,0.05011733373006185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,4095,0.5456800063451132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,1,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,1,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,1,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,4095,0.5292266607284546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,3,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,3,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,3,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,3,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,7,0.011936000237862269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,7,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,7,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,7,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,15,0.011557333171367645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,15,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,15,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,31,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,15,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,31,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,31,0.012128000458081564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,31,0.014346666634082794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,63,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,63,0.012693333129088083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,63,0.012554666648308435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,63,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,127,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,127,0.012714666624863943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,127,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,127,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,255,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,255,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,255,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,255,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,511,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,511,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,511,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,511,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,1023,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,1023,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,1023,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,1023,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,2047,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,2047,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,2047,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,2047,0.02309866746266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,4095,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,4095,0.042175998290379844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,4095,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,1,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,1,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,4095,0.03538133452335993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,1,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,1,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,3,0.014767999450365702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,3,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,3,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,3,0.014741333822409311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,7,0.013429333766301474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,7,0.013951999445756277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,7,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,7,0.013760000467300415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,15,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,15,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,15,0.015989333391189575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,15,0.014730667074521383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,31,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,31,0.013733333597580591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,31,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,31,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,63,0.014549333602190018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,63,0.01393066719174385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,63,0.013679999858140945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,63,0.013807999591032663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,127,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,127,0.013487999637921652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,127,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,127,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,255,0.01403733342885971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,255,0.013733333597580591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,255,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,255,0.013434667140245438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,511,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,511,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,511,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,511,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,1023,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,1023,0.0230880007147789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,1023,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,1023,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,2047,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,2047,0.040549332896868386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,2047,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,2047,0.0327360009153684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,4095,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,4095,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,4095,0.06195733447869619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,1,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,1,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,1,0.017583999782800674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,3,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,1,0.017653333644072216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,4095,0.05379199981689453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,3,0.017893332988023758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,3,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,7,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,3,0.017610666652520496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,7,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,7,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,7,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,15,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,15,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,15,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,15,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,31,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,31,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,31,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,31,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,63,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,63,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,63,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,63,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,127,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,127,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,127,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,127,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,255,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,255,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,255,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,255,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,511,0.018842666099468868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,511,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,511,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,511,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,1023,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,1023,0.035786665976047516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,1023,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,1023,0.029285334050655365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,2047,0.020928000410397846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,2047,0.058304001887639366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,2047,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,2047,0.04987200101216634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,4095,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,4095,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,4095,0.0963253378868103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,4095,0.08108266691366832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,1,0.08498666683832805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,1,0.08474666873613994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,1,0.07044266661008199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,1,0.07028799752394359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,3,0.08487466971079509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,3,0.0846720039844513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,3,0.07054933408896129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,3,0.07044266661008199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,7,0.087226668993632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,7,0.08700266480445862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,7,0.07226666808128357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,7,0.07230400045712788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,15,0.08884800473848979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,15,0.07469866673151652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,15,0.08879466851552327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,15,0.0746559997399648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,31,0.10559999942779541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,31,0.10550933082898457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,31,0.09327999750773112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,31,0.09333866834640503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,63,0.10706667105356853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,63,0.1072213351726532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,63,0.093231995900472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,63,0.0932373305161794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,127,0.10749333103497823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,127,0.09489066402117412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,127,0.1076639990011851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,127,0.09504000345865886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,255,0.1074666678905487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,255,0.12589866916338602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,255,0.09497066338857015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,255,0.1134986678759257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,511,0.10744000474611919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,511,0.09475200374921162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,511,0.1871359944343567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,511,0.1753013332684835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,1023,0.10739200313886006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,1023,0.09371733665466309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,1023,0.31035200754801434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,1,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,1,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,1023,0.29814932743708294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,1,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,1,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,3,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,3,0.015775999675194424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,3,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,3,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,7,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,7,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,7,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,7,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,15,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,15,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,15,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,15,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,31,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,31,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,31,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,31,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,63,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,63,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,63,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,63,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,127,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,127,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,127,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,127,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,255,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,255,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,255,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,255,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,511,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,511,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,511,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,511,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,1023,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,1023,0.024143998821576435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,1023,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,1023,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,1,0.01878400022784869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,1,0.018373332917690277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,1,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,1,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,3,0.01886933296918869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,3,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,3,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,3,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,7,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,7,0.018725333114465077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,7,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,7,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,15,0.017504000415404636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,15,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,15,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,15,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,31,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,31,0.018191999445358913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,31,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,31,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,63,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,63,0.017610666652520496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,63,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,63,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,127,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,127,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,127,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,127,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,255,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,255,0.017727999637524288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,255,0.01859733338157336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,255,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,511,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,511,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,511,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,511,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,1023,0.01882133384545644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,1023,0.034874667723973594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,1023,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,1,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,1023,0.029717333614826202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,1,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,1,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,1,0.023605334262053173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,3,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,3,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,3,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,3,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,7,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,7,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,7,0.023685333629449207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,7,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,15,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,15,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,15,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,15,0.02363733450571696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,31,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,31,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,31,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,31,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,63,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,63,0.024106666445732117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,63,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,63,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,127,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,127,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,127,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,255,0.02593066543340683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,255,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,127,0.024853333830833435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,255,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,255,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,511,0.027093333502610523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,511,0.03857066730658213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,511,0.02516266703605652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,511,0.031343999008337654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,1023,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,1023,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,1023,0.05709333221117655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,1023,0.05003733436266581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,1,0.1625226636727651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,1,0.16158933440844217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,1,0.13243732849756876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,1,0.1320266624291738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,3,0.1625599960486094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,3,0.1621226668357849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,3,0.13223999738693237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,3,0.13219733039538065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,7,0.16707199811935425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,7,0.16662399967511496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,7,0.1364479959011078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,7,0.13619200388590494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,15,0.1706506609916687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,15,0.16902933518091837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,15,0.1423360009988149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,15,0.14230933785438538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,31,0.20567999283472696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,31,0.20569066206614176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,31,0.17993066708246866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,31,0.181002676486969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,63,0.20602667331695557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,63,0.20586667458216348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,63,0.18125333388646445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,63,0.18128534158070883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,127,0.20774400234222412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,127,0.20800000429153442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,127,0.18121600151062012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,127,0.1830079952875773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,255,0.2081813414891561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,255,0.18119466304779053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,255,0.21994666258494058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,255,0.24468799432118735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,511,0.20918399095535278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,511,0.1830880045890808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,511,0.3672959804534912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,511,0.34301865100860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,1,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,1,0.019546666493018467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,1,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,1,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,3,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,3,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,3,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,3,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,7,0.019567999988794327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,7,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,7,0.01886933296918869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,7,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,15,0.01956266661485036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,15,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,15,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,15,0.018768000106016796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,31,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,31,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,31,0.018917333334684372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,31,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,63,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,63,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,63,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,63,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,127,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,127,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,127,0.019589333484570186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,127,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,255,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,255,0.019648000597953796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,255,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,255,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,511,0.020997333029905956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,511,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,511,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,1,0.027162666122118633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,511,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,1,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,1,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,1,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,3,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,3,0.025749333202838898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,3,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,3,0.025146665672461193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,7,0.027029333015282948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,7,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,7,0.025045332809289295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,7,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,15,0.027215999861558277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,15,0.025418666501839954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,15,0.025797332326571148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,15,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,31,0.026608000199000042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,31,0.0271519993742307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,31,0.025279998779296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,31,0.025333332518736523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,63,0.02739733209212621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,63,0.027130665878454845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,63,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,63,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,127,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,127,0.025839999318122864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,127,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,127,0.02498133232196172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,255,0.027301333844661713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,255,0.02651199946800868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,255,0.025450666745503742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,255,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,511,0.027317332724730175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,511,0.03951466580231985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,511,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,1,0.04041599979003271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,511,0.033600000043710075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,1,0.04167999823888143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,1,0.03788800040880839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,1,0.03743999948104223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,3,0.03995733211437861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,3,0.041589332123597465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,3,0.037674665451049805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,3,0.037690666814645134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,7,0.03998400022586187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,7,0.041690667470296226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,7,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,7,0.03741333385308584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,15,0.03993066648642222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,15,0.03993066648642222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,15,0.037621334195137024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,15,0.03766400118668874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,31,0.04041066765785217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,31,0.03998400022586187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,31,0.03746666759252548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,31,0.03755199909210205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,63,0.04004266609748205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,63,0.039962666730086006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,63,0.03775999943415324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,63,0.03748266647259394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,127,0.039887999494870506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,127,0.03982399900754293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,127,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,127,0.037471999724706016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,255,0.04190400242805481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,255,0.04299733539422353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,255,0.039503999054431915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,255,0.03647999962170919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,511,0.04155199974775314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,511,0.0621919979651769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,511,0.03946666667858759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,1,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,1,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,1,0.009408000235756239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,1,0.009061333412925402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,511,0.05384533107280731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,3,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,3,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,3,0.009392000113924345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,3,0.008943999807039896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,7,0.013445333888133367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,7,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,7,0.008976000050703684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,7,0.009312000125646591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,15,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,15,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,15,0.009392000113924345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,15,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,31,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,31,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,31,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,31,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,63,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,63,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,63,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,63,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,127,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,127,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,127,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,127,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,255,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,255,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,255,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,255,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,511,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,511,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,511,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,511,0.014762666076421738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,1023,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,1023,0.021498667697111767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,1023,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,1023,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,2047,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,2047,0.03335466732581457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,2047,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,2047,0.028245332340399425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,4095,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,4095,0.052149335543314614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,4095,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,4095,0.04828266799449921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,8191,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,8191,0.0911253293355306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,8191,0.012714666624863943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,8191,0.08692800005276997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,16383,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,16383,0.17071467638015747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,16383,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,16383,0.16270400087038675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,32767,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,32767,0.3267306685447693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,32767,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,32767,0.3314773241678874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,65535,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,65535,0.012650666137536367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,65535,0.739461342493693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,1,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,1,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,1,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,3,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,3,0.011445333560307821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,3,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,7,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,7,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,15,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,7,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,31,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,15,0.01145600030819575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,65535,0.7071306705474854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,31,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,31,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,31,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,63,0.014186666657527288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,63,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,63,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,127,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,127,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,127,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,255,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,255,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,255,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,255,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,511,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,511,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,511,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,511,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,1023,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,1023,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,1023,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,1023,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,2047,0.012719999998807907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,2047,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,2047,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,2047,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,4095,0.012538666526476542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,4095,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,4095,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,4095,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,8191,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,8191,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,8191,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,8191,0.017504000415404636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,16383,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,16383,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,16383,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,16383,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,32767,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,32767,0.025616000096003216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,32767,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,32767,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,65535,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,65535,0.04113066693147024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,1,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,1,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,65535,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,1,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,1,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,3,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,3,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,3,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,65535,0.03356799980004629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,3,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,7,0.009216000015536943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,7,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,7,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,15,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,15,0.010522666076819101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,15,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,15,0.010464000205198923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,31,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,31,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,31,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,31,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,63,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,63,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,63,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,63,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,127,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,127,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,127,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,127,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,255,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,255,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,511,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,511,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,511,0.012655999511480331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,1023,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,1023,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,1023,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,1023,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,2047,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,2047,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,2047,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,2047,0.013845333208640417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,4095,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,4095,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,4095,0.012304000556468964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,4095,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,8191,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,8191,0.018122666825850803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,8191,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,8191,0.017583999782800674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,16383,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,16383,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,16383,0.01219733307758967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,16383,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,32767,0.012714666624863943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,32767,0.037578667203585304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,32767,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,32767,0.031397332747777305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,65535,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,65535,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,65535,0.05862399935722351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,1,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,1,0.009770666559537252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,65535,0.05215999980767568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,1,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,3,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,3,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,3,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,3,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,7,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,7,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,7,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,31,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,31,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,31,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,31,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,63,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,127,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,127,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,127,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,127,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,255,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,255,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,511,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,511,0.010378666842977205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,511,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,511,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,1023,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,1023,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,1023,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,1023,0.012746666868527731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,2047,0.013477332890033722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,2047,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,2047,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,2047,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,4095,0.012719999998807907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,4095,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,4095,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,4095,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,8191,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,8191,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,8191,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,8191,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,16383,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,16383,0.038586666186650596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,16383,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,16383,0.03148799886306127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,32767,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,32767,0.05707733333110809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,32767,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,32767,0.052298665046691895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,65535,0.012213333199421564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,65535,0.0997759997844696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,65535,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,65535,0.08602666854858398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,1,0.3163253267606099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,1,0.3163466652234395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,1,0.25684799750645954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,1,0.25755733251571655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,3,0.3163680036862691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,3,0.316810667514801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,3,0.2570613423983256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,3,0.2574026584625244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,7,0.32452799876530963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,7,0.3266719977060954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,7,0.2653973301251729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,7,0.2670240004857381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,15,0.3325066765149434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,15,0.27746667464574176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,15,0.3327626585960388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,15,0.27780266602834064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,31,0.40274667739868164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,31,0.4023040135701497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,31,0.353333314259847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,31,0.35525333881378174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,63,0.4063040018081665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,63,0.405290683110555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,63,0.35604266325632733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,63,0.3569759925206502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,127,0.4086560010910034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,127,0.35715198516845703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,127,0.4086666504542033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,127,0.358026663462321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,255,0.41010133425394696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,255,0.3574026823043823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,255,0.4822239875793457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,255,0.43290666739145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,1,0.029498666524887085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,1,0.0296426663796107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,1,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,1,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,3,0.02972800036271413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,3,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,3,0.02962133288383484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,3,0.029322666426499683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,7,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,7,0.029487999776999157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,7,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,7,0.02914133419593175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,15,0.029520000020662945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,15,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,15,0.029722665747006733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,15,0.02923733244339625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,31,0.029680001238981884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,31,0.02940800040960312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,31,0.029520000020662945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,31,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,63,0.02977066735426585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,63,0.029674666623274486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,63,0.02938666691382726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,63,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,127,0.02977066735426585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,127,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,127,0.029205332199732464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,127,0.02976000060637792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,255,0.03141866624355316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,255,0.02977066735426585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,255,0.029717333614826202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,1,0.04321600000063578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,255,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,1,0.04187199970086416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,1,0.04113066693147024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,1,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,3,0.0432640016078949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,3,0.04201599955558777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,3,0.040074666341145836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,3,0.04008533308903376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,7,0.043381333351135254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,7,0.041802664597829185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,7,0.03990400085846583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,7,0.04201599955558777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,15,0.04342400034268697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,15,0.04197333256403605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,15,0.039861333866914116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,15,0.04163199911514918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,31,0.04298666616280874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,31,0.039850667119026184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,31,0.04177600145339966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,31,0.041722665230433144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,63,0.04380266865094503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,63,0.04344533383846283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,63,0.03988266736268997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,63,0.04191466669241587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,127,0.04214933514595032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,127,0.043791999419530235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,127,0.0401653324564298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,127,0.03986666599909464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,255,0.043791999419530235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,255,0.04598399996757507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,255,0.04182399809360504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,255,0.040207999447981514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,1,0.07229333122571309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,1,0.0729013333717982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,1,0.06656533479690552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,1,0.06646933158238728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,3,0.07223466535409291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,3,0.07257600128650665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,3,0.06781866649786632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,3,0.06629333396752675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,7,0.07259200016657512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,7,0.0724426656961441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,7,0.06631466746330261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,7,0.06642666459083557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,15,0.07214933137098949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,15,0.0726560006539027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,15,0.06629866858323415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,15,0.06614399949709575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,31,0.07257066667079926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,31,0.07272000114123027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,31,0.06636266907056172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,31,0.06630933284759521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,63,0.07245866457621257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,63,0.0724426656961441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,63,0.06647466619809468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,63,0.06642666459083557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,127,0.07256533205509186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,127,0.07255466779073079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,127,0.06618133187294006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,127,0.06644266843795776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,255,0.07634133100509644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,255,0.07482133309046428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,255,0.07047466437021892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,255,0.0682666649421056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,1,0.6235359907150269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,1,0.5087146759033203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,1,0.6238666772842407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,1,0.5077546834945679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,3,0.6253866751988729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,3,0.507312019666036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,3,0.6253493229548136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,3,0.5077333450317383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,7,0.6422346830368042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,7,0.527237335840861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,7,0.5253706773122152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,7,0.6433866818745931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,15,0.6564853191375732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,15,0.6581653356552124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,15,0.5489759842554728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,15,0.5503306786219279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,31,0.7986613114674886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,31,0.7980480194091797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,31,0.6998079617818197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,31,0.7012800375620524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,63,0.8046240011850992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,63,0.8042559623718262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,63,0.7058826287587484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,127,0.8112640380859375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,63,0.705397367477417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,127,0.8114506403605143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,127,0.7085440158843994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,1,0.05022933085759481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,127,0.7082346280415853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,1,0.0498933345079422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,1,0.04773333172003428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,3,0.050026665131251015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,3,0.049685334165891014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,3,0.04795733094215393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,7,0.05014933149019877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,7,0.050101334849993386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,7,0.04782933493455251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,7,0.04831466575463613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,15,0.049973333875338234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,15,0.05008533100287119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,1,0.0484799991051356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,15,0.04830400149027506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,15,0.047983999053637184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,3,0.04833599925041199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,31,0.05020800232887268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,31,0.05003733436266581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,31,0.04779199759165446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,31,0.048063998421033226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,63,0.050288001696268715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,63,0.04821866750717163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,63,0.04808533191680908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,127,0.05009066561857859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,127,0.05156800150871277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,127,0.04822933177153269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,127,0.04772266745567322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,1,0.07690133154392242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,1,0.07720533510049184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,1,0.07082666456699371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,1,0.07060266534487407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,3,0.0764213353395462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,3,0.07702933251857758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,3,0.07066133121649425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,63,0.05068266888459524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,3,0.07085333267847697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,7,0.07720000048478444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,7,0.07664533456166585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,7,0.0708000014225642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,7,0.07050133248170216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,15,0.07704000174999237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,15,0.0763679991165797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,15,0.07045866549015045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,15,0.07082133491834004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,31,0.07691200077533722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,31,0.0766186664501826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,31,0.07032000025113423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,31,0.0705386648575465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,63,0.07673066854476929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,63,0.07673599819342296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,63,0.07018666466077168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,63,0.07091199855009715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,127,0.07796266674995422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,127,0.07833600044250488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,127,0.07203199962774913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,127,0.0719413310289383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,1,0.1378933290640513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,1,0.13634666800498962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,1,0.12365866700808208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,1,0.12396267056465149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,3,0.13766933480898538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,3,0.1362879971663157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,3,0.12367467085520427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,3,0.1237440009911855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,7,0.13632532954216003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,7,0.13613333304723105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,7,0.1237333317597707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,7,0.1237493356068929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,15,0.13635200262069702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,15,0.12367467085520427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,15,0.12372266252835591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,31,0.13620266318321228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,31,0.1362826625506083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,31,0.1239306628704071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,31,0.12409599622090657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,63,0.1362986663977305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,63,0.12379733721415202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,63,0.13640532890955606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,63,0.12247999509175618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,127,0.13461333513259888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,127,0.13422399759292603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,1,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,15,0.13672000169754028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,1,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,1,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,1,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,127,0.1237333317597707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,3,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,3,0.013482666263977686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,3,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,3,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,7,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,7,0.013546666751305262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,7,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,127,0.12424533565839131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,7,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,15,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,15,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,15,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,15,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,31,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,31,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,31,0.016063999384641647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,31,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,63,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,63,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,63,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,63,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,127,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,127,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,127,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,127,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,255,0.014778666198253632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,255,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,255,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,255,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,511,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,511,0.02179199953873952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,511,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,511,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,1023,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,1023,0.03166933357715607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,1023,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,1023,0.031845333675543465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,2047,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,2047,0.049973333875338234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,2047,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,2047,0.050373335679372154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,4095,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,4095,0.09091732899347942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,4095,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,4095,0.08804266651471455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,8191,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,8191,0.17121066649754843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,8191,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,8191,0.1627840002377828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,16383,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,16383,0.019786667078733444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,16383,0.33630398909250897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,16383,0.3102239966392517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,32767,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,32767,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,32767,0.9125440120697021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,1,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,1,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,1,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,3,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,3,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,32767,0.649610678354899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,3,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,7,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,7,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,7,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,7,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,15,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,15,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,15,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,31,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,31,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,31,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,31,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,63,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,63,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,63,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,127,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,127,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,127,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,255,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,255,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,255,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,255,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,511,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,511,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,511,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,511,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,1023,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,1023,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,1023,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,2047,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,1023,0.013354666531085968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,2047,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,2047,0.012666666259368261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,2047,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,4095,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,4095,0.016000000139077503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,4095,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,4095,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,8191,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,8191,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,8191,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,8191,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,16383,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,16383,0.02586666742960612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,16383,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,16383,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,32767,0.013631999492645264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,32767,0.04131199916203817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,1,0.00996800015370051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,32767,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,1,0.009957333405812582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,1,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,1,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,32767,0.03549333413441976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,3,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,3,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,7,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,7,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,7,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,15,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,15,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,31,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,31,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,31,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,31,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,63,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,63,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,63,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,127,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,127,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,127,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,255,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,255,0.009477333476146063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,255,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,255,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,511,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,511,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,511,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,1023,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,1023,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,1023,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,1023,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,2047,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,2047,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,2047,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,2047,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,4095,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,4095,0.018725333114465077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,4095,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,4095,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,8191,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,8191,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,8191,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,8191,0.021514666577180225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,16383,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,16383,0.03756266583998998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,16383,0.013855999956528345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,16383,0.03170666595300039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,32767,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,32767,0.0583840012550354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,32767,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,32767,0.05219733218352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,1,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,1,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,1,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,1,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,3,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,3,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,3,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,3,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,7,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,7,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,7,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,15,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,15,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,15,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,31,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,31,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,63,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,63,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,63,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,63,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,127,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,127,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,255,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,255,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,255,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,511,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,511,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,511,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,511,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,1023,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,1023,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,1023,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,1023,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,2047,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,2047,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,2047,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,2047,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,4095,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,4095,0.02178666740655899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,4095,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,4095,0.02130666623512904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,8191,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,8191,0.03806933263937632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,8191,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,8191,0.029802667597929638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,16383,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,16383,0.05835199852784475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,16383,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,16383,0.05073066552480062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,32767,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,32767,0.0993333359559377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,32767,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,1,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,1,0.013455999394257864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,3,0.01801066721479098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,32767,0.08669867118199666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,3,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,7,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,7,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,15,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,15,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,31,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,31,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,63,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,63,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,127,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,127,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,255,0.029498666524887085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,255,0.028805332879225414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,511,0.046223998069763184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,511,0.045941332976023354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,1023,0.08061866462230682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,1023,0.07861333092053731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,2047,0.1476640005906423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,2047,0.14428266882896423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,4095,0.2818079988161723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,4095,0.27737067143122357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,8191,0.5522559881210327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,8191,0.5428266525268555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,1,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,3,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,1,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,3,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,7,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,15,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,31,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,31,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,16383,1.3284107049306233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,127,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,255,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,255,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,511,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,511,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,16383,1.3488960266113281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,1023,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,1023,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,2047,0.017632000148296356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,2047,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,4095,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,4095,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,8191,0.040074666341145836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,8191,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,16383,0.06126933296521505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,3,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,3,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,16383,0.04201066493988037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,7,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,15,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,31,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,31,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,63,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,63,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,127,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,255,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,511,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,511,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,1023,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,1023,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,2047,0.020799999435742695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,2047,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,4095,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,4095,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,8191,0.05930666625499725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,8191,0.037791999677817024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,1,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,16383,0.10102933645248413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,1,0.011493333925803503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,16383,0.05665066838264465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,3,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,3,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,7,0.012741333494583765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,7,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,15,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,15,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,31,0.012703999876976013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,31,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,63,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,63,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,127,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,127,0.012650666137536367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,255,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,255,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,511,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,511,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,1023,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,1023,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,2047,0.039701332648595176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,2047,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,4095,0.060362666845321655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,4095,0.03955733279387156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,8191,0.10487999518712361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,8191,0.059450666109720864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,1,0.008752000207702318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,16383,0.1896053353945414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,1,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,16383,0.10082667072614034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,3,0.008687999720374743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,3,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,7,0.008997333546479544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,15,0.009472000102202097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,15,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,31,0.009248000259200731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,31,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,63,0.008826666822036108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,63,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,127,0.009114666531483332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,127,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,255,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,255,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,511,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,511,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,1023,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,1023,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,2047,0.025637333591779072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,2047,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,4095,0.03583466758330663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,4095,0.035674666364987694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,8191,0.052282666166623436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,8191,0.05186666548252106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,16383,0.08693333466847737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,16383,0.08072533210118611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,32767,0.15822399655977884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,32767,0.13708266615867615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,1,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,3,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,1,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,3,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,7,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,7,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,65535,0.29814932743708294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,15,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,31,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,31,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,63,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,65535,0.2550933361053467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,63,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,127,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,127,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,255,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,511,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,511,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,1023,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,1023,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,2047,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,2047,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,4095,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,4095,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,8191,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,8191,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,16383,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,16383,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,32767,0.02475200096766154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,32767,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,1,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,65535,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,3,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,3,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,7,0.00985599992175897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,7,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,65535,0.025120000044504803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,15,0.009183999771873156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,15,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,31,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,31,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,63,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,63,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,127,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,255,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,255,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,511,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,511,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,1023,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,1023,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,2047,0.012170666207869848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,2047,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,4095,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,4095,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,8191,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,8191,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,16383,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,16383,0.01834133391578992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,32767,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,32767,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,1,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,1,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,65535,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,3,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,3,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,7,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,65535,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,7,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,15,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,15,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,31,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,127,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,255,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,127,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,255,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,511,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,511,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,1023,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,1023,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,2047,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,2047,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,4095,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,4095,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,8191,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,8191,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,16383,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,16383,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,32767,0.021568000316619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,32767,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,1,0.008693333094318708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,1,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,3,0.008826666822036108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,7,0.008586666857202848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,7,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,65535,0.037802666425704956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,15,0.00922133338948091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,15,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,31,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,31,0.00916800027092298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,63,0.009029333169261614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,63,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,127,0.009066666786869368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,65535,0.024175999065240223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,127,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,255,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,255,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,511,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,511,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,1023,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,1023,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,2047,0.031850665807724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,2047,0.03366400053103765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,4095,0.047968000173568726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,4095,0.045653333266576133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,8191,0.0765173335870107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,8191,0.07075733443101247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,16383,0.13473600149154663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,16383,0.11950400471687317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,32767,0.25257599353790283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,32767,0.21589332818984985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,1,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,3,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,3,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,65535,0.48399468262990314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,7,0.012442667037248611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,7,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,15,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,65535,0.4107840061187744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,63,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,63,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,127,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,127,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,255,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,511,0.011482667177915573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,511,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,1023,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,1023,0.011519999553759893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,2047,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,2047,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,4095,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,4095,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,8191,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,8191,0.017583999782800674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,16383,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,16383,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,32767,0.02184533327817917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,32767,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,1,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,1,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,65535,0.025759999950726826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,3,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,7,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,65535,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,15,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,31,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,31,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,63,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,127,0.010469333579142889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,127,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,255,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,255,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,511,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,511,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,1023,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,1023,0.011519999553759893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,2047,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,2047,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,4095,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,4095,0.015615999698638916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,8191,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,8191,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,16383,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,16383,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,32767,0.02295999974012375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,32767,0.02090666691462199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,1,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,3,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,3,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,7,0.010351999973257383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,65535,0.039317332208156586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,15,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,31,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,65535,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,63,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,63,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,127,0.010490667074918747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,255,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,255,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,511,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,1023,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,1023,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,2047,0.014746667196353277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,2047,0.014544000228246054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,4095,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,4095,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,8191,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,8191,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,16383,0.021888000269730885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,16383,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,32767,0.03858133405447006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,32767,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,1,0.025514667232831318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,1,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,65535,0.059818665186564125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,3,0.027221334477265675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,3,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,65535,0.037818667789300285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,7,0.0273333340883255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,7,0.02422933280467987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,15,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,15,0.029322666426499683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,31,0.031317333380381264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,31,0.029306667546431225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,63,0.031258667508761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,63,0.02957333376010259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,127,0.036618667344252266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,127,0.033610666791598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,255,0.054234668612480164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,255,0.05046399931112925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,511,0.08489599823951721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,511,0.08297599852085114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,1023,0.1508639951546987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,1023,0.14629333217938742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,2047,0.28357332944869995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,2047,0.2738933364550273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,4095,0.5473333199818929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,4095,0.5295360088348389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,1,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,1,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,3,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,8191,1.1025333404541016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,3,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,7,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,8191,1.0480586687723796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,7,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,15,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,15,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,31,0.012725333372751871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,63,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,63,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,127,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,255,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,255,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,511,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,511,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,1023,0.013936000565687815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,1023,0.01471466695268949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,2047,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,2047,0.019541333119074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,4095,0.025450666745503742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,4095,0.039274667700131737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,8191,0.06029333174228668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,1,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,1,0.012357333054145178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,8191,0.04141333450873693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,3,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,3,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,7,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,7,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,15,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,15,0.012768000364303589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,31,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,63,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,63,0.01191466674208641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,31,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,127,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,127,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,255,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,255,0.011535999675591787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,511,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,511,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,1023,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,1023,0.015989333391189575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,2047,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,2047,0.0397119993964831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,4095,0.06058133145173391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,4095,0.04027199993530909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,1,0.014757333944241205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,1,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,3,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,8191,0.060346667965253196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,8191,0.10308266679445903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,3,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,7,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,7,0.01379199946920077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,15,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,15,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,31,0.014794666320085526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,31,0.014607999473810196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,63,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,63,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,127,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,127,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,255,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,255,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,511,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,511,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,1023,0.036117332677046456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,1023,0.020960000654061634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,2047,0.061717331409454346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,2047,0.04054400076468786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,4095,0.10357333223025005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,4095,0.061664000153541565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,1,0.00927466650803884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,1,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,8191,0.1892426609992981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,8191,0.10315199693044026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,3,0.009408000235756239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,3,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,7,0.009375999992092451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,15,0.008992000172535578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,15,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,31,0.009189333145817121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,63,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,63,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,127,0.009216000015536943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,127,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,255,0.021503999829292297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,255,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,511,0.023562667270501454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,511,0.02714666724205017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,1023,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,1023,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,2047,0.039664000272750854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,2047,0.040106666584809623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,4095,0.06213866670926412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,4095,0.05834666887919108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,8191,0.1065013309319814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,8191,0.09511466821034749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,16383,0.19369065761566162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,16383,0.1686613361040751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,32767,0.3687146504720052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,32767,0.3124053279558818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,1,0.01146666705608368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,1,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,3,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,3,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,7,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,65535,0.7358720302581787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,15,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,65535,0.6048853397369385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,31,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,63,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,127,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,127,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,255,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,511,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,1023,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,1023,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,2047,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,2047,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,4095,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,4095,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,8191,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,8191,0.01961600035429001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,16383,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,16383,0.019823999454577763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,32767,0.024506665766239166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,1,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,32767,0.022970666488011677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,3,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,3,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,7,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,65535,0.027269333600997925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,65535,0.042080000042915344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,15,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,31,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,63,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,127,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,255,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,255,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,511,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,511,0.012469333906968435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,1023,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,1023,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,2047,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,2047,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,4095,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,4095,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,8191,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,8191,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,16383,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,16383,0.021514666577180225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,32767,0.040549332896868386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,32767,0.024656000236670177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,1,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,1,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,65535,0.06135466694831848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,3,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,7,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,65535,0.04028266668319702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,7,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,15,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,31,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,31,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,63,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,63,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,127,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,127,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,255,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,255,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,511,0.011882666498422623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,511,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,1023,0.012682666381200155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,1023,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,2047,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,2047,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,4095,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,4095,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,8191,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,8191,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,16383,0.03993066648642222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,16383,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,32767,0.06016000111897787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,32767,0.03772799919048945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,1,0.044112001856168113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,65535,0.10265066226323445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,1,0.037791999677817024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,65535,0.05793066819508871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,3,0.045824001232783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,3,0.03938666731119156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,7,0.04612799982229868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,15,0.056048000852266945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,7,0.039861333866914116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,15,0.04990933338801066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,31,0.05613866448402405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,31,0.0497920016447703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,63,0.05611733098824819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,63,0.0498933345079422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,127,0.06449600060780843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,127,0.06015466650327047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,255,0.09673066933949788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,255,0.09097066521644592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,511,0.15874666968981424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,511,0.152346670627594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,1023,0.2836373249689738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,1023,0.2717653314272563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,2047,0.5433386564254761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,2047,0.5152586698532104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,1,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,1,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,3,0.013424000392357508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,3,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,7,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,4095,1.0634453296661377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,7,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,4095,1.006053368250529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,15,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,31,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,15,0.013397333522637686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,31,0.01463466634353002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,63,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,63,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,127,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,255,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,127,0.01360000049074491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,255,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,511,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,511,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,1023,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,1023,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,2047,0.03990933299064636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,2047,0.025674665967623394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,4095,0.06262933214505513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,1,0.016666666915019352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,1,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,4095,0.04223999877770742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,3,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,3,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,7,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,7,0.01394133393963178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,15,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,15,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,31,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,31,0.01640533283352852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,63,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,63,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,127,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,127,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,255,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,255,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,511,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,511,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,1023,0.03554133325815201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,1023,0.0220266655087471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,2047,0.06169599791367849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,2047,0.04081599911053976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,4095,0.1037493348121643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,1,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,4095,0.06109866499900818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,3,0.01961600035429001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,1,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,3,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,7,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,7,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,15,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,15,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,31,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,31,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,63,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,63,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,127,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,127,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,255,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,255,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,511,0.03488533447186152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,511,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,1023,0.05533333122730255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,1023,0.034661332766215004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,2047,0.09851200381914775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,2047,0.05912533402442932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,4095,0.17650665839513144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,4095,0.09511466821034749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,1,0.08288000027338664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,1,0.0705973356962204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,3,0.0849173367023468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,3,0.07228800157705943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,7,0.08708266417185466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,7,0.07451733450094859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,15,0.1053493320941925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,15,0.09317333499590556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,31,0.10446932911872864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,31,0.09322667121887207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,63,0.1053600013256073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,63,0.09322667121887207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,127,0.12395733594894409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,127,0.11357866724332173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,255,0.1855093240737915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,255,0.17313599586486816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,511,0.2958186666170756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,511,0.30908799171447754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,1,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,1023,0.5670559803644816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,1023,0.5335253477096558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,1,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,3,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,3,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,7,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,15,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,7,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,15,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,31,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,31,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,63,0.016773333152135212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,63,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,127,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,127,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,255,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,255,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,511,0.020954666038354237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,511,0.018794666975736618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,1,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,1023,0.03722666700681051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,1023,0.025087999800841015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,1,0.018800000349680584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,3,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,3,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,7,0.019589333484570186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,7,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,15,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,15,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,31,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,31,0.018778666853904724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,63,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,63,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,127,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,127,0.018725333114465077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,255,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,255,0.018863999595244724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,511,0.03538133452335993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,511,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,1023,0.05523733297983805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,1023,0.03548266738653183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,1,0.02922133356332779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,1,0.025568000972270966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,3,0.029626667499542236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,3,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,7,0.029071999092896778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,7,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,15,0.02917333443959554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,15,0.025583999852339428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,31,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,31,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,63,0.029189333319664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,63,0.02569066733121872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,127,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,127,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,255,0.03557866563399633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,255,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,511,0.05612266560395559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,511,0.037791999677817024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,1023,0.09731733798980713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,1023,0.05585599939028422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,1,0.16061333815256754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,1,0.1332480013370514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,3,0.16497066617012024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,3,0.1381653348604838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,7,0.1669279932975769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,7,0.14430399735768637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,15,0.2037066618601481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,15,0.1791306734085083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,31,0.20258132616678873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,31,0.18123199542363486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,63,0.2038080096244812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,63,0.18106667200724283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,127,0.24074133237202963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,127,0.21995733181635538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,255,0.36352535088857013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,255,0.340992013613383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,1,0.023056000471115112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,511,0.6260373195012411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,511,0.5806879997253418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,1,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,3,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,3,0.02364266663789749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,7,0.023562667270501454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,7,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,15,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,31,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,15,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,31,0.0220266655087471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,63,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,63,0.022255999346574146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,127,0.022064000368118286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,127,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,255,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,255,0.02199999988079071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,511,0.03801066676775614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,511,0.027285332481066387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,1,0.03086400032043457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,1,0.02918400118748347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,3,0.03156266609827677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,3,0.02771199991305669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,7,0.030799999833106995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,7,0.02943466603755951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,15,0.031184000273545582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,15,0.02771199991305669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,31,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,31,0.029045333464940388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,63,0.031184000273545582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,63,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,127,0.03138133386770884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,127,0.02756800005833308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,255,0.037818667789300285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,255,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,511,0.058965335289637245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,511,0.03941866755485535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,1,0.04934933284918467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,1,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,3,0.04782933493455251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,3,0.04196799794832865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,7,0.047914668917655945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,7,0.04162666698296865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,15,0.047770669062932335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,15,0.041877334316571556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,31,0.048058668772379555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,31,0.042026668787002563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,63,0.04780266682306925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,63,0.04178133110205332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,127,0.05013333261013031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,127,0.04174399872620901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,255,0.05874133110046387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,255,0.04517333209514618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,1,0.013440000514189402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,1,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,511,0.0986346701780955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,3,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,3,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,511,0.06235733131567637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,7,0.012778667112191519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,7,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,15,0.013530666629473368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,15,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,31,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,31,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,63,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,63,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,127,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,127,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,255,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,255,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,511,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,511,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,1023,0.03162666658560435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,1023,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,2047,0.05218133330345154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,2047,0.047983999053637184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,4095,0.09125333031018575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,4095,0.08519466718037923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,8191,0.1688906749089559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,8191,0.1606880029042562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,16383,0.3264426589012146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,16383,0.31037867069244385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,32767,0.6440639893213908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,32767,0.6991893450419108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,1,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,1,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,3,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,3,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,7,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,7,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,15,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,65535,1.6640960375467937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,31,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,31,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,63,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,127,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,127,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,65535,1.51583464940389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,255,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,255,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,511,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,511,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,1023,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,1023,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,2047,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,2047,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,4095,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,4095,0.01809599995613098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,8191,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,8191,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,16383,0.024671999116738636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,16383,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,32767,0.04214933514595032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,32767,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,1,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,1,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,3,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,65535,0.06198399762312571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,3,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,7,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,15,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,15,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,31,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,31,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,65535,0.042709335684776306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,63,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,127,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,127,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,255,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,255,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,511,0.011711999773979187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,511,0.01246400053302447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,1023,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,1023,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,2047,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,2047,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,4095,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,4095,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,8191,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,8191,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,16383,0.038202665746212006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,16383,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,32767,0.0605973352988561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,32767,0.038133333126703896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,1,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,1,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,3,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,3,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,7,0.010527999450763067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,7,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,65535,0.05845866600672404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,65535,0.10193066795667012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,15,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,31,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,31,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,63,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,127,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,127,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,255,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,255,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,511,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,511,0.011477333803971609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,1023,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,1023,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,2047,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,2047,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,4095,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,4095,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,8191,0.039488000174363456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,8191,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,16383,0.060826669136683144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,16383,0.03846933444341024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,32767,0.1042133371035258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,32767,0.05782400071620941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,65535,0.18941867351531982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,65535,0.097461332877477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,1,0.3121440013249715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,1,0.2592853307723999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,3,0.32309865951538086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,3,0.2675679922103882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,7,0.3288533290227254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,7,0.27941866715749103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,15,0.398362676302592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,15,0.35258134206136066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,31,0.3986293474833171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,31,0.35386133193969727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,63,0.40035200119018555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,63,0.3557973305384318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,127,0.4740320046742757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,127,0.4310239950815837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,1,0.03623466690381368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,255,0.7468799750010172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,255,0.6745973428090414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,1,0.03340800106525421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,3,0.03749866783618927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,3,0.03358400116364161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,7,0.03692266593376795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,7,0.03367999941110611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,15,0.037445334096749626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,15,0.03346666693687439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,31,0.033930666744709015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,31,0.0358240008354187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,63,0.03770666569471359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,63,0.03374933451414108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,127,0.03775466730197271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,127,0.03344533344109853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,255,0.04271999994913737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,255,0.03331200033426285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,1,0.05203199883302053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,1,0.04586133360862732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,3,0.05218133330345154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,3,0.04595200220743815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,7,0.05203199883302053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,7,0.046096002062161766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,15,0.0461760014295578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,15,0.05215999980767568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,31,0.046053335070610046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,31,0.052202666799227394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,63,0.05198400219281515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,63,0.04588800172011057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,127,0.054272000988324486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,127,0.04773333172003428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,255,0.05005866785844167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,255,0.06371200084686279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,1,0.07646400233109792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,1,0.08683733145395915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,3,0.07640000184377034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,3,0.08890133102734883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,7,0.08859733740488689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,7,0.07470933099587758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,15,0.08797867099444072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,15,0.07669866581757863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,31,0.08899199962615967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,31,0.0748586654663086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,63,0.0876693328221639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,63,0.0759200006723404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,127,0.08920533458391826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,127,0.07639466722806294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,255,0.0801333338022232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,255,0.10470933715502422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,1,0.5088053146998087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,1,0.6171040137608846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,3,0.527077317237854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,3,0.6403520107269287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,7,0.5534666776657104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,7,0.6525333325068156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,15,0.6978186766306559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,15,0.7898773352305094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,31,0.7917013168334961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,31,0.6996106306711832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,63,0.7980106671651205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,63,0.7017813523610433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,127,0.8687252998352051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,127,0.9863253434499105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,1,0.062394668658574425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,1,0.05602133274078369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,3,0.06216000020503998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,3,0.056261335810025535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,7,0.06276266773541768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,7,0.05634133517742157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,15,0.062133332093556724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,15,0.056474665800730385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,31,0.06229333579540253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,31,0.05606933434804281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,63,0.06267733375231425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,63,0.05598933498064677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,127,0.058362667759259544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,127,0.06275733311971028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,1,0.09321066737174988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,3,0.09416533509890239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,1,0.08310399949550629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,3,0.08286933104197185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,7,0.08417600393295288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,15,0.09313600262006123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,15,0.08281066517035167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,7,0.09366400043169658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,31,0.09432533383369446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,31,0.08276266853014629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,63,0.09310932954152425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,63,0.08303999900817871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,127,0.09611200292905171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,1,0.1616426706314087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,127,0.08337600032488506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,1,0.1411946713924408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,7,0.1627786656220754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,7,0.14113066593805948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,15,0.16266133387883505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,15,0.14056000113487244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,31,0.1402720014254252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,63,0.16274666786193848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,63,0.14086932937304178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,3,0.16339733203252158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,3,0.14046933253606161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,1,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,1,0.01580799991885821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,3,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,3,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,7,0.013722666849692663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,7,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,15,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,15,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,127,0.16364799936612448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,31,0.013418667018413544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,127,0.14033066232999167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,31,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,63,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,63,0.019861333072185516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,127,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,127,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,255,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,255,0.02309333284695943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,511,0.029765332738558452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,511,0.03178666780392329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,1023,0.050186668833096824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,1023,0.050399998823801674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,31,0.16318933169047037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,2047,0.09083200494448344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,2047,0.086709330479304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,4095,0.16964799165725708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,4095,0.16087466478347778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,8191,0.32893866300582886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,8191,0.30796800057093304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,16383,0.6568053166071574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,16383,0.5999199946721395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,1,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,1,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,3,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,3,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,15,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,15,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,31,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,31,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,63,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,127,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,32767,1.49727996190389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,127,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,255,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,255,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,511,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,1023,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,1023,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,2047,0.016538667182127636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,32767,2.3297386169433594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,2047,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,4095,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,4095,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,8191,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,8191,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,16383,0.04027199993530909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,16383,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,1,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,1,0.011445333560307821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,3,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,7,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,32767,0.060831998785336815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,32767,0.04193066557248434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,15,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,15,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,31,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,63,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,63,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,127,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,127,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,255,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,255,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,511,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,511,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,1023,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,1023,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,2047,0.01759999990463257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,2047,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,4095,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,4095,0.01966399947802226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,8191,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,8191,0.023221333821614582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,16383,0.06066666543483734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,16383,0.038218667109807335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,1,0.011567999919255575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,32767,0.10284800330797832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,3,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,32767,0.05834133426348368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,7,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,7,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,15,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,15,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,31,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,31,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,63,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,63,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,127,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,127,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,255,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,511,0.011941333611806234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,511,0.011648000528415045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,1023,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,1023,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,2047,0.01977066695690155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,2047,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,4095,0.03751999884843826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,4095,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,8191,0.06035733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,8191,0.03773866593837738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,16383,0.10205866893132527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,16383,0.05734399954477946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,1,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,32767,0.18683733542760214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,1,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,32767,0.09666666388511658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,3,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,1,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,3,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,3,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,7,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,7,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,7,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,7,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,15,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,15,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,15,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,31,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,31,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,31,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,31,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,63,0.010431999961535135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,63,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,63,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,127,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,127,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,127,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,255,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,255,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,255,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,511,0.010405333091815313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,511,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,511,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,511,0.012181332955757776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,1023,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,1023,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,1023,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,1023,0.01145600030819575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,2047,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,2047,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,2047,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,2047,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,4095,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,4095,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,4095,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,4095,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,8191,0.01349866638580958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,8191,0.020736000190178554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,8191,0.013397333522637686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,8191,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,16383,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,16383,0.02585600068171819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,16383,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,16383,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,32767,0.013338666409254074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,32767,0.04192533095677694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,32767,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,1,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,1,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,1,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,3,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,32767,0.035818666219711304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,3,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,7,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,7,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,7,0.011503999431928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,15,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,15,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,15,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,15,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,31,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,31,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,31,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,31,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,63,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,63,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,63,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,63,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,127,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,127,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,127,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,255,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,255,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,255,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,511,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,511,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,511,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,511,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,1023,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,1023,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,1023,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,1023,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,2047,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,2047,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,2047,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,4095,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,2047,0.015834666788578033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,4095,0.019567999988794327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,4095,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,4095,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,8191,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,8191,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,8191,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,8191,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,16383,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,16383,0.03965333352486292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,16383,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,16383,0.03200000027815501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,32767,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,32767,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,32767,0.059978668888409935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,32767,0.05202666421731313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,1,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,1,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,1,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,1,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,3,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,3,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,3,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,7,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,7,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,7,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,15,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,15,0.010527999450763067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,15,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,31,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,31,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,31,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,31,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,63,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,63,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,127,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,127,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,127,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,255,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,255,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,255,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,511,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,511,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,511,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,1023,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,1023,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,1023,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,1023,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,2047,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,2047,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,2047,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,2047,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,4095,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,4095,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,4095,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,4095,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,8191,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,8191,0.037903999288876854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,8191,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,8191,0.02943466603755951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,16383,0.013317332913478216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,16383,0.05864533285299937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,16383,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,16383,0.05212266743183136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,32767,0.013471999516089758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,32767,0.10045866171518962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,1,0.012671999633312225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,32767,0.014576000471909841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,1,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,1,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,1,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,3,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,3,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,32767,0.08667733271916707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,3,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,3,0.012373333175977072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,7,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,7,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,7,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,15,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,15,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,15,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,31,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,31,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,31,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,63,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,63,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,127,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,127,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,127,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,127,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,255,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,255,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,255,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,255,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,511,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,511,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,511,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,511,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,1023,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,1023,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,1023,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,1023,0.015802666544914246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,2047,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,2047,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,2047,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,2047,0.02312533309062322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,4095,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,4095,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,4095,0.03974399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,4095,0.03202133377393087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,8191,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,8191,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,8191,0.0598880002895991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,8191,0.05236800014972687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,16383,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,16383,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,16383,0.10359467069307964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,16383,0.08757866422335307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,32767,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,32767,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,32767,0.1870186726252238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,1,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,1,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,32767,0.1580586632092794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,1,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,3,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,3,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,7,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,3,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,7,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,7,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,7,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,15,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,15,0.010170666500926018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,15,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,15,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,31,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,31,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,63,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,63,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,63,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,63,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,127,0.01815466706951459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,127,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,255,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,255,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,255,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,255,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,511,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,511,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,511,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,511,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,1023,0.00892800030608972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,1023,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,1023,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,1023,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,2047,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,2047,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,2047,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,2047,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,4095,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,4095,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,4095,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,4095,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,8191,0.010037333394090334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,8191,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,8191,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,8191,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,16383,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,16383,0.019968000551064808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,16383,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,16383,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,32767,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,32767,0.0272533322374026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,32767,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,32767,0.02735999971628189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,65535,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,65535,0.03180799881617228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,65535,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,65535,0.03133866687615713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,131071,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,131071,0.034501334031422935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,1,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,1,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,131071,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,1,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,3,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,3,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,3,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,7,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,7,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,7,0.010522666076819101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,15,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,15,0.009088000282645226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,15,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,15,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,31,0.008858666444818178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,31,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,31,0.008821333448092142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,131071,0.03376533339420954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,31,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,63,0.010485333700974783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,63,0.00916800027092298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,63,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,63,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,127,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,127,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,127,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,255,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,127,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,255,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,255,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,255,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,511,0.009839999799927076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,511,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,511,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,1023,0.009941333283980688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,1023,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,1023,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,1023,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,2047,0.008943999807039896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,2047,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,2047,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,2047,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,4095,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,4095,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,4095,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,4095,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,8191,0.00890666681031386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,8191,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,8191,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,8191,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,16383,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,16383,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,16383,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,16383,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,32767,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,32767,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,32767,0.01145600030819575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,32767,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,65535,0.009418666362762451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,65535,0.023567999402681988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,65535,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,65535,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,131071,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,131071,0.02739733209212621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,1,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,1,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,1,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,131071,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,1,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,3,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,3,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,3,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,7,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,7,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,7,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,7,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,15,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,131071,0.025514667232831318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,15,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,15,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,31,0.009237333511312803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,31,0.008885333314538002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,31,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,31,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,63,0.009279999881982803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,63,0.01055466632048289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,63,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,63,0.010415999839703241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,127,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,127,0.010058666889866194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,127,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,127,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,255,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,255,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,255,0.00943999985853831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,511,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,511,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,511,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,511,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,1023,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,1023,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,1023,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,1023,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,2047,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,2047,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,2047,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,2047,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,4095,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,4095,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,4095,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,4095,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,8191,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,8191,0.016634666671355564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,8191,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,8191,0.01579733317097028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,16383,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,16383,0.018133333573738735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,16383,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,16383,0.01850133389234543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,32767,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,32767,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,32767,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,32767,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,65535,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,65535,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,65535,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,65535,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,131071,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,131071,0.027215999861558277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,131071,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,131071,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,1,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,1,0.009130666653315226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,1,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,3,0.008933333059151968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,3,0.00973866693675518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,3,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,7,0.009119999905427298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,7,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,15,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,15,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,15,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,15,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,31,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,31,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,31,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,63,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,63,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,63,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,63,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,127,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,127,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,127,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,255,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,255,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,255,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,255,0.010245333115259806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,511,0.009578666960199675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,511,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,511,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,511,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,1023,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,1023,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,1023,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,1023,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,2047,0.010101333260536194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,2047,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,2047,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,2047,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,4095,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,4095,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,4095,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,4095,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,8191,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,8191,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,8191,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,8191,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,16383,0.009818666925032934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,16383,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,16383,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,16383,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,32767,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,32767,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,32767,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,32767,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,65535,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,65535,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,65535,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,65535,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,131071,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,131071,0.039733332892258964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,1,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,1,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,1,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,131071,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,3,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,1,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,3,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,3,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,3,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,7,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,15,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,7,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,15,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,15,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,31,0.009994666402538618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,31,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,31,0.010186666622757912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,63,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,63,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,31,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,131071,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,63,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,127,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,63,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,127,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,127,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,255,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,255,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,255,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,511,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,511,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,511,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,511,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,1023,0.00878399983048439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,1023,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,1023,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,1023,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,2047,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,2047,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,2047,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,2047,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,4095,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,4095,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,4095,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,4095,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,8191,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,8191,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,8191,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,8191,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,16383,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,16383,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,16383,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,16383,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,32767,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,32767,0.02350933353106181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,32767,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,32767,0.021776000658671062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,65535,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,65535,0.025093334416548412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,65535,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,65535,0.02372266600529353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,131071,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,131071,0.028186666468779247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,1,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,131071,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,1,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,1,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,3,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,3,0.008890666688481966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,3,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,3,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,7,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,7,0.009488000224033991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,15,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,15,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,131071,0.02899733434120814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,31,0.00919999989370505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,31,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,63,0.009429333110650381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,63,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,127,0.009216000015536943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,127,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,127,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,127,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,255,0.009893333539366722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,255,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,255,0.010410666465759277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,511,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,511,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,1023,0.012304000556468964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,1023,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,1023,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,1023,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,2047,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,2047,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,2047,0.012645332763592402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,2047,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,4095,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,4095,0.012703999876976013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,4095,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,4095,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,8191,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,8191,0.016688000410795212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,8191,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,8191,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,16383,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,16383,0.018816000471512478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,16383,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,16383,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,32767,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,32767,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,32767,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,32767,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,65535,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,65535,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,65535,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,65535,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,131071,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,131071,0.027717334528764088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,131071,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,1,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,1,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,1,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,1,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,3,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,3,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,3,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,3,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,7,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,7,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,7,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,7,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,15,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,15,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,15,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,31,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,31,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,31,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,31,0.011727999895811081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,63,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,63,0.010069333637754122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,131071,0.027456000447273254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,63,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,127,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,127,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,255,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,255,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,255,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,511,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,511,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,511,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,1023,0.01009599988659223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,1023,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,1023,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,1023,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,2047,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,2047,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,2047,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,2047,0.012741333494583765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,4095,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,4095,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,4095,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,4095,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,8191,0.011482667177915573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,8191,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,8191,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,8191,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,16383,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,16383,0.017504000415404636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,16383,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,16383,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,32767,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,32767,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,32767,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,32767,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,65535,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,65535,0.024890666206677754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,65535,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,65535,0.02327999969323476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,131071,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,1,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,131071,0.0405973345041275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,131071,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,1,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,1,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,3,0.010442666709423065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,1,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,131071,0.03156800071398417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,3,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,3,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,7,0.008858666444818178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,7,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,7,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,7,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,15,0.008901333436369896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,15,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,15,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,31,0.008912000184257826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,31,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,31,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,63,0.008912000184257826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,63,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,63,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,63,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,127,0.00877333308259646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,127,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,255,0.00933333362142245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,255,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,255,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,255,0.009888000165422758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,511,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,511,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,511,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,511,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,1023,0.010128000130256018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,1023,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,1023,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,1023,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,2047,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,2047,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,2047,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,2047,0.014783999572197596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,4095,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,4095,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,4095,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,4095,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,8191,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,8191,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,8191,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,8191,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,16383,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,16383,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,16383,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,16383,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,32767,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,32767,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,32767,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,32767,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,65535,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,65535,0.03706666578849157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,65535,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,65535,0.029743999242782593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,131071,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,131071,0.05787733197212219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,1,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,131071,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,1,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,1,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,3,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,3,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,3,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,7,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,7,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,7,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,15,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,7,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,131071,0.05106133222579956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,15,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,15,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,31,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,31,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,31,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,31,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,63,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,63,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,63,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,127,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,127,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,255,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,255,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,255,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,511,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,511,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,511,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,511,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,1023,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,1023,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,1023,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,1023,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,2047,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,2047,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,2047,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,2047,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,4095,0.013418667018413544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,4095,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,4095,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,4095,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,8191,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,8191,0.025674665967623394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,8191,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,8191,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,16383,0.01349866638580958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,16383,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,16383,0.04200000067551931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,16383,0.03536533315976461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,1,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,1,0.011503999431928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,1,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,3,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,3,0.011482667177915573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,3,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,7,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,7,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,7,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,7,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,15,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,15,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,31,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,31,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,63,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,63,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,63,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,127,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,127,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,127,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,255,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,255,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,255,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,255,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,511,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,511,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,511,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,511,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,1023,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,1023,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,1023,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,1023,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,2047,0.013440000514189402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,2047,0.018533332894245785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,2047,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,2047,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,4095,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,4095,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,4095,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,4095,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,8191,0.014005333185195923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,8191,0.038805333276589714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,8191,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,8191,0.03133866687615713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,16383,0.014479999740918478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,16383,0.05895466605822245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,16383,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,1,0.011727999895811081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,1,0.012479999413092932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,1,0.011530666301647821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,3,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,16383,0.05208533505598704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,3,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,3,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,3,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,7,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,7,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,7,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,15,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,15,0.012725333372751871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,15,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,15,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,31,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,31,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,31,0.012618667135636011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,63,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,63,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,63,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,31,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,127,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,127,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,127,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,127,0.0120319997270902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,255,0.011877333124478659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,255,0.012655999511480331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,255,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,255,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,511,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,511,0.012714666624863943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,511,0.012693333129088083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,511,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,1023,0.01251199965675672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,1023,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,1023,0.012597333639860153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,1023,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,2047,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,2047,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,2047,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,2047,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,4095,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,4095,0.03965333352486292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,4095,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,4095,0.03153600047032038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,8191,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,8191,0.06069866816202799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,8191,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,8191,0.052416001756985985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,16383,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,16383,0.10422933101654053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,1,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,1,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,16383,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,1,0.014757333944241205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,1,0.013461332768201828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,16383,0.08867733677228291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,3,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,3,0.013418667018413544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,3,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,3,0.014607999473810196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,7,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,7,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,7,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,7,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,15,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,15,0.013354666531085968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,15,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,15,0.013381333400805792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,31,0.013429333766301474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,31,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,31,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,31,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,63,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,63,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,63,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,63,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,127,0.014757333944241205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,127,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,127,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,127,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,255,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,255,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,255,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,255,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,511,0.013946666071812311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,511,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,511,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,511,0.016783999900023144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,1023,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,1023,0.024383999407291412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,1023,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,1023,0.020970667401949566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,2047,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,2047,0.039621333281199135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,2047,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,2047,0.031744000812371574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,4095,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,4095,0.06090133388837179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,4095,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,4095,0.05217066903909048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,8191,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,8191,0.10315199693044026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,8191,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,8191,0.0878613293170929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,16383,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,16383,0.18927466869354248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,16383,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,1,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,1,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,1,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,1,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,16383,0.15686399737993875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,3,0.010464000205198923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,3,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,7,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,7,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,7,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,15,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,15,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,31,0.010527999450763067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,31,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,31,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,63,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,63,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,63,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,127,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,127,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,127,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,127,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,255,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,255,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,255,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,511,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,511,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,511,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,511,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,1023,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,1023,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,1023,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,1023,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,2047,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,2047,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,2047,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,2047,0.013541333377361298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,4095,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,4095,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,4095,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,4095,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,8191,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,8191,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,8191,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,8191,0.017770666629076004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,16383,0.012741333494583765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,16383,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,16383,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,16383,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,32767,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,32767,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,32767,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,32767,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,65535,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,65535,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,65535,0.013434667140245438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,65535,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,131071,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,131071,0.03180266668399175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,1,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,1,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,131071,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,1,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,1,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,3,0.009365333244204521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,3,0.009952000031868616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,3,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,3,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,7,0.00897066667675972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,7,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,7,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,15,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,15,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,15,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,131071,0.029839999973773956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,15,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,31,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,63,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,63,0.010522666076819101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,127,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,127,0.00890666681031386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,127,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,255,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,255,0.008938666433095932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,255,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,255,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,511,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,511,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,511,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,511,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,1023,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,1023,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,1023,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,1023,0.011594666788975397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,2047,0.012362666428089142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,2047,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,2047,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,2047,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,4095,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,4095,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,4095,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,4095,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,8191,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,8191,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,8191,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,8191,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,16383,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,16383,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,16383,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,16383,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,32767,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,32767,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,32767,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,32767,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,65535,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,65535,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,65535,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,65535,0.025013332565625507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,131071,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,131071,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,131071,0.040778666734695435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,131071,0.033439998825391136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,1,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,1,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,1,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,3,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,3,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,3,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,7,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,7,0.010154666379094124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,7,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,7,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,15,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,15,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,15,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,31,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,31,0.008789333204428354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,31,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,31,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,63,0.009072000160813332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,63,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,63,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,63,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,127,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,127,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,255,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,255,0.009077333534757296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,255,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,255,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,511,0.009775999933481216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,511,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,511,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,511,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,1023,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,1023,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,1023,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,1023,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,2047,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,2047,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,2047,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,2047,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,4095,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,4095,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,4095,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,4095,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,8191,0.012074666718641916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,8191,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,8191,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,8191,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,16383,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,16383,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,16383,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,16383,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,32767,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,32767,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,32767,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,32767,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,65535,0.012256000190973282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,65535,0.039749334255854286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,65535,0.01228800043463707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,65535,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,131071,0.014149333039919535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,1,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,1,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,131071,0.06116800010204315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,1,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,3,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,131071,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,3,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,3,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,7,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,7,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,7,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,15,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,15,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,15,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,15,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,31,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,31,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,31,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,63,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,63,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,63,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,127,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,131071,0.053226664662361145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,127,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,127,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,255,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,255,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,255,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,511,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,511,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,511,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,1023,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,1023,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,1023,0.010485333700974783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,1023,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,2047,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,2047,0.014277332772811254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,2047,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,2047,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,4095,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,4095,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,4095,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,4095,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,8191,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,8191,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,8191,0.012768000364303589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,8191,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,16383,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,16383,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,16383,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,16383,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,32767,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,32767,0.03726933399836222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,32767,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,32767,0.03014933317899704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,65535,0.011893333246310553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,65535,0.05821333328882853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,65535,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,65535,0.05170666674772898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,131071,0.014368000129858652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,131071,0.10348266363143921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,131071,0.014661333213249842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,1,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,1,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,1,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,1,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,3,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,3,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,7,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,3,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,131071,0.08872532844543457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,7,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,7,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,15,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,15,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,15,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,31,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,31,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,31,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,63,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,63,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,63,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,127,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,127,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,127,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,127,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,255,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,255,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,255,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,511,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,511,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,511,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,511,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,1023,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,1023,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,1023,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,1023,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,2047,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,2047,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,2047,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,2047,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,4095,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,4095,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,4095,0.025098666548728943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,4095,0.023562667270501454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,8191,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,8191,0.013728000223636627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,8191,0.04045866678158442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,8191,0.033402666449546814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,1,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,1,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,1,0.011642667154471079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,1,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,3,0.012778667112191519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,3,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,3,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,3,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,7,0.012341332932313284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,7,0.011535999675591787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,7,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,7,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,15,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,15,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,15,0.012741333494583765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,15,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,31,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,31,0.011648000528415045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,31,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,31,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,63,0.012671999633312225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,63,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,127,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,127,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,127,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,127,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,255,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,255,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,255,0.011754666765530905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,511,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,511,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,511,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,511,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,1023,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,1023,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,1023,0.0120319997270902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,1023,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,2047,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,2047,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,2047,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,2047,0.023050665855407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,4095,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,4095,0.03897066662708918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,4095,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,4095,0.03143466760714849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,8191,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,8191,0.06284800171852112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,8191,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,1,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,1,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,1,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,1,0.014261333892742792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,8191,0.05201066533724467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,3,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,3,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,3,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,3,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,7,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,7,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,7,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,7,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,15,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,15,0.014368000129858652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,15,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,15,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,31,0.01404800017674764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,31,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,31,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,31,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,63,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,63,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,63,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,63,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,127,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,127,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,127,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,127,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,255,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,255,0.013631999492645264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,255,0.014778666198253632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,511,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,255,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,511,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,511,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,511,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,1023,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,1023,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,1023,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,1023,0.020949333906173706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,2047,0.04002666721741358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,2047,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,2047,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,2047,0.03173333406448364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,4095,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,4095,0.06180266539255778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,4095,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,4095,0.05389333268006643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,8191,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,8191,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,8191,0.10412800312042236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,8191,0.08878399928410848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,1,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,1,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,1,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,1,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,3,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,3,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,3,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,3,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,7,0.017562666287024815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,7,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,7,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,7,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,15,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,15,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,15,0.016666666915019352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,15,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,31,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,31,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,31,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,31,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,63,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,63,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,63,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,63,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,127,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,127,0.017583999782800674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,127,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,127,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,255,0.018079999834299088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,255,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,255,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,255,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,511,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,511,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,511,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,511,0.021503999829292297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,1023,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,1023,0.03540800015131632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,1023,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,1023,0.027823999524116516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,2047,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,2047,0.05880533158779144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,2047,0.01974933346112569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,2047,0.050010666251182556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,4095,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,4095,0.09547199805577596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,4095,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,4095,0.08103999992211659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,8191,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,8191,0.16982932885487875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,8191,0.019717333217461903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,1,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,8191,0.141893337170283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,1,0.012191999703645706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,1,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,1,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,3,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,3,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,3,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,3,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,7,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,7,0.012597333639860153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,7,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,7,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,15,0.01181866725285848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,15,0.012618667135636011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,15,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,15,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,31,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,31,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,31,0.012293333808581034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,31,0.01191466674208641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,63,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,63,0.012778667112191519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,63,0.012181332955757776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,63,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,127,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,127,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,127,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,127,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,255,0.0145066666106383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,255,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,255,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,255,0.013557333499193192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,511,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,511,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,511,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,511,0.01370666672786077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,1023,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,1023,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,1023,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,1023,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,2047,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,2047,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,2047,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,2047,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,1,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,1,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,1,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,1,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,3,0.014736000448465347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,3,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,3,0.014666666587193808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,3,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,7,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,7,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,7,0.013760000467300415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,7,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,15,0.013354666531085968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,15,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,15,0.01404800017674764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,15,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,31,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,31,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,31,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,31,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,63,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,63,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,63,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,63,0.014607999473810196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,127,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,127,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,127,0.014538666854302088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,127,0.013482666263977686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,255,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,255,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,255,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,255,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,511,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,511,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,511,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,511,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,1023,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,1023,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,1023,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,1023,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,2047,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,2047,0.04012266546487808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,1,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,2047,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,2047,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,1,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,1,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,1,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,3,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,3,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,3,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,3,0.017562666287024815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,7,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,7,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,7,0.016719999412695568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,7,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,15,0.01770666614174843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,15,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,15,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,15,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,31,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,31,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,31,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,31,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,63,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,63,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,63,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,127,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,127,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,63,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,127,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,127,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,255,0.01868266612291336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,255,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,255,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,255,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,511,0.018538666268189747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,511,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,511,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,511,0.020960000654061634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,1023,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,1023,0.035717333356539406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,1023,0.01756799966096878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,1023,0.029333333174387615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,2047,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,2047,0.058037335673967995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,2047,0.02080533280968666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,2047,0.049738665421803795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,1,0.025055999557177227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,1,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,1,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,1,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,3,0.025498665869235992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,3,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,3,0.023770667612552643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,3,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,7,0.025093334416548412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,7,0.02552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,7,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,7,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,15,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,15,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,15,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,15,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,31,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,31,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,31,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,31,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,63,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,63,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,63,0.025066666305065155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,63,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,127,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,127,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,127,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,127,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,255,0.025418666501839954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,255,0.02403733382622401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,255,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,255,0.02369066576162974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,511,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,511,0.03742400060097376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,511,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,511,0.03134933362404505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,1023,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,1023,0.056559999783833824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,1023,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,1023,0.048197334011395775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,2047,0.029152000943819683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,2047,0.09929066896438599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,2047,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,1,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,2047,0.08476799726486206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,1,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,1,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,1,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,3,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,3,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,3,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,3,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,7,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,7,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,7,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,15,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,7,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,15,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,15,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,15,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,31,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,31,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,31,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,63,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,31,0.015728000551462173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,63,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,63,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,63,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,127,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,127,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,127,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,127,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,255,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,255,0.016719999412695568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,255,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,255,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,511,0.015642666568358738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,511,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,511,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,511,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,1023,0.02513066679239273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,1023,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,1023,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,1023,0.02312533309062322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,1,0.018853332847356796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,1,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,1,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,1,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,3,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,3,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,3,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,3,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,7,0.018826667219400406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,7,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,7,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,7,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,15,0.017877332866191864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,15,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,15,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,15,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,31,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,31,0.01876266673207283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,31,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,31,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,63,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,63,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,63,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,63,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,127,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,127,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,127,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,127,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,255,0.018485333770513535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,255,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,255,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,255,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,511,0.01800000046690305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,511,0.023050665855407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,511,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,511,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,1023,0.01823466643691063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,1023,0.03572266548871994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,1023,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,1023,0.029120000700155895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,1,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,1,0.02569066733121872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,1,0.023706667125225067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,1,0.024688000480333965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,3,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,3,0.025621332228183746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,3,0.02497066557407379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,3,0.024800000091393787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,7,0.02513599892457326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,7,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,7,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,7,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,15,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,15,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,15,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,15,0.023546665906906128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,31,0.025450666745503742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,31,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,31,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,31,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,63,0.02516266703605652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,63,0.025616000096003216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,63,0.02362666775782903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,63,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,127,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,127,0.025066666305065155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,127,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,127,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,255,0.026549334327379864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,255,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,255,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,255,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,511,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,511,0.03805333375930786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,511,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,511,0.03153600047032038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,1023,0.025744001070658367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,1023,0.05724266668160757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,1023,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,1023,0.04858666658401489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,1,0.04001600046952566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,1,0.03753600021203359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,1,0.03979733337958654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,1,0.03611200054486593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,3,0.04004266609748205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,3,0.04005333284536997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,3,0.03754666695992152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,3,0.035642666121323906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,7,0.03989866624275843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,7,0.03972266614437103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,7,0.03770133356253306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,7,0.03738133360942205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,15,0.040074666341145836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,15,0.039594667653242745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,15,0.037231999138991036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,15,0.037658666570981346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,31,0.0395413339138031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,31,0.03735466549793879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,31,0.03962666789690653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,31,0.03734400123357773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,63,0.039733332892258964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,63,0.03763733307520548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,63,0.03957866628964742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,63,0.03774933268626531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,127,0.039647998909155525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,127,0.03934400031963984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,127,0.03570133447647095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,127,0.03571200122435888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,255,0.04164266586303711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,255,0.03765333443880081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,255,0.04167466859022776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,255,0.03555200000603994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,511,0.04155733436346054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,511,0.0622026671965917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,511,0.037632000943024956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,511,0.054133335749308266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,1023,0.04180799921353658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,1023,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,1023,0.09814400474230449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,1023,0.08473599950472514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,1,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,1,0.00949866697192192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,1,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,1,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,3,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,3,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,3,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,7,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,7,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,7,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,15,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,15,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,15,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,15,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,31,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,31,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,31,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,31,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,63,0.009072000160813332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,63,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,127,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,127,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,255,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,127,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,255,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,255,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,255,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,511,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,511,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,511,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,1023,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,1023,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,1023,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,1023,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,2047,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,2047,0.013461332768201828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,2047,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,2047,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,4095,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,4095,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,4095,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,4095,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,8191,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,8191,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,8191,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,8191,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,16383,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,16383,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,16383,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,16383,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,32767,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,32767,0.021685334543387096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,32767,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,32767,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,65535,0.013898666948080063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,65535,0.02588266630967458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,65535,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,65535,0.025781333446502686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,131071,0.014560000350077948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,131071,0.04413333535194397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,1,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,1,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,1,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,1,0.010474666953086853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,131071,0.014655999839305878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,3,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,3,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,7,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,7,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,7,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,15,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,15,0.010421333213647207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,15,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,31,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,31,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,63,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,63,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,63,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,127,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,127,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,131071,0.035674666364987694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,255,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,255,0.009503999724984169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,255,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,255,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,511,0.009232000137368837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,511,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,511,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,511,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,1023,0.009125333279371262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,1023,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,1023,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,1023,0.011477333803971609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,2047,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,2047,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,2047,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,2047,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,4095,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,4095,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,4095,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,4095,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,8191,0.012768000364303589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,8191,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,8191,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,8191,0.017642666896184284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,16383,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,16383,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,16383,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,16383,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,32767,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,32767,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,32767,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,32767,0.0230880007147789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,65535,0.011920000116030375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,65535,0.04041599979003271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,65535,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,65535,0.031445334355036415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,131071,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,131071,0.06046933432420095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,1,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,131071,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,1,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,1,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,1,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,3,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,3,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,3,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,7,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,7,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,15,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,15,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,15,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,15,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,31,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,31,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,131071,0.051882664362589516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,31,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,31,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,63,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,63,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,127,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,255,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,255,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,511,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,255,0.011477333803971609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,511,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,511,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,1023,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,1023,0.012768000364303589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,1023,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,1023,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,2047,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,2047,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,2047,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,2047,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,4095,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,4095,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,4095,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,4095,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,8191,0.01267733300725619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,8191,0.018543999642133713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,8191,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,8191,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,16383,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,16383,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,16383,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,16383,0.020997333029905956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,32767,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,32767,0.012714666624863943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,32767,0.03773866593837738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,32767,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,65535,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,65535,0.012106666962305704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,65535,0.0517546683549881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,65535,0.05825600028038025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,131071,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,131071,0.014709333578745524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,131071,0.08900800347328186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,131071,0.10133866469065349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,1,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,1,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,1,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,1,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,3,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,3,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,3,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,7,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,7,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,15,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,7,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,15,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,15,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,31,0.009466666728258133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,31,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,31,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,31,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,63,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,63,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,63,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,127,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,255,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,255,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,255,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,255,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,511,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,511,0.012181332955757776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,511,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,1023,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,1023,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,1023,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,1023,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,2047,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,2047,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,2047,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,2047,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,4095,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,4095,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,4095,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,4095,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,8191,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,8191,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,8191,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,8191,0.02236266682545344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,16383,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,16383,0.03746666759252548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,16383,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,16383,0.031445334355036415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,32767,0.012298667182525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,32767,0.05816533168156942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,32767,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,32767,0.0506933331489563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,65535,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,65535,0.09931199749310811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,65535,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,65535,0.08666666348775227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,131071,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,131071,0.1843093236287435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,131071,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,1,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,1,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,1,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,1,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,131071,0.15827733278274536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,3,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,3,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,3,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,3,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,7,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,7,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,7,0.018816000471512478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,7,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,15,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,15,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,15,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,15,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,31,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,31,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,31,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,31,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,63,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,63,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,63,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,63,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,127,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,127,0.019621333728233974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,127,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,127,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,255,0.020970667401949566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,255,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,255,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,255,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,511,0.021370666722456615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,511,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,511,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,1,0.027072000006834667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,511,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,1,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,1,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,1,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,3,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,3,0.0269813338915507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,3,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,3,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,7,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,7,0.025546667476495106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,7,0.024495999018351238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,7,0.025098666548728943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,15,0.025621332228183746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,15,0.025583999852339428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,15,0.024234667420387268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,15,0.025360000630219776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,31,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,31,0.025594666600227356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,31,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,31,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,63,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,63,0.026341333985328674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,63,0.025610665480295818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,63,0.024879999458789825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,127,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,127,0.02604266752799352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,127,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,127,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,255,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,255,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,255,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,255,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,511,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,511,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,511,0.02586666742960612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,1,0.0401706670721372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,511,0.032032000521818794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,1,0.03959999978542328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,1,0.038021333515644073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,1,0.03748800108830134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,3,0.03995199998219808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,3,0.039647998909155525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,3,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,3,0.037530665596326195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,7,0.04159466673930486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,7,0.0397173340121905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,7,0.0373279998699824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,7,0.03787733366092046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,15,0.041759997606277466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,15,0.03957866628964742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,15,0.03743999948104223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,15,0.03756266583998998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,31,0.04154666761557261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,31,0.039664000272750854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,31,0.03753600021203359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,31,0.03745066622893015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,63,0.041365332901477814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,63,0.039919999738534294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,63,0.037589333951473236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,63,0.037871999045213066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,127,0.04018666595220566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,127,0.03953066716591517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,127,0.03712533414363861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,127,0.037461332976818085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,255,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,255,0.042624001701672874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,255,0.039818666875362396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,255,0.03589333345492681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,511,0.042133331298828125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,511,0.062074666221936546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,511,0.039520000418027244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,511,0.05388266841570536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,1,0.07048533360163371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,1,0.07062399884064992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,1,0.06417066852251689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,1,0.062208001812299095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,3,0.07039466500282288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,3,0.07063466807206471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,3,0.0642080008983612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,3,0.06407999992370605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,7,0.07049066821734111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,7,0.07040533423423767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,7,0.06438933312892914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,7,0.06444266438484192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,15,0.07042133311430614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,15,0.07039466500282288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,15,0.06438399851322174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,15,0.06400533517201741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,31,0.07020799815654755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,31,0.06426666676998138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,31,0.06938666601975758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,31,0.06414933502674103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,63,0.07041599849859874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,63,0.07049599786599477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,63,0.06258666515350342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,63,0.06434133152167003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,127,0.07026666899522145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,127,0.07053333520889282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,127,0.0625439981619517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,127,0.06216000020503998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,255,0.07444266478220622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,255,0.07258666555086772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,255,0.0662613312403361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,255,0.06444799900054932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,511,0.07454399764537811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,511,0.11146666606267293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,511,0.06638933221499126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,511,0.09507200121879578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,1,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,1,0.02975466599067052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,1,0.029509333272775013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,1,0.029114666084448498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,3,0.029674666623274486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,3,0.029696000119050343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,3,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,7,0.029919999341169994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,3,0.029509333272775013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,7,0.0296426663796107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,7,0.0296426663796107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,7,0.029338667790095013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,15,0.029487999776999157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,15,0.02935466667016347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,15,0.02974933385848999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,15,0.02754666656255722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,31,0.02958933264017105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,31,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,31,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,31,0.029338667790095013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,63,0.029504001140594482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,63,0.02961066613594691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,63,0.028234665592511494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,63,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,127,0.02980799973011017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,127,0.02958400050799052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,127,0.029130667448043823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,255,0.029450667401154835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,255,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,255,0.029258665939172108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,1,0.042954668402671814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,1,0.041946664452552795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,1,0.0395413339138031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,1,0.03956266740957896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,127,0.02794133375088374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,3,0.04350399971008301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,3,0.04359999795754751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,255,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,3,0.03978666663169861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,3,0.03995733211437861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,7,0.04362666606903076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,7,0.04238399863243103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,7,0.03976000100374222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,7,0.04009066770474116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,15,0.042277331153551735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,15,0.039994666973749794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,15,0.03973866750796636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,31,0.041984001795450844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,31,0.0420959989229838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,31,0.03975466638803482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,31,0.03974399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,63,0.04181866844495138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,63,0.04194133480389913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,63,0.039642666776975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,63,0.03987200061480204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,127,0.04171733558177948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,15,0.04391466577847799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,127,0.04215466479460398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,127,0.03966933240493139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,127,0.03965866565704346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,255,0.04428266485532125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,255,0.045647998650868736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,255,0.04185600082079569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,255,0.039647998909155525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,1,0.07259200016657512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,1,0.07242133220036824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,1,0.0663679987192154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,1,0.06621866424878438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,3,0.0728053351243337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,3,0.07250133156776428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,3,0.06665066878000896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,3,0.06614933411280315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,7,0.07238399982452393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,7,0.07234133283297221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,7,0.06653866668542226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,7,0.06638399759928386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,15,0.07270933190981548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,15,0.07259733478228252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,15,0.06594666838645935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,15,0.0663679987192154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,31,0.07256533205509186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,31,0.07230933507283528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,31,0.0661653329928716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,31,0.06643733382225037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,63,0.07266133526961009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,63,0.07225599884986877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,63,0.06604800124963124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,63,0.06638399759928386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,127,0.07374933362007141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,127,0.07268266876538594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,127,0.06447466711203258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,127,0.06621333460013072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,255,0.07701333363850911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,255,0.07550933460394542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,255,0.0684799998998642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,1,0.13281066219011942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,1,0.13226667046546936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,1,0.11830400427182515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,1,0.1178559958934784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,3,0.13237333297729492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,3,0.13223999738693237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,3,0.11773332953453064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,3,0.11758400003115337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,7,0.1320266624291738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,7,0.1332319974899292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,7,0.11795733372370402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,255,0.0691786656777064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,7,0.1197760005791982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,15,0.13248533010482788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,15,0.12001066406567891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,15,0.11797333757082622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,15,0.13223999738693237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,31,0.13210666179656982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,31,0.11817600329717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,31,0.11781866351763408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,63,0.13209600249926248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,63,0.11760000387827556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,63,0.13251733779907227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,63,0.11779200037320454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,127,0.1304266651471456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,127,0.11915733416875203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,31,0.1329919993877411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,127,0.11910399794578552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,255,0.1381493310133616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,255,0.1356106698513031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,1,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,1,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,255,0.1262453297773997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,255,0.1199733316898346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,1,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,3,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,3,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,7,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,7,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,7,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,7,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,15,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,15,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,15,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,15,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,31,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,31,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,31,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,31,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,63,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,63,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,127,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,127,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,127,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,127,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,255,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,255,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,255,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,511,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,511,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,511,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,511,0.012117333710193634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,1023,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,1023,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,1023,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,1023,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,2047,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,2047,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,2047,0.012682666381200155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,2047,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,4095,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,4095,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,127,0.13052266836166382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,4095,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,4095,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,8191,0.012746666868527731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,8191,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,8191,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,8191,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,16383,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,16383,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,16383,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,16383,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,32767,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,32767,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,32767,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,32767,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,65535,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,65535,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,65535,0.03385599950949351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,65535,0.04162666698296865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,1,0.0103946669648091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,1,0.0103946669648091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,1,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,1,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,3,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,3,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,3,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,3,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,7,0.010474666953086853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,7,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,7,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,7,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,15,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,15,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,15,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,31,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,31,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,31,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,63,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,63,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,63,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,63,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,127,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,127,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,127,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,255,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,255,0.010181333248813948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,255,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,511,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,511,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,511,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,1023,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,1023,0.01157333329319954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,1023,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,1023,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,2047,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,2047,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,2047,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,2047,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,4095,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,4095,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,4095,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,4095,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,8191,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,8191,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,8191,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,8191,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,16383,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,16383,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,16383,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,16383,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,32767,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,32767,0.03921066721280416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,32767,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,32767,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,65535,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,65535,0.058229332168896995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,1,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,65535,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,1,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,1,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,1,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,3,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,3,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,7,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,65535,0.05173333485921224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,7,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,7,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,15,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,15,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,15,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,31,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,31,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,31,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,63,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,63,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,63,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,127,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,127,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,127,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,127,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,255,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,255,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,255,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,511,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,511,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,511,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,1023,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,1023,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,1023,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,1023,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,2047,0.012618667135636011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,2047,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,2047,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,2047,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,4095,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,4095,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,4095,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,4095,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,8191,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,8191,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,8191,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,8191,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,16383,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,16383,0.03845866769552231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,16383,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,16383,0.03142400085926056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,32767,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,32767,0.0601440022389094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,32767,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,32767,0.05231466889381409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,65535,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,65535,0.10130666693051656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,1,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,65535,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,1,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,1,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,1,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,3,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,3,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,3,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,7,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,7,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,65535,0.08686932921409607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,7,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,7,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,15,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,15,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,15,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,15,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,31,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,31,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,31,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,63,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,31,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,63,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,63,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,63,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,127,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,127,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,255,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,255,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,255,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,255,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,511,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,1023,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,1023,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,1023,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,1023,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,2047,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,2047,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,2047,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,2047,0.013669333110253016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,4095,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,4095,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,4095,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,4095,0.021546666820844013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,8191,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,8191,0.037445334096749626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,8191,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,8191,0.029802667597929638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,16383,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,16383,0.058543999989827476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,16383,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,16383,0.05208000044027964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,32767,0.013359999905029932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,32767,0.09967999656995137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,32767,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,32767,0.0858026643594106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,65535,0.01350933313369751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,65535,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,65535,0.18330132961273193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,1,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,1,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,3,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,65535,0.15662933389345804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,3,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,7,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,7,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,15,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,15,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,31,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,31,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,63,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,63,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,127,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,255,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,255,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,511,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,511,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,1023,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,1023,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,2047,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,2047,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,4095,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,4095,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,8191,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,8191,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,16383,0.040218666195869446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,16383,0.02733866622050603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,1,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,1,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,32767,0.06284266710281372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,3,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,3,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,7,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,32767,0.04186133543650309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,15,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,15,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,31,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,63,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,63,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,127,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,255,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,127,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,511,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,511,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,1023,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,1023,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,2047,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,2047,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,4095,0.021615999440352123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,4095,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,8191,0.04026666780312856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,8191,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,16383,0.06178133189678192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,16383,0.039594667653242745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,1,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,1,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,32767,0.10390933354695638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,3,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,32767,0.05975999931494395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,7,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,7,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,15,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,31,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,63,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,63,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,127,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,255,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,255,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,511,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,1023,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,1023,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,2047,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,2047,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,4095,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,4095,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,8191,0.059674665331840515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,8191,0.03809600075085958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,16383,0.10185600320498149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,16383,0.05755199988683065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,1,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,3,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,1,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,3,0.01259200026591619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,7,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,32767,0.1845973332722982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,32767,0.09840533137321472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,7,0.011861333002646765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,15,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,15,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,31,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,31,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,63,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,63,0.011877333124478659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,127,0.015685333559910457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,127,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,255,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,255,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,511,0.014752000570297241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,511,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,1023,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,1023,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,2047,0.03974399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,2047,0.02380266785621643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,4095,0.03952533255020777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,4095,0.06100266675154368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,8191,0.06003733476003011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,8191,0.1037600040435791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,16383,0.10054399569829305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,16383,0.18930133183797201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,1,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,1,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,3,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,32767,0.18305599689483643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,7,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,32767,0.362768014272054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,15,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,31,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,31,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,63,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,127,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,255,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,255,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,511,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,1023,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,1023,0.011968000481526056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,2047,0.012240000069141388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,2047,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,4095,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,4095,0.01339200014869372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,8191,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,8191,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,16383,0.021573332448800404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,16383,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,32767,0.02938666691382726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,32767,0.02908266584078471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,65535,0.03271999955177307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,65535,0.031685332457224526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,1,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,1,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,3,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,3,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,7,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,7,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,131071,0.03718400001525879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,15,0.00996800015370051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,31,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,31,0.010362666721145311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,63,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,131071,0.035455999275048576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,63,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,127,0.010458666831254959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,127,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,255,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,511,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,511,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,1023,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,1023,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,2047,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,2047,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,4095,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,4095,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,8191,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,8191,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,16383,0.020784000555674236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,16383,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,32767,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,32767,0.023413332800070446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,65535,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,1,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,65535,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,1,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,3,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,3,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,7,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,15,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,15,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,31,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,31,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,63,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,127,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,127,0.010117333382368088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,131071,0.03126399964094162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,255,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,255,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,511,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,1023,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,131071,0.0273333340883255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,1023,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,2047,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,2047,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,4095,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,4095,0.01251199965675672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,8191,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,8191,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,16383,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,16383,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,32767,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,32767,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,65535,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,65535,0.023823998868465424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,1,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,1,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,3,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,3,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,7,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,7,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,131071,0.0272533322374026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,131071,0.04346133271853129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,15,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,15,0.012351999680201212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,31,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,31,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,63,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,63,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,127,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,127,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,255,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,255,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,511,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,511,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,1023,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,1023,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,2047,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,2047,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,4095,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,4095,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,8191,0.017685333887736004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,8191,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,16383,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,16383,0.017792000124851864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,32767,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,32767,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,65535,0.038560000558694206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,65535,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,1,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,3,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,3,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,7,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,131071,0.060047999024391174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,7,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,15,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,15,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,31,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,31,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,131071,0.04030933231115341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,127,0.01044800008336703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,127,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,255,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,255,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,511,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,511,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,1023,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,1023,0.012298667182525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,2047,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,2047,0.013424000392357508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,4095,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,4095,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,8191,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,8191,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,16383,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,16383,0.0229066660006841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,32767,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,32767,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,65535,0.027621333797772724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,65535,0.025472000241279602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,1,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,3,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,1,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,3,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,7,0.010522666076819101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,7,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,31,0.009797333429257074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,63,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,31,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,131071,0.031104000906149547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,127,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,63,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,131071,0.029290666182835896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,127,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,255,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,255,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,511,0.011594666788975397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,511,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,1023,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,1023,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,2047,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,2047,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,4095,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,4095,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,8191,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,8191,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,16383,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,16383,0.019653332730134327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,32767,0.019541333119074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,32767,0.022384000321229298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,65535,0.025040000677108765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,65535,0.02161066730817159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,1,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,1,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,3,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,7,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,15,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,131071,0.044154668847719826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,31,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,63,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,131071,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,127,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,255,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,511,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,511,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,1023,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,1023,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,2047,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,2047,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,4095,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,4095,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,8191,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,8191,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,16383,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,16383,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,32767,0.023290666441122692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,32767,0.020629333953062694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,65535,0.039594667653242745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,65535,0.024959998826185863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,1,0.009573333586255709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,1,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,3,0.0102186668664217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,3,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,7,0.010288000106811523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,131071,0.06020799775918325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,15,0.010026666646202406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,15,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,31,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,31,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,131071,0.04126933217048645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,63,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,127,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,127,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,255,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,255,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,511,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,511,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,1023,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,1023,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,2047,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,2047,0.014069333672523499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,4095,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,4095,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,8191,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,8191,0.01809599995613098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,16383,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,16383,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,32767,0.03841066608826319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,32767,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,65535,0.059664001067479454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,65535,0.03772266705830892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,1,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,1,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,3,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,3,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,7,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,7,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,31,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,31,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,131071,0.10022399822870891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,63,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,63,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,131071,0.059248000383377075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,127,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,255,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,511,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,511,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,1023,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,1023,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,2047,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,2047,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,4095,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,4095,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,8191,0.04178666571776072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,8191,0.02629866699377696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,1,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,1,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,16383,0.06274133423964183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,3,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,16383,0.043375998735427856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,3,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,7,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,15,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,31,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,31,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,63,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,127,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,127,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,255,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,255,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,511,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,511,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,1023,0.014709333578745524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,1023,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,2047,0.02067199970285098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,2047,0.018837332725524902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,4095,0.03968533376852671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,4095,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,8191,0.06010666489601135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,8191,0.03922666609287262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,1,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,16383,0.10230933626492818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,1,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,16383,0.057904000083605446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,3,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,3,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,7,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,7,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,15,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,31,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,15,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,31,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,63,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,127,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,127,0.012565333396196365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,255,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,255,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,511,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,511,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,1023,0.0186666672428449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,1023,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,2047,0.04005866746107737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,2047,0.025146665672461193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,4095,0.06171200176080068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,4095,0.04062400013208389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,8191,0.10351999600728352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,8191,0.0612960010766983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,1,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,16383,0.1884160041809082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,1,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,16383,0.10152533650398254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,3,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,3,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,7,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,7,0.01664000004529953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,15,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,31,0.014688000082969666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,15,0.013712000101804733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,31,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,63,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,63,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,127,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,255,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,127,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,255,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,511,0.018858666221300762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,511,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,1023,0.03569599986076355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,1023,0.021418665846188862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,2047,0.060991997520128884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,2047,0.039701332648595176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,4095,0.06126933296521505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,4095,0.10504532853762309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,8191,0.10347732901573181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,8191,0.18926932414372763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,1,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,1,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,3,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,3,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,7,0.011546666423479715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,7,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,15,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,15,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,16383,0.18668800592422485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,16383,0.3632693290710449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,31,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,31,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,127,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,63,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,255,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,255,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,511,0.011578666667143503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,1023,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,1023,0.011690666278203329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,2047,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,2047,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,4095,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,4095,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,8191,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,8191,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,16383,0.021989333132902782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,16383,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,32767,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,32767,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,65535,0.025754667818546295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,65535,0.025098666548728943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,1,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,1,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,3,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,131071,0.04642133414745331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,15,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,15,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,31,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,31,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,131071,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,63,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,127,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,255,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,511,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,511,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,1023,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,1023,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,2047,0.013781332721312841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,2047,0.013317332913478216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,4095,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,4095,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,8191,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,8191,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,16383,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,16383,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,32767,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,32767,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,65535,0.04164266586303711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,65535,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,1,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,1,0.011994666109482447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,3,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,7,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,7,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,15,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,15,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,31,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,31,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,131071,0.0626933326323827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,63,0.011722666521867117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,127,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,127,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,255,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,255,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,511,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,511,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,131071,0.043509334325790405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,1023,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,1023,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,2047,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,2047,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,4095,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,4095,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,8191,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,8191,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,16383,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,16383,0.02094399929046631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,32767,0.04121600091457367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,32767,0.02475200096766154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,65535,0.039887999494870506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,65535,0.060506666700045265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,1,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,1,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,3,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,3,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,7,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,15,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,31,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,63,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,63,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,127,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,127,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,131071,0.10357333223025005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,255,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,255,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,131071,0.061103999614715576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,511,0.012015999605258306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,511,0.01257066677014033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,1023,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,1023,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,2047,0.016074666132529575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,2047,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,4095,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,4095,0.015664000064134598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,8191,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,8191,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,16383,0.03982399900754293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,16383,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,32767,0.06084799766540527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,32767,0.038346665600935616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,65535,0.10266666611035664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,65535,0.05841066439946493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,1,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,1,0.01146666705608368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,3,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,3,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,131071,0.19052799542744955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,7,0.01157333329319954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,7,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,131071,0.09912000099817912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,15,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,31,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,31,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,63,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,63,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,127,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,127,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,255,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,255,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,511,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,511,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,1023,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,1023,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,2047,0.021520001192887623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,2047,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,4095,0.04048000027736028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,4095,0.025754667818546295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,1,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,8191,0.06141866743564606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,1,0.013440000514189402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,3,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,8191,0.041536000867684685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,7,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,3,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,7,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,15,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,15,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,31,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,31,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,63,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,63,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,127,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,127,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,255,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,255,0.01268799975514412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,511,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,511,0.013722666849692663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,1023,0.01886933296918869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,1023,0.016645333419243496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,2047,0.040463998913764954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,2047,0.02454400062561035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,4095,0.06186666587988535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,4095,0.03995199998219808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,1,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,1,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,3,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,8191,0.10461333394050598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,3,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,8191,0.060778667529424034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,7,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,7,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,15,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,15,0.014474666366974512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,31,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,31,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,63,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,63,0.013722666849692663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,127,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,127,0.013850666582584381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,255,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,255,0.01379199946920077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,511,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,511,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,1023,0.035631999373435974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,1023,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,2047,0.06185600161552429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,2047,0.039503999054431915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,4095,0.10346133510271709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,4095,0.06234133243560791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,1,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,1,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,3,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,8191,0.10336533188819885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,8191,0.18891199429829916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,3,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,7,0.018735999862353008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,7,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,15,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,15,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,31,0.018826667219400406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,31,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,63,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,63,0.01770666614174843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,127,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,127,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,255,0.01878400022784869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,255,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,511,0.03526933242877325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,511,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,1023,0.055498664577802025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,1023,0.03526400029659271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,2047,0.09876799583435059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,2047,0.059445331494013466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,4095,0.17817066113154092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,4095,0.09644800424575806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,1,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,8191,0.3370186487833659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,8191,0.17078399658203125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,1,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,3,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,3,0.013477332890033722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,7,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,7,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,15,0.013338666409254074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,15,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,31,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,31,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,63,0.014576000471909841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,63,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,127,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,127,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,255,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,255,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,511,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,511,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,1023,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,1023,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,1,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,2047,0.039808000127474465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,2047,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,1,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,3,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,7,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,7,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,3,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,15,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,15,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,31,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,31,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,63,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,63,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,127,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,255,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,127,0.014064000298579534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,255,0.014720000326633453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,511,0.01971199984351794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,511,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,1023,0.021509334444999695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,1023,0.035562666753927864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,2047,0.06294933458169301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,1,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,1,0.01756799966096878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,2047,0.0401706670721372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,3,0.018709332992633183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,3,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,7,0.01952533299724261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,7,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,15,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,15,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,31,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,31,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,63,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,63,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,127,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,127,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,255,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,255,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,511,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,511,0.03446933378775915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,1023,0.034714666505654655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,1023,0.05483733117580414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,1,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,1,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,2047,0.09845866759618123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,2047,0.05870933334032694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,3,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,3,0.025706666211287182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,7,0.027632000545660656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,7,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,15,0.029343999922275543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,15,0.02513599892457326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,31,0.027637332677841187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,31,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,63,0.029088000456492107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,63,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,127,0.027285332481066387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,127,0.025605333348115284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,255,0.03428266694148382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,255,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,511,0.055488000313440956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,511,0.036933332681655884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,1023,0.09514133135477702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,1023,0.05532266696294149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,2047,0.17850667238235474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,2047,0.09893866380055745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,1,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,1,0.016842667013406754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,3,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,3,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,7,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,7,0.016058667252461117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,15,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,15,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,31,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,31,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,63,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,63,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,127,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,127,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,255,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,255,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,511,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,511,0.0189280000825723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,1023,0.03677333394686381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,1023,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,1,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,1,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,3,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,3,0.01878400022784869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,7,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,7,0.019695999721686046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,15,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,15,0.018768000106016796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,31,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,63,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,63,0.018800000349680584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,31,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,127,0.020799999435742695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,127,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,255,0.019610666980346043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,255,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,511,0.0355679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,511,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,1023,0.05593066910902659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,1,0.029482667644818623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,1023,0.035717333356539406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,1,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,3,0.029146666328112285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,3,0.025472000241279602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,7,0.029631999631722767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,7,0.025621332228183746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,15,0.029653333127498627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,15,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,31,0.02935466667016347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,31,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,63,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,63,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,127,0.02937600016593933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,127,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,255,0.03566933423280716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,255,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,511,0.03690666705369949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,511,0.056373332937558494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,1023,0.05649066468079885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,1023,0.09526933232943217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,1,0.04595733185609182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,1,0.03977599988381068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,3,0.046207999189694725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,3,0.039877332746982574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,7,0.04584000011285146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,7,0.03968533376852671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,15,0.04607999821503957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,15,0.03965866565704346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,31,0.04572799801826477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,31,0.03974399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,63,0.04604266583919525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,63,0.03994133323431015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,127,0.04826666911443075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,127,0.039749334255854286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,255,0.05610666672388712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,255,0.042965332667032875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,511,0.06038933495680491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,511,0.09525332848230998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,1,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,1,0.01145600030819575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,3,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,7,0.0180479995906353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,1023,0.09702400366465251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,1023,0.1751413345336914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,15,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,15,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,31,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,31,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,63,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,127,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,255,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,255,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,511,0.011786667009194693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,511,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,1023,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,1023,0.012485332787036896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,2047,0.014042666802803675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,2047,0.013936000565687815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,4095,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,4095,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,8191,0.02033599962790807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,8191,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,16383,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,16383,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,32767,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,32767,0.02293866624434789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,65535,0.04253333310286204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,65535,0.027749332288901012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,1,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,1,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,3,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,3,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,7,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,131071,0.06404800216356914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,31,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,63,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,131071,0.04509866734345754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,127,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,127,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,255,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,511,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,255,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,511,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,1023,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,1023,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,2047,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,2047,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,4095,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,4095,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,8191,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,8191,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,16383,0.023567999402681988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,16383,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,32767,0.042405332128206887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,32767,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,65535,0.06155199805895487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,1,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,65535,0.0407679999868075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,1,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,3,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,3,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,7,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,7,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,15,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,15,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,31,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,31,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,63,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,63,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,127,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,127,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,131071,0.06187200049559275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,255,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,255,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,131071,0.10340266426404317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,511,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,511,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,1023,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,1023,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,2047,0.016143999993801117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,2047,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,4095,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,4095,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,8191,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,8191,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,16383,0.039733332892258964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,16383,0.023546665906906128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,32767,0.03818666686614355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,32767,0.06031466523806254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,65535,0.10224533081054688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,65535,0.05829333265622457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,1,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,1,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,3,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,15,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,31,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,63,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,131071,0.1009279986222585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,131071,0.18921599785486856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,127,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,255,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,127,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,511,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,511,0.011493333925803503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,1023,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,1023,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,2047,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,2047,0.01597333326935768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,4095,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,4095,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,8191,0.03920533259709676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,8191,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,16383,0.06025599936644236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,16383,0.03863999992609024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,32767,0.10326400399208069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,32767,0.05735999842484792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,65535,0.1892533302307129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,65535,0.09739200274149577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,1,0.023951999843120575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,131071,0.36314133803049725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,1,0.02147199958562851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,131071,0.18052266041437784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,3,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,3,0.021712000171343487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,7,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,7,0.021717332303524017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,15,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,31,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,15,0.021509334444999695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,31,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,63,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,63,0.02178666740655899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,127,0.02197333425283432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,127,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,255,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,255,0.021802666286627453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,511,0.037674665451049805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,1,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,511,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,1,0.027637332677841187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,3,0.030853333572546642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,3,0.02736533433198929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,7,0.029663999875386555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,7,0.027610667049884796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,15,0.030410667260487873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,15,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,31,0.029669334491093952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,31,0.027562665442625683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,63,0.03073599934577942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,63,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,127,0.029504001140594482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,127,0.02775999903678894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,255,0.03803733239571253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,255,0.02733866622050603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,511,0.05799466868241628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,511,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,1,0.049178664882977806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,1,0.04195733368396759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,3,0.049365331729253135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,3,0.04197333256403605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,7,0.04836800197760264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,7,0.041989331444104515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,15,0.04788800080617269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,15,0.04176533222198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,31,0.04790399968624115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,31,0.04195199906826019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,63,0.0479360024134318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,63,0.04221866528193156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,127,0.052069331208864846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,127,0.04174399872620901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,255,0.05856533348560333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,255,0.04541866481304169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,1,0.08477866649627686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,511,0.09869333108266194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,511,0.06202666461467743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,1,0.07049599786599477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,3,0.08343467116355896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,3,0.07073600093523662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,7,0.0848533312479655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,7,0.07037866612275441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,15,0.08415466547012329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,15,0.07050133248170216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,31,0.0846613347530365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,31,0.07071466743946075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,63,0.08365866541862488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,63,0.07072000205516815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,127,0.08498666683832805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,127,0.0713866651058197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,255,0.09887466828028361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,255,0.07418133318424225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,511,0.17705066998799643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,511,0.10950932900110881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,1,0.03761066744724909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,1,0.03363733241955439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,3,0.037605332831541695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,7,0.036042665441830955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,3,0.03396799912055334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,7,0.03350399931271871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,15,0.033802665770053864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,15,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,31,0.03751999884843826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,31,0.033530667424201965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,63,0.03738133360942205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,63,0.03376533339420954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,127,0.03754666695992152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,127,0.03370666752258936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,255,0.04251733422279358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,1,0.0460746685663859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,1,0.051701332132021584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,255,0.03382933388153712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,3,0.051776001850763954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,7,0.052202666799227394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,3,0.04599999884764353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,7,0.04567466676235199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,15,0.052069331208864846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,15,0.04584000011285146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,31,0.052111998200416565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,31,0.046053335070610046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,63,0.05202666421731313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,63,0.045834665497144066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,127,0.046069333950678505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,127,0.054048001766204834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,255,0.06293866535027821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,1,0.08874666690826416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,1,0.07487999896208446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,255,0.04974933465321859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,3,0.0885706643263499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,3,0.07660266757011414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,7,0.0867680013179779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,7,0.07634133100509644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,15,0.08828266461690266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,15,0.07667199770609538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,31,0.08703999718030293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,31,0.07567466795444489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,63,0.07633066674073537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,127,0.0885599950949351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,127,0.0773173322280248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,255,0.1043893297513326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,255,0.07897600034872691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,1,0.13403733571370444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,3,0.15651200215021768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,3,0.13425599535306296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,63,0.08946133653322856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,7,0.13435199856758118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,15,0.13434666395187378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,15,0.1572640041510264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,1,0.15752533078193665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,31,0.15709333618481955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,31,0.134250670671463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,63,0.15689067045847574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,7,0.1567039986451467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,63,0.13397333025932312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,127,0.1574079990386963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,127,0.13248533010482788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,1,0.012485332787036896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,1,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,3,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,7,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,7,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,15,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,31,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,31,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,63,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,63,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,127,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,255,0.1370186706384023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,255,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,255,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,511,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,255,0.18693333864212036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,1023,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,1023,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,2047,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,2047,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,4095,0.018415999909241993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,4095,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,8191,0.02004266654451688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,8191,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,16383,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,16383,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,32767,0.04234666625658671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,32767,0.027376001079877216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,1,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,1,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,3,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,3,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,7,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,7,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,15,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,15,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,65535,0.04377600053946177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,65535,0.06281066437562306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,31,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,31,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,63,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,127,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,127,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,255,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,511,0.01268799975514412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,511,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,1023,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,1023,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,2047,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,2047,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,4095,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,4095,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,8191,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,8191,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,16383,0.040394666294256844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,16383,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,32767,0.060453335444132485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,32767,0.03977599988381068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,1,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,1,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,3,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,3,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,65535,0.10361599922180176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,7,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,7,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,65535,0.05830933153629303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,15,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,31,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,31,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,63,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,63,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,127,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,127,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,511,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,511,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,1023,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,1023,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,2047,0.017504000415404636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,2047,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,4095,0.020981334149837494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,4095,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,8191,0.03956799954175949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,8191,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,16383,0.06081599990526835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,16383,0.03862933317820231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,32767,0.10286933183670044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,32767,0.05831466615200043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,1,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,3,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,1,0.01146666705608368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,3,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,7,0.012170666207869848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,7,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,65535,0.18673600753148398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,15,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,15,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,31,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,31,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,65535,0.09799999992052714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,63,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,127,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,255,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,255,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,511,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,511,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,1023,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,1023,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,2047,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,2047,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,4095,0.03827200084924698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,4095,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,8191,0.059562668204307556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,8191,0.037647999823093414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,16383,0.057664001981417336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,16383,0.10203733046849568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,32767,0.09681066870689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,32767,0.18730666240056357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,1,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,1,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,1,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,3,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,3,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,3,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,3,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,65535,0.1766293247540792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,7,0.010277333358923594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,65535,0.36004801591237384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,7,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,7,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,15,0.009850666547815004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,15,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,15,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,15,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,31,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,31,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,31,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,31,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,63,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,63,0.0100426667680343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,63,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,63,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,127,0.010106666634480158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,127,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,127,0.011493333925803503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,127,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,255,0.01044800008336703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,255,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,255,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,255,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,511,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,511,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,511,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,511,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,1023,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,1023,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,1023,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,1023,0.01191466674208641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,2047,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,2047,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,2047,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,2047,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,4095,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,4095,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,4095,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,4095,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,8191,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,8191,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,8191,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,8191,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,16383,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,16383,0.023738667368888855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,16383,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,16383,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,32767,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,32767,0.03769599894682566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,32767,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,32767,0.031717332700888314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,65535,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,1,0.011813333878914515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,65535,0.060592000683148704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,1,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,1,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,1,0.011477333803971609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,65535,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,3,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,3,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,3,0.010469333579142889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,3,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,7,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,7,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,15,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,15,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,15,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,15,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,31,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,65535,0.05211733281612396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,31,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,31,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,63,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,63,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,127,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,127,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,255,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,255,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,255,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,511,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,511,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,511,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,511,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,1023,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,1023,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,1023,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,1023,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,2047,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,2047,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,2047,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,2047,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,4095,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,4095,0.01876266673207283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,4095,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,4095,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,8191,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,8191,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,8191,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,8191,0.02250133454799652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,16383,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,16383,0.03851733356714249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,16383,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,16383,0.03176533430814743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,32767,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,32767,0.059877331058184304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,32767,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,32767,0.052144000927607216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,65535,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,65535,0.1018453339735667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,1,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,65535,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,1,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,1,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,1,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,3,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,3,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,3,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,7,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,7,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,7,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,65535,0.08685333530108134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,7,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,15,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,15,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,15,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,31,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,31,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,31,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,63,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,31,0.011770666887362799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,63,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,127,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,127,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,127,0.011936000237862269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,255,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,255,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,255,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,255,0.01146666705608368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,511,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,511,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,511,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,1023,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,511,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,1023,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,1023,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,1023,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,2047,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,2047,0.01810666670401891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,2047,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,2047,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,4095,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,4095,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,4095,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,4095,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,8191,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,8191,0.037818667789300285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,8191,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,8191,0.030106666187445324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,16383,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,16383,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,16383,0.05855466425418854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,16383,0.05158400038878123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,32767,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,32767,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,32767,0.099263995885849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,32767,0.08635200063387553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,65535,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,65535,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,65535,0.18253866831461588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,65535,0.15666666626930237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,1,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,1,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,1,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,3,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,3,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,3,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,7,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,7,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,7,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,15,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,15,0.00916800027092298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,15,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,15,0.010522666076819101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,31,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,31,0.009039999917149544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,31,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,63,0.009850666547815004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,63,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,63,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,127,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,127,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,127,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,255,0.00949866697192192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,255,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,511,0.009285333255926767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,511,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,511,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,511,0.011493333925803503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,1023,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,1023,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,1023,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,1023,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,2047,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,2047,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,2047,0.010431999961535135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,2047,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,4095,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,4095,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,4095,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,4095,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,8191,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,8191,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,8191,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,8191,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,16383,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,16383,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,16383,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,16383,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,32767,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,32767,0.027621333797772724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,32767,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,32767,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,65535,0.011717333147923151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,65535,0.029680001238981884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,65535,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,65535,0.029669334491093952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,131071,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,131071,0.03329066683848699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,1,0.00956266683836778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,131071,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,1,0.009402666861812273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,1,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,3,0.009306666751702627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,3,0.010255999863147736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,3,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,3,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,7,0.009375999992092451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,7,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,7,0.011498666057984034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,7,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,15,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,15,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,15,0.01146666705608368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,131071,0.03369600077470144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,31,0.010421333213647207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,31,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,31,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,31,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,63,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,127,0.009194666519761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,127,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,127,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,255,0.00916800027092298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,255,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,255,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,511,0.009194666519761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,511,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,511,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,1023,0.009189333145817121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,1023,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,1023,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,1023,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,2047,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,2047,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,2047,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,2047,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,4095,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,4095,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,4095,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,4095,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,8191,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,8191,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,8191,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,8191,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,16383,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,16383,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,16383,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,16383,0.019530666371186573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,32767,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,32767,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,32767,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,32767,0.021573332448800404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,65535,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,65535,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,65535,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,65535,0.02399466683467229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,131071,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,131071,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,1,0.008997333546479544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,1,0.009178666397929192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,131071,0.012666666259368261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,1,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,3,0.009152000149091085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,3,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,3,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,3,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,7,0.009130666653315226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,7,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,7,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,15,0.009248000259200731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,15,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,131071,0.027290667096773785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,31,0.009119999905427298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,31,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,31,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,31,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,63,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,63,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,127,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,127,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,127,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,255,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,255,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,255,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,511,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,511,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,511,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,1023,0.009056000038981438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,1023,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,1023,0.010490667074918747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,1023,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,2047,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,2047,0.009919999788204828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,2047,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,2047,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,4095,0.01022933361430963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,4095,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,4095,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,4095,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,8191,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,8191,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,8191,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,8191,0.01598400001724561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,16383,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,16383,0.017743999759356182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,16383,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,16383,0.017743999759356182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,32767,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,32767,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,32767,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,32767,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,65535,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,65535,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,65535,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,65535,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,131071,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,131071,0.012549333274364471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,131071,0.027461332579453785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,131071,0.0271519993742307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,1,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,1,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,1,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,1,0.01003200002014637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,3,0.00897066667675972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,3,0.009546666716535887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,3,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,7,0.009066666786869368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,7,0.008922666932145754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,7,0.010144000252087912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,15,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,15,0.010144000252087912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,15,0.009343999748428663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,31,0.009045333291093508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,31,0.009173333023985228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,31,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,31,0.010079999764760336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,63,0.00897066667675972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,63,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,63,0.010213333492477735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,127,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,127,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,127,0.010464000205198923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,255,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,255,0.009114666531483332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,255,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,255,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,511,0.009733333562811216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,511,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,511,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,511,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,1023,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,1023,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,1023,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,1023,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,2047,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,2047,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,2047,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,2047,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,4095,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,4095,0.012629333883523941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,4095,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,4095,0.012608000387748083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,8191,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,8191,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,8191,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,8191,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,16383,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,16383,0.02081599955757459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,16383,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,16383,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,32767,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,32767,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,32767,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,32767,0.021370666722456615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,65535,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,65535,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,65535,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,65535,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,131071,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,131071,0.027327999472618103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,1,0.009797333429257074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,1,0.009583999713261923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,131071,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,1,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,1,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,3,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,3,0.009173333023985228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,3,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,3,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,7,0.009205333267649015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,7,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,7,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,7,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,15,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,15,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,15,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,15,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,31,0.010490667074918747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,31,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,31,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,63,0.009328000247478485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,131071,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,63,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,63,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,127,0.008890666688481966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,127,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,127,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,127,0.012319999436537424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,255,0.009194666519761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,255,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,255,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,255,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,511,0.009194666519761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,511,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,511,0.011488000551859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,1023,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,1023,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,1023,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,1023,0.013317332913478216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,2047,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,2047,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,2047,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,2047,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,4095,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,4095,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,4095,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,4095,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,8191,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,8191,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,8191,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,8191,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,16383,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,16383,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,16383,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,16383,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,32767,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,32767,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,32767,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,32767,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,65535,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,65535,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,65535,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,65535,0.021589333812395733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,131071,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,131071,0.02734400083621343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,1,0.010421333213647207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,131071,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,1,0.01032533310353756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,1,0.010458666831254959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,1,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,3,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,3,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,7,0.010357333347201347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,7,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,7,0.010341333225369453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,15,0.010527999450763067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,15,0.008976000050703684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,131071,0.02757333219051361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,15,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,15,0.01020800011853377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,31,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,31,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,31,0.009754666437705358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,31,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,63,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,63,0.00891733355820179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,63,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,127,0.009008000294367472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,127,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,127,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,127,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,255,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,255,0.010239999741315842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,255,0.011626667032639185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,511,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,511,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,511,0.011690666278203329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,1023,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,1023,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,1023,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,1023,0.011722666521867117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,2047,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,2047,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,2047,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,4095,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,2047,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,4095,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,4095,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,4095,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,8191,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,8191,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,8191,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,8191,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,16383,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,16383,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,16383,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,16383,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,32767,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,32767,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,32767,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,32767,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,65535,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,65535,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,65535,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,65535,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,131071,0.012666666259368261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,131071,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,131071,0.03972266614437103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,131071,0.031557333966096245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,1,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,1,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,1,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,3,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,3,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,3,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,3,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,7,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,7,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,7,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,15,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,15,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,15,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,31,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,31,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,31,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,31,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,63,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,127,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,127,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,255,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,255,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,255,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,255,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,511,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,511,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,511,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,1023,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,1023,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,1023,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,1023,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,2047,0.013317332913478216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,2047,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,2047,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,2047,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,4095,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,4095,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,4095,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,4095,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,8191,0.01331199953953425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,8191,0.023872000475724537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,8191,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,8191,0.023018665611743927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,16383,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,16383,0.039962666730086006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,16383,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,16383,0.031119999786218006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,32767,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,32767,0.06012799839178721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,1,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,32767,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,1,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,3,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,1,0.012661332885424295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,3,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,3,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,7,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,7,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,7,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,32767,0.051818668842315674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,7,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,15,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,15,0.011445333560307821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,15,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,31,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,31,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,31,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,31,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,63,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,63,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,127,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,127,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,127,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,255,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,255,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,255,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,511,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,511,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,511,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,511,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,1023,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,1023,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,1023,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,1023,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,2047,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,2047,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,2047,0.013973332941532135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,2047,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,4095,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,4095,0.022986667851607006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,4095,0.013845333208640417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,4095,0.021562665700912476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,8191,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,8191,0.013434667140245438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,8191,0.03890133400758108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,8191,0.03141866624355316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,16383,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,16383,0.06060799956321716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,16383,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,16383,0.05172266562779745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,32767,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,32767,0.10128532846768697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,1,0.011744000017642975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,32767,0.013610667238632837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,1,0.01239466667175293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,1,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,1,0.01268799975514412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,3,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,3,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,32767,0.08724799752235413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,3,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,7,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,7,0.012693333129088083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,15,0.011503999431928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,15,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,15,0.012304000556468964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,15,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,31,0.011695999652147293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,31,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,31,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,31,0.012543999900420507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,63,0.012613333761692047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,63,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,63,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,63,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,127,0.011498666057984034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,127,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,127,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,127,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,255,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,255,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,255,0.011706666400035223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,511,0.012693333129088083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,511,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,511,0.011989332735538483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,255,0.012202666451533636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,511,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,1023,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,1023,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,1023,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,1023,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,2047,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,2047,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,2047,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,2047,0.015765332927306492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,4095,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,4095,0.039706667264302574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,4095,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,4095,0.030608000854651134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,8191,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,8191,0.060133333007494606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,8191,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,8191,0.052058666944503784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,16383,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,16383,0.10352533062299092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,16383,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,16383,0.08711999654769897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,32767,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,32767,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,32767,0.18751466274261475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,32767,0.15829867124557495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,1,0.00926399976015091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,1,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,1,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,3,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,3,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,3,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,3,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,7,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,7,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,7,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,15,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,15,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,15,0.010293333480755487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,31,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,31,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,31,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,31,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,63,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,63,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,63,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,63,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,127,0.009765333185593287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,127,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,127,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,255,0.010224000240365664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,255,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,255,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,255,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,511,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,511,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,511,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,511,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,1023,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,1023,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,1023,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,1023,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,2047,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,2047,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,2047,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,2047,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,4095,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,4095,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,4095,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,4095,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,8191,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,8191,0.018005333840847015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,8191,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,8191,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,16383,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,16383,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,16383,0.012058666596810022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,16383,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,32767,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,32767,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,32767,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,32767,0.021594665944576263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,65535,0.011429333438475927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,65535,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,65535,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,65535,0.021674667795499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,131071,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,131071,0.029738667110602062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,1,0.010527999450763067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,1,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,131071,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,1,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,1,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,3,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,3,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,3,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,3,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,7,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,7,0.009712000067035357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,7,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,7,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,15,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,15,0.009445333232482275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,15,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,31,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,31,0.009541333342591921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,31,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,131071,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,63,0.010522666076819101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,31,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,63,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,127,0.009674666449427605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,127,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,127,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,127,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,255,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,255,0.009434666484594345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,255,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,511,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,511,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,511,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,511,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,1023,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,1023,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,1023,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,1023,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,2047,0.012666666259368261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,2047,0.01341333364446958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,2047,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,2047,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,4095,0.012741333494583765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,4095,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,4095,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,4095,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,8191,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,8191,0.017701332767804463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,8191,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,8191,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,16383,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,16383,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,16383,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,16383,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,32767,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,32767,0.02086399992307027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,32767,0.01257066677014033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,32767,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,65535,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,65535,0.02533866713444392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,65535,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,65535,0.023936000963052113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,131071,0.013951999445756277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,131071,0.0422986646493276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,1,0.009919999788204828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,1,0.010026666646202406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,1,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,1,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,131071,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,3,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,3,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,3,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,7,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,7,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,7,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,15,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,15,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,131071,0.03369600077470144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,31,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,31,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,31,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,63,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,63,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,63,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,127,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,127,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,255,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,255,0.00926399976015091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,255,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,255,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,511,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,511,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,511,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,1023,0.010527999450763067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,1023,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,1023,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,1023,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,2047,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,2047,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,2047,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,2047,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,4095,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,4095,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,4095,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,8191,0.011786667009194693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,4095,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,8191,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,8191,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,8191,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,16383,0.020367999871571858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,16383,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,16383,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,16383,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,32767,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,32767,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,32767,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,32767,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,65535,0.012479999413092932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,65535,0.03967999915281931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,65535,0.012746666868527731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,65535,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,131071,0.014058666924635569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,131071,0.060319999853769936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,131071,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,131071,0.05179200073083242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,1,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,1,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,1,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,3,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,3,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,7,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,7,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,7,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,7,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,15,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,15,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,15,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,15,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,31,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,31,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,31,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,31,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,63,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,63,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,63,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,127,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,127,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,127,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,255,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,255,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,255,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,255,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,511,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,511,0.012074666718641916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,511,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,511,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,1023,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,1023,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,1023,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,1023,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,2047,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,2047,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,2047,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,2047,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,4095,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,4095,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,4095,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,4095,0.021695998807748158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,8191,0.01357866699496905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,8191,0.03865066667397817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,8191,0.013637332866589228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,8191,0.031717332700888314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,16383,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,16383,0.05881600081920624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,1,0.011509332805871964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,16383,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,1,0.011525332927703857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,1,0.01232533281048139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,1,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,3,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,3,0.011551999797423681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,16383,0.05194666484991709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,3,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,3,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,7,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,7,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,7,0.012538666526476542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,7,0.011893333246310553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,15,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,15,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,15,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,31,0.01268799975514412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,31,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,15,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,31,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,63,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,31,0.012543999900420507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,63,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,127,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,127,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,127,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,255,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,255,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,255,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,511,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,511,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,511,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,511,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,1023,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,1023,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,1023,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,1023,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,2047,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,2047,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,2047,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,2047,0.023290666441122692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,4095,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,4095,0.03988266736268997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,4095,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,4095,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,8191,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,8191,0.06135466694831848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,8191,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,8191,0.05180799961090088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,16383,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,16383,0.1034346620241801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,16383,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,16383,0.08713600039482117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,1,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,1,0.013760000467300415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,1,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,1,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,3,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,3,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,3,0.013359999905029932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,3,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,7,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,7,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,7,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,7,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,15,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,15,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,15,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,15,0.01368533323208491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,31,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,31,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,31,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,31,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,63,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,63,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,63,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,63,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,127,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,127,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,127,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,127,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,255,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,255,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,255,0.013546666751305262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,255,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,511,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,511,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,511,0.014773332824309668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,511,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,1023,0.01463466634353002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,1023,0.02233600119749705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,1023,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,1023,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,2047,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,2047,0.0399893323580424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,2047,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,2047,0.03146133323510488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,4095,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,4095,0.06107733150323232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,4095,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,4095,0.053130666414896645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,8191,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,8191,0.10557333628336589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,8191,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,8191,0.08691199620564778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,16383,0.017504000415404636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,16383,0.19003732999165854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,16383,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,1,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,1,0.011765333513418833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,1,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,1,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,16383,0.15812266866366068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,3,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,3,0.012629333883523941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,3,0.011663999408483505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,7,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,3,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,7,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,7,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,7,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,15,0.011989332735538483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,15,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,15,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,15,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,31,0.012282667060693106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,31,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,31,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,31,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,63,0.012671999633312225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,63,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,63,0.011994666109482447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,63,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,127,0.012335999558369318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,127,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,127,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,255,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,255,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,255,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,255,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,511,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,511,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,511,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,511,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,1023,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,1023,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,1023,0.012485332787036896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,1023,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,2047,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,2047,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,2047,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,2047,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,4095,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,4095,0.03961600114901861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,4095,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,4095,0.031317333380381264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,1,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,1,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,1,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,1,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,3,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,3,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,3,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,3,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,7,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,7,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,7,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,7,0.014463999619086584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,15,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,15,0.014794666320085526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,15,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,15,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,31,0.014773332824309668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,31,0.013397333522637686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,31,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,31,0.014762666076421738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,63,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,63,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,63,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,63,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,127,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,127,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,127,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,127,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,255,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,255,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,255,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,255,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,511,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,511,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,511,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,511,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,1023,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,1023,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,1023,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,1023,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,2047,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,2047,0.039877332746982574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,2047,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,2047,0.031727999448776245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,4095,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,4095,0.06115200122197469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,4095,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,1,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,1,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,1,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,4095,0.052229334910710655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,1,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,3,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,3,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,3,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,7,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,3,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,7,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,7,0.01860800012946129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,7,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,15,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,15,0.017642666896184284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,15,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,15,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,31,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,31,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,31,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,31,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,63,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,63,0.01756799966096878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,63,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,63,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,127,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,127,0.016789333273967106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,127,0.016389333953460056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,127,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,255,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,255,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,255,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,255,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,511,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,511,0.02186666677395503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,511,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,511,0.021418665846188862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,1023,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,1023,0.03378133227427801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,1023,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,1023,0.027893332143624622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,2047,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,2047,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,2047,0.05825066566467285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,2047,0.05040533343950907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,4095,0.02091199904680252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,4095,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,4095,0.09645866354306538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,4095,0.08084266881148021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,1,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,1,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,1,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,1,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,3,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,3,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,3,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,3,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,7,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,7,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,7,0.013850666582584381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,7,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,15,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,15,0.014794666320085526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,15,0.013616000612576803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,15,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,31,0.014709333578745524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,31,0.01458666721979777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,31,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,31,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,63,0.014783999572197596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,63,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,63,0.013376000026861826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,63,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,127,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,127,0.01351999988158544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,127,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,127,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,255,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,255,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,255,0.014202666779359182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,255,0.01339200014869372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,511,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,511,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,511,0.014602666099866232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,511,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,1023,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,1023,0.02149333308140437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,1023,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,1023,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,2047,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,2047,0.0400693342089653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,2047,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,1,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,1,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,1,0.016842667013406754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,2047,0.03159466634194056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,1,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,3,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,3,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,3,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,3,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,7,0.017632000148296356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,7,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,7,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,7,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,15,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,15,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,15,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,15,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,31,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,31,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,31,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,31,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,63,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,63,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,63,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,63,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,127,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,127,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,127,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,127,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,255,0.018021332720915478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,255,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,255,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,255,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,511,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,511,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,511,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,511,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,1023,0.018394666413466137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,1023,0.03523733218510946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,1023,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,1023,0.029045333464940388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,2047,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,2047,0.05876266459623972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,2047,0.019706666469573975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,2047,0.05022400120894114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,1,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,1,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,1,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,1,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,3,0.02533866713444392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,3,0.02552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,3,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,3,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,7,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,7,0.0249439999461174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,7,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,7,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,15,0.025077333052953083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,15,0.025146665672461193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,15,0.02312533309062322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,15,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,31,0.02497600018978119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,31,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,31,0.022869333624839783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,31,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,63,0.025418666501839954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,63,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,63,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,63,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,127,0.02497600018978119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,127,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,127,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,127,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,255,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,255,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,255,0.024160000185171764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,255,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,511,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,511,0.03761066744724909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,511,0.023647998770078022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,511,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,1023,0.025722667574882507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,1023,0.05608533322811127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,1023,0.025727999707063038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,1023,0.0499839981396993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,2047,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,2047,0.09808533390363057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,2047,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,1,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,1,0.009039999917149544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,1,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,3,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,3,0.009189333145817121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,2047,0.08296533425649007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,3,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,7,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,7,0.010255999863147736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,7,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,15,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,15,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,15,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,15,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,31,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,31,0.010304000228643417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,31,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,63,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,63,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,63,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,63,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,127,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,127,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,127,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,255,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,255,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,511,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,511,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,511,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,511,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,1023,0.010202666744589806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,1023,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,1023,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,1023,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,2047,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,2047,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,2047,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,2047,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,4095,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,4095,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,4095,0.012698666503032049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,4095,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,8191,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,8191,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,8191,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,8191,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,16383,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,16383,0.01870399961868922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,16383,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,16383,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,32767,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,32767,0.02090666691462199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,32767,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,32767,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,65535,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,65535,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,65535,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,65535,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,131071,0.014815999815861383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,131071,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,131071,0.03342933456103007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,131071,0.043791999419530235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,1,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,1,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,1,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,3,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,3,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,3,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,7,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,7,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,7,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,15,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,15,0.01055466632048289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,15,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,31,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,31,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,31,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,63,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,63,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,63,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,63,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,127,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,127,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,127,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,255,0.010415999839703241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,255,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,255,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,255,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,511,0.012106666962305704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,511,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,511,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,511,0.012725333372751871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,1023,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,1023,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,1023,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,1023,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,2047,0.012693333129088083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,2047,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,2047,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,2047,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,4095,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,4095,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,4095,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,4095,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,8191,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,8191,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,8191,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,8191,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,16383,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,16383,0.02067733307679494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,16383,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,16383,0.019551999866962433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,32767,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,32767,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,32767,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,32767,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,65535,0.012432000289360682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,65535,0.04112533231576284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,65535,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,65535,0.03137599925200144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,131071,0.014117332796255747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,131071,0.061162665486335754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,1,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,1,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,1,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,3,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,3,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,131071,0.014096000542243322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,3,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,3,0.012639999389648438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,7,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,7,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,7,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,7,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,15,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,15,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,31,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,31,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,31,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,63,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,63,0.011482667177915573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,63,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,131071,0.052485331892967224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,127,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,63,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,127,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,127,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,127,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,255,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,255,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,255,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,255,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,511,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,511,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,511,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,511,0.0120319997270902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,1023,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,1023,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,1023,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,1023,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,2047,0.012741333494583765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,2047,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,2047,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,2047,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,4095,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,4095,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,4095,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,4095,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,8191,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,8191,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,8191,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,8191,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,16383,0.011557333171367645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,16383,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,16383,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,16383,0.02163733293612798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,32767,0.012682666381200155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,32767,0.037989333271980286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,32767,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,32767,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,65535,0.011711999773979187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,65535,0.05977599819501241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,65535,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,65535,0.05220800141493479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,131071,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,131071,0.10326932867368062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,131071,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,1,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,1,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,131071,0.08899199962615967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,1,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,1,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,3,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,3,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,3,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,3,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,7,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,7,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,7,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,15,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,7,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,15,0.018485333770513535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,15,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,15,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,31,0.01833600054184596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,31,0.01851733277241389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,31,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,31,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,63,0.017968000223239262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,63,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,63,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,63,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,127,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,127,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,127,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,127,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,255,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,255,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,255,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,255,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,511,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,511,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,511,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,511,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,1023,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,1023,0.03438399980465571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,1023,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,1,0.02553066611289978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,1023,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,1,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,1,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,1,0.0239680012067159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,3,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,3,0.02573866645495097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,3,0.02350933353106181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,3,0.024341332415739696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,7,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,7,0.02553066611289978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,7,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,7,0.023962666591008503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,15,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,15,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,15,0.023605334262053173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,15,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,31,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,31,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,31,0.025616000096003216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,31,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,63,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,63,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,63,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,63,0.024234667420387268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,127,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,127,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,127,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,127,0.02363733450571696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,255,0.025594666600227356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,255,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,255,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,255,0.023605334262053173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,511,0.03757333258787791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,511,0.02553066611289978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,511,0.025061334172884624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,511,0.03209600100914637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,1023,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,1023,0.05641066531340281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,1023,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,1,0.039749334255854286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,1,0.03956799954175949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,1023,0.047824000318845115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,1,0.035887998839219414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,1,0.037861332297325134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,3,0.03955200066169103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,3,0.03771200031042099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,3,0.039621333281199135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,3,0.03586133321126302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,7,0.03987200061480204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,7,0.035674666364987694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,7,0.039621333281199135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,7,0.03749333322048187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,15,0.03950933367013931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,15,0.040005333721637726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,15,0.035973332822322845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,15,0.03751466671625773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,31,0.03994133323431015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,31,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,31,0.03737599899371465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,31,0.040421334405740104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,63,0.039808000127474465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,63,0.03995199998219808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,63,0.03740799923737844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,63,0.03787733366092046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,127,0.03972266614437103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,127,0.039493332306543984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,127,0.03571200122435888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,127,0.03602133442958196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,255,0.042037333051363625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,255,0.041946664452552795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,255,0.03749866783618927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,255,0.03572266548871994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,511,0.042064001162846885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,511,0.03754133234421412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,511,0.06229866544405619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,511,0.054373333851496376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,1023,0.042026668787002563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,1023,0.038319999972979225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,1023,0.0974133312702179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,1023,0.08303999900817871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,1,0.02571200082699458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,1,0.026608000199000042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,1,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,1,0.025279998779296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,3,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,3,0.026895999908447266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,3,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,3,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,7,0.025568000972270966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,7,0.026821332673231762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,7,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,7,0.025397333006064098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,15,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,15,0.02621866762638092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,15,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,15,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,31,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,31,0.02643200010061264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,31,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,31,0.02548266698916753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,63,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,63,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,63,0.025472000241279602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,63,0.025653332471847534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,127,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,127,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,127,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,127,0.02589333305756251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,255,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,255,0.02571200082699458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,255,0.025589334468046825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,255,0.02587733417749405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,511,0.027242665489514668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,511,0.03755199909210205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,511,0.025568000972270966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,511,0.033802665770053864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,1,0.03783999880154928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,1,0.03832533210515976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,3,0.04070399949947993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,3,0.0401653324564298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,3,0.03798400113979975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,1,0.04048533240954081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,7,0.04030400017897288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,1,0.040378667414188385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,7,0.03984000037113825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,7,0.03796799977620443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,7,0.037690666814645134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,15,0.040106666584809623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,15,0.0420959989229838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,15,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,3,0.03825066735347112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,15,0.03736000011364619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,31,0.041573333243529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,31,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,31,0.037647999823093414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,31,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,63,0.04162666698296865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,63,0.04125333329041799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,63,0.037589333951473236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,63,0.03796799977620443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,127,0.04002666721741358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,127,0.039850667119026184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,127,0.03774400055408478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,127,0.037471999724706016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,255,0.04173333446184794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,255,0.03976533313592275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,255,0.03760000069936117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,511,0.04252799848715464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,511,0.03955733279387156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,511,0.06251200040181477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,511,0.054192001620928444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,1,0.07067200044790904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,1,0.0706826647122701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,1,0.06435733536879222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,255,0.042730664213498436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,1,0.06420266628265381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,3,0.07021333277225494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,3,0.070592001080513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,3,0.0642133355140686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,3,0.06457066535949707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,7,0.07045333087444305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,7,0.07030933101971944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,7,0.06419733166694641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,7,0.06414400041103363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,15,0.0706879993279775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,15,0.07032533486684163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,15,0.06434666613737743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,15,0.06411199768384297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,31,0.07036800185839336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,31,0.07070933282375336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,31,0.06439466774463654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,31,0.06447466711203258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,63,0.07047999898592631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,63,0.07045333087444305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,63,0.062314664324124656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,63,0.06482133269309998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,127,0.0707893321911494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,127,0.07100800176461537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,127,0.06243733565012614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,127,0.06273599962393443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,255,0.07487466434637706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,255,0.07288533449172974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,255,0.0665280024210612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,255,0.06681066751480103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,511,0.07458666463692983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,1,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,511,0.06656533479690552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,511,0.11202667156855266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,1,0.012213333199421564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,1,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,3,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,3,0.010453333457310995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,3,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,7,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,7,0.010464000205198923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,15,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,7,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,15,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,15,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,15,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,511,0.0960106650988261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,31,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,31,0.009525333220760027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,31,0.01246400053302447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,63,0.009695999945203463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,63,0.009829333052039146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,63,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,63,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,127,0.009829333052039146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,127,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,127,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,255,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,255,0.010133333504199982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,255,0.010426666587591171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,511,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,255,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,511,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,511,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,1023,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,1023,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,511,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,1023,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,1023,0.011717333147923151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,2047,0.01793066660563151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,2047,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,2047,0.012725333372751871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,2047,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,4095,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,4095,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,4095,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,4095,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,8191,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,8191,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,8191,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,8191,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,16383,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,16383,0.02094399929046631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,16383,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,16383,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,32767,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,32767,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,32767,0.01349866638580958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,32767,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,65535,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,65535,0.041162667175134025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,65535,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,65535,0.03160000095764796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,131071,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,131071,0.0617439995209376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,1,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,131071,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,1,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,1,0.011535999675591787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,1,0.011578666667143503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,3,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,3,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,7,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,3,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,7,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,7,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,7,0.011567999919255575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,15,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,15,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,15,0.011477333803971609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,15,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,31,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,31,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,63,0.010106666634480158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,63,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,131071,0.05221866567929586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,127,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,127,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,127,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,255,0.009813333551088968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,511,0.009402666861812273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,255,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,511,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,511,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,1023,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,1023,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,1023,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,1023,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,2047,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,2047,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,2047,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,2047,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,4095,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,4095,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,4095,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,4095,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,8191,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,8191,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,8191,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,8191,0.018426666657129925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,16383,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,16383,0.023562667270501454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,16383,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,16383,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,32767,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,32767,0.03997333347797394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,32767,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,32767,0.03154666721820831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,65535,0.013466666142145792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,65535,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,65535,0.05996266504128774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,65535,0.052058666944503784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,131071,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,131071,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,131071,0.10362666845321655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,131071,0.08916800220807393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,1,0.009418666362762451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,1,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,1,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,3,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,3,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,3,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,7,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,15,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,15,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,31,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,31,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,31,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,31,0.011477333803971609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,63,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,63,0.010522666076819101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,63,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,127,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,127,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,127,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,255,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,255,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,255,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,511,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,511,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,511,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,511,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,1023,0.010458666831254959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,1023,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,1023,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,1023,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,2047,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,2047,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,2047,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,2047,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,4095,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,4095,0.018245333184798557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,4095,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,4095,0.018197332819302876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,8191,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,8191,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,8191,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,8191,0.021503999829292297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,16383,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,16383,0.03930133332808813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,16383,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,16383,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,32767,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,32767,0.05931200087070465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,32767,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,32767,0.052058666944503784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,65535,0.013354666531085968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,65535,0.09970133503278096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,65535,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,65535,0.08691199620564778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,131071,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,1,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,131071,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,131071,0.1855306625366211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,3,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,7,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,3,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,131071,0.15914666652679443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,7,0.011760000139474869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,15,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,31,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,31,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,63,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,255,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,127,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,255,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,511,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,511,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,1023,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,1023,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,2047,0.01653333380818367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,2047,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,4095,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,4095,0.017722666263580322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,8191,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,8191,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,16383,0.0402399996916453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,16383,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,32767,0.06198399762312571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,32767,0.03867733230193456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,1,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,3,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,3,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,7,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,7,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,15,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,31,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,65535,0.10353066523869832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,65535,0.059749335050582886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,63,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,255,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,511,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,511,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,1023,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,1023,0.013397333522637686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,2047,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,2047,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,4095,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,4095,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,8191,0.040778666734695435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,8191,0.02364266663789749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,16383,0.06176533301671346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,16383,0.03968533376852671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,32767,0.10372266173362732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,32767,0.05830933153629303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,1,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,1,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,65535,0.1881600022315979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,3,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,3,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,7,0.011445333560307821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,7,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,65535,0.0983679989973704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,15,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,15,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,31,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,63,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,63,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,127,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,255,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,511,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,511,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,1023,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,1023,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,2047,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,2047,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,4095,0.038378665844599404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,4095,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,8191,0.06035199761390686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,8191,0.03789866715669632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,16383,0.10288000106811523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,16383,0.05845866600672404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,32767,0.18525334199269614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,1,0.009136000027259191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,32767,0.09699733058611552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,1,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,3,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,3,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,7,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,7,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,15,0.009258666386206945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,15,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,31,0.009317333499590555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,31,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,63,0.009056000038981438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,63,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,127,0.009119999905427298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,127,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,65535,0.3535786469777425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,255,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,255,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,511,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,511,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,65535,0.17591466506322226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,1023,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,1023,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,2047,0.012682666381200155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,2047,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,4095,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,8191,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,4095,0.013552000125249227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,8191,0.016837333639462788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,16383,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,16383,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,32767,0.027461332579453785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,32767,0.02789866675933202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,65535,0.03188266605138779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,65535,0.03126399964094162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,1,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,1,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,3,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,7,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,15,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,15,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,31,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,31,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,63,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,63,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,131071,0.035717333356539406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,131071,0.035599999129772186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,127,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,255,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,511,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,511,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,1023,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,1023,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,2047,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,2047,0.012810666114091873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,4095,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,4095,0.011882666498422623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,8191,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,8191,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,16383,0.020015999674797058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,16383,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,32767,0.023792001108328503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,32767,0.022991999983787537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,65535,0.025594666600227356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,65535,0.023567999402681988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,1,0.01009599988659223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,1,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,3,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,3,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,7,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,131071,0.029738667110602062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,7,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,15,0.009408000235756239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,15,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,31,0.010079999764760336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,131071,0.0276053324341774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,31,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,63,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,63,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,127,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,127,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,255,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,511,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,1023,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,1023,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,2047,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,2047,0.012133333832025528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,4095,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,4095,0.012661332885424295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,8191,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,8191,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,16383,0.018858666221300762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,16383,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,32767,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,32767,0.019440000255902607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,65535,0.025813333690166473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,65535,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,1,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,7,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,7,0.01191466674208641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,15,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,131071,0.04466133316357931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,31,0.013983999689420065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,63,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,63,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,127,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,131071,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,127,0.012448000411192576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,255,0.009514666472872099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,255,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,511,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,1023,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,1023,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,511,0.012762666990359625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,2047,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,2047,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,4095,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,4095,0.013872000078360239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,8191,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,8191,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,16383,0.020101333657900494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,16383,0.020207999895016353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,32767,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,32767,0.023573334018389385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,65535,0.02565866708755493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,1,0.008922666932145754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,65535,0.023541333774725597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,3,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,3,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,7,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,15,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,31,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,63,0.011482667177915573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,63,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,131071,0.03146666785081228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,127,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,131071,0.027263998985290527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,255,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,127,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,255,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,511,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,511,0.011493333925803503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,1023,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,1023,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,2047,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,2047,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,4095,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,4095,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,8191,0.017637333522240322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,8191,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,16383,0.019546666493018467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,16383,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,32767,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,32767,0.02037866661945979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,65535,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,65535,0.022970666488011677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,1,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,1,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,3,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,7,0.010293333480755487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,7,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,15,0.009941333283980688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,131071,0.04497600098450979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,15,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,31,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,31,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,131071,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,63,0.009706666693091393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,127,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,127,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,255,0.012005332857370377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,511,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,511,0.011488000551859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,1023,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,1023,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,2047,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,2047,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,4095,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,4095,0.015552000453074774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,8191,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,8191,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,16383,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,16383,0.01770666614174843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,32767,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,32767,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,65535,0.040175999204317726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,65535,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,1,0.011887999872366587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,1,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,3,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,3,0.011968000481526056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,7,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,7,0.011706666400035223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,15,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,131071,0.061386664708455406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,15,0.011519999553759893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,31,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,63,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,31,0.011589333415031433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,127,0.011535999675591787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,131071,0.04167999823888143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,255,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,255,0.011829332758982977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,511,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,511,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,1023,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,1023,0.012831999609867731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,2047,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,2047,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,4095,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,4095,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,8191,0.041290665666262306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,8191,0.023962666591008503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,16383,0.06264000137646993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,16383,0.03957333415746689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,1,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,3,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,3,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,7,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,32767,0.10371733705202739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,7,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,15,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,31,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,32767,0.06002666552861532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,63,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,63,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,127,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,127,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,255,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,511,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,255,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,511,0.012106666962305704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,1023,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,1023,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,2047,0.021221332252025604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,2047,0.017797333498795826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,4095,0.040250666439533234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,4095,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,8191,0.06012799839178721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,8191,0.03898133337497711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,16383,0.10262399911880493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,16383,0.05842133363087972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,1,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,1,0.012762666990359625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,3,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,3,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,32767,0.18775999546051025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,7,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,32767,0.09900800387064616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,15,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,7,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,15,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,31,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,31,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,63,0.012741333494583765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,63,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,127,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,127,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,255,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,255,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,511,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,511,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,1023,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,1023,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,2047,0.03958933303753535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,2047,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,4095,0.06025066475073496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,4095,0.039834665755430855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,8191,0.10357333223025005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,8191,0.06018133461475372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,16383,0.18766399224599203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,1,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,1,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,16383,0.10190932949384053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,3,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,7,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,3,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,15,0.0103946669648091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,31,0.010058666889866194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,32767,0.35653865337371826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,31,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,63,0.010191999996701876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,255,0.009941333283980688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,127,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,127,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,32767,0.183786670366923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,255,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,511,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,1023,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,1023,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,2047,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,2047,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,4095,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,4095,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,8191,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,8191,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,16383,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,16383,0.019744000087181728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,32767,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,32767,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,65535,0.02741866558790207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,65535,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,1,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,1,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,3,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,3,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,7,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,7,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,15,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,15,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,31,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,63,0.009610666582981745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,63,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,131071,0.04708800216515859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,127,0.01022933361430963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,131071,0.029680001238981884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,127,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,255,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,255,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,511,0.011519999553759893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,511,0.012175999581813812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,1023,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,1023,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,2047,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,2047,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,4095,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,4095,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,8191,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,8191,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,16383,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,16383,0.020037333170572918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,32767,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,32767,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,65535,0.041936000188191734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,65535,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,1,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,1,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,3,0.010202666744589806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,3,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,7,0.009962666779756546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,131071,0.06449600060780843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,15,0.010048000141978264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,15,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,31,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,31,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,63,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,131071,0.04385066529115041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,255,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,511,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,511,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,1023,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,1023,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,2047,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,2047,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,4095,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,4095,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,8191,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,8191,0.019925333559513092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,16383,0.02350933353106181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,16383,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,32767,0.04081599911053976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,32767,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,65535,0.06052800019582113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,65535,0.039919999738534294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,1,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,3,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,1,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,131071,0.1030613382657369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,3,0.01166933278242747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,7,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,7,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,15,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,131071,0.061280002196629844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,31,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,15,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,31,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,63,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,63,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,127,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,127,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,255,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,255,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,511,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,511,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,1023,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,1023,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,2047,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,2047,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,4095,0.04005866746107737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,4095,0.023013333479563396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,8191,0.06115733087062836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,8191,0.03841600070397059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,1,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,1,0.011488000551859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,3,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,3,0.012047999848922094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,16383,0.05845333139101664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,16383,0.10377599795659383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,7,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,7,0.012181332955757776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,15,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,15,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,31,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,31,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,63,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,63,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,127,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,127,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,255,0.01192533348997434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,255,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,511,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,511,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,1023,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,1023,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,2047,0.03976000100374222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,2047,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,4095,0.06247466802597046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,4095,0.03974399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,8191,0.10454400380452473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,8191,0.06164266665776571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,1,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,1,0.013450667262077332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,16383,0.18813333908716837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,3,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,16383,0.10160533587137859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,3,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,7,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,7,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,15,0.013861333330472311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,15,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,31,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,31,0.014565333724021912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,63,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,63,0.013637332866589228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,127,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,127,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,255,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,255,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,511,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,511,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,1023,0.035717333356539406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,1023,0.021498667697111767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,2047,0.062080000837643944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,2047,0.040448000033696495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,4095,0.06260266900062561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,4095,0.10392000277837117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,8191,0.18861865997314453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,8191,0.10357866684595744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,1,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,16383,0.36004265149434406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,1,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,16383,0.1874986688296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,3,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,3,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,7,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,7,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,15,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,15,0.01249066616098086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,31,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,63,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,63,0.011509332805871964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,31,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,127,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,127,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,255,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,511,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,255,0.0122079998254776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,511,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,1023,0.01764800027012825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,1023,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,2047,0.04031466692686081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,2047,0.02380266785621643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,4095,0.06182933350404104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,1,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,4095,0.04009599983692169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,1,0.013493333011865616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,3,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,3,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,7,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,7,0.013898666948080063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,15,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,15,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,31,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,31,0.01339200014869372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,63,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,63,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,127,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,127,0.013424000392357508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,255,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,255,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,511,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,511,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,1023,0.03538133452335993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,1023,0.0216799999276797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,2047,0.062261333068211876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,2047,0.040218666195869446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,1,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,4095,0.10458667079607646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,1,0.017637333522240322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,3,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,4095,0.06263466676076253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,3,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,7,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,7,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,15,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,15,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,31,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,31,0.019648000597953796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,63,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,63,0.01854933301607768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,127,0.018826667219400406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,127,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,255,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,255,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,511,0.03445333242416382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,511,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,1023,0.053861334919929504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,1023,0.03386666625738144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,2047,0.09956799944241841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,2047,0.05856533348560333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,4095,0.17566933234532675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,1,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,4095,0.0972053309281667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,1,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,3,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,3,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,7,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,7,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,15,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,15,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,31,0.015642666568358738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,31,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,63,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,127,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,63,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,127,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,255,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,255,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,511,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,511,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,1023,0.035786665976047516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,1023,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,1,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,2047,0.06235733131567637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,2047,0.040661332507928215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,1,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,3,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,7,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,3,0.01854933301607768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,15,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,7,0.01773333301146825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,15,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,31,0.019882666567961376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,31,0.01876266673207283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,63,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,127,0.01806933308641116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,127,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,63,0.020010666300853092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,255,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,255,0.01757866640885671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,511,0.03368533402681351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,511,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,1023,0.054042667150497437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,1023,0.03484266748030981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,1,0.02794666588306427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,2047,0.09902933239936829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,1,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,2047,0.05871999760468801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,3,0.02735999971628189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,3,0.025034666061401367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,7,0.027749332288901012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,7,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,15,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,15,0.024906667570273083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,31,0.029285334050655365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,31,0.02513066679239273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,63,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,63,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,127,0.029205332199732464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,127,0.024282666544119518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,255,0.033717334270477295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,255,0.025472000241279602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,511,0.056608001391092934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,511,0.035887998839219414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,1023,0.09446932872136433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,1023,0.05570666491985321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,1,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,1,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,3,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,3,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,7,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,15,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,15,0.011551999797423681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,2047,0.1794933279355367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,2047,0.09689066807428996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,31,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,63,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,63,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,127,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,127,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,255,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,511,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,1023,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,511,0.011578666667143503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,1023,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,2047,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,2047,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,4095,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,4095,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,8191,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,8191,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,16383,0.02093333254257838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,16383,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,32767,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,32767,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,65535,0.04182399809360504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,65535,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,1,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,1,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,3,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,3,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,7,0.011488000551859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,131071,0.06279466549555461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,7,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,15,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,31,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,31,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,63,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,131071,0.04313066601753235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,127,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,127,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,255,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,255,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,511,0.011498666057984034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,511,0.012576000144084295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,1023,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,1023,0.011898666620254517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,2047,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,2047,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,4095,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,4095,0.01777600000301997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,8191,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,8191,0.018874666343132656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,16383,0.023685333629449207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,16383,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,32767,0.04197866717974345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,32767,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,65535,0.061706667145093284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,65535,0.04146666576464971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,1,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,1,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,3,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,3,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,7,0.011429333438475927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,7,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,15,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,15,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,131071,0.10421866178512573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,31,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,31,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,63,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,131071,0.061834668119748436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,127,0.011519999553759893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,127,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,255,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,511,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,255,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,511,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,1023,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,1023,0.01184533288081487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,2047,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,2047,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,4095,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,4095,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,8191,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,8191,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,16383,0.03903999924659729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,16383,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,32767,0.060458665092786155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,32767,0.0379573330283165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,65535,0.10227200388908386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,65535,0.058090666929880776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,1,0.019861333072185516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,1,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,131071,0.18811200062433878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,131071,0.10069333513577779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,3,0.019493332753578823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,3,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,7,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,7,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,15,0.019530666371186573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,15,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,31,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,31,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,63,0.019567999988794327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,63,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,127,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,127,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,255,0.020655999581019085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,255,0.01863466699918111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,511,0.03472533325354258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,511,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,1023,0.058730666836102806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,1,0.02826666583617528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,1023,0.03551999976237615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,1,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,3,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,3,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,7,0.029653333127498627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,7,0.025775998830795288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,15,0.029333333174387615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,15,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,31,0.029418667157491047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,31,0.025744001070658367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,63,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,63,0.025605333348115284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,127,0.02959466725587845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,127,0.025583999852339428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,255,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,255,0.025626666843891144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,511,0.056032001972198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,511,0.03777066618204117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,1023,0.09486400087674458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,1023,0.056320001681645714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,1,0.04632533093293508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,3,0.04596266647179922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,1,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,3,0.0401653324564298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,7,0.04569066564242045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,7,0.039674667020638786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,15,0.0460746685663859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,31,0.04571733375390371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,31,0.03995733211437861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,15,0.04005866746107737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,63,0.046053335070610046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,63,0.03980266551176707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,127,0.048538664976755776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,127,0.03999999910593033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,255,0.05608533322811127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,255,0.04180799921353658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,511,0.09522133072217305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,511,0.06014933188756307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,1023,0.17300800482432047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,1023,0.09575466314951579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,1,0.030597334106763203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,1,0.027488000690937042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,3,0.0317546675602595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,3,0.027301333844661713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,7,0.027589333554108936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,7,0.029722665747006733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,15,0.03163733333349228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,15,0.027450665831565857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,31,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,31,0.02756800005833308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,63,0.02757333219051361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,63,0.03145600110292435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,127,0.02974933385848999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,127,0.02775466690460841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,255,0.0377866675456365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,255,0.027493332823117573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,511,0.058448001742362976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,511,0.03958400090535482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,1,0.04801600178082784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,1,0.0417546679576238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,3,0.0480373352766037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,7,0.048122664292653404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,3,0.041696002086003624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,7,0.041877334316571556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,15,0.04782933493455251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,15,0.041663999358812966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,31,0.047775998711586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,31,0.04191466669241587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,63,0.04825599988301595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,63,0.04182933270931244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,127,0.05002133548259735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,127,0.04195733368396759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,255,0.0584746648867925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,255,0.04464533428351084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,511,0.0977226694424947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,511,0.06273599962393443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,1,0.08454400300979614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,1,0.07062399884064992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,3,0.07077333331108093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,3,0.08496000369389851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,7,0.08517332871754964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,7,0.07124266525109609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,15,0.08507200082143147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,15,0.07077333331108093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,31,0.08483200271924336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,31,0.07113066812356313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,63,0.07086933155854543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,63,0.08471999565760295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,127,0.08488532900810242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,255,0.09935466448465984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,255,0.07496533294518788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,511,0.17714667320251465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,1,0.01146666705608368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,3,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,1,0.011525332927703857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,3,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,7,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,7,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,15,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,31,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,31,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,63,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,63,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,127,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,127,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,255,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,255,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,511,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,511,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,511,0.10969066619873047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,1023,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,1023,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,2047,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,2047,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,4095,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,4095,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,8191,0.019610666980346043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,8191,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,16383,0.02369600037733714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,16383,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,32767,0.041984001795450844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,32767,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,127,0.07332266867160797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,65535,0.04054400076468786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,65535,0.062128002444903054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,3,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,3,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,7,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,7,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,15,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,15,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,31,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,63,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,63,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,127,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,131071,0.06223999957243601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,131071,0.10409599542617798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,127,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,255,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,255,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,511,0.01251199965675672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,1023,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,511,0.01267733300725619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,1023,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,2047,0.01762666677435239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,2047,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,4095,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,8191,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,4095,0.018138666947682697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,8191,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,16383,0.03994133323431015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,16383,0.023669332265853882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,32767,0.040378667414188385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,32767,0.06164266665776571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,65535,0.10337066650390625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,65535,0.05794133245944977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,1,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,1,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,3,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,3,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,7,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,7,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,15,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,15,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,31,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,31,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,63,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,63,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,127,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,255,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,255,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,511,0.01267733300725619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,511,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,131071,0.1910933256149292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,131071,0.10127466917037964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,1023,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,1023,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,2047,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,2047,0.01676799977819125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,4095,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,4095,0.021802666286627453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,8191,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,8191,0.03967999915281931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,16383,0.06108266611893972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,16383,0.03826133410135905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,32767,0.05827199916044871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,32767,0.10362666845321655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,65535,0.18752533197402954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,65535,0.09794132908185323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,1,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,1,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,1,0.011685332904259363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,3,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,3,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,3,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,3,0.011600000162919363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,7,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,7,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,7,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,15,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,15,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,131071,0.17958933115005493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,15,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,131071,0.3574133316675822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,31,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,31,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,31,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,63,0.009663999701539675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,63,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,63,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,63,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,127,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,127,0.009423999736706415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,127,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,255,0.01002133327225844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,255,0.009888000165422758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,255,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,255,0.010351999973257383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,511,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,511,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,511,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,511,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,1023,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,1023,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,1023,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,1023,0.01201066623131434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,2047,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,2047,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,2047,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,2047,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,4095,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,4095,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,4095,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,4095,0.0161920003592968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,8191,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,8191,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,8191,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,8191,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,16383,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,16383,0.023738667368888855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,16383,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,16383,0.021589333812395733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,32767,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,32767,0.03925333420435587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,32767,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,32767,0.03175999969244003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,65535,0.014709333578745524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,65535,0.05991999804973602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,65535,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,65535,0.0525546669960022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,131071,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,131071,0.10322667161623637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,1,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,1,0.011477333803971609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,131071,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,1,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,3,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,3,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,3,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,3,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,7,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,7,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,7,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,7,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,15,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,15,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,15,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,15,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,131071,0.08975999553998311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,31,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,31,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,31,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,31,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,63,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,63,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,63,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,127,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,127,0.011930666863918304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,127,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,127,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,255,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,255,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,255,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,511,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,255,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,511,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,511,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,511,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,1023,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,1023,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,1023,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,2047,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,2047,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,2047,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,2047,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,4095,0.013376000026861826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,4095,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,4095,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,4095,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,8191,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,8191,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,8191,0.013359999905029932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,8191,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,16383,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,16383,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,16383,0.038933334251244865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,16383,0.031770666440327965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,32767,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,32767,0.05930666625499725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,32767,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,32767,0.05232533315817515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,65535,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,65535,0.10074667135874431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,65535,0.013536000003417334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,65535,0.08700799942016602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,131071,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,1,0.009301333377758661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,1,0.009328000247478485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,1,0.00926399976015091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,131071,0.1863200068473816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,1,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,3,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,3,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,3,0.011477333803971609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,7,0.009226666763424873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,7,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,7,0.009002666920423508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,131071,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,15,0.008778666456540426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,15,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,15,0.009354666496316591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,15,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,131071,0.1590720017751058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,31,0.010181333248813948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,31,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,31,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,31,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,63,0.009914666414260864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,63,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,63,0.010293333480755487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,127,0.009941333283980688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,127,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,127,0.01267733300725619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,255,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,255,0.010442666709423065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,255,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,511,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,511,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,511,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,511,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,1023,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,1023,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,1023,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,1023,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,2047,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,2047,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,2047,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,2047,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,4095,0.009125333279371262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,4095,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,4095,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,4095,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,8191,0.009162666896979014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,8191,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,8191,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,8191,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,16383,0.00897066667675972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,16383,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,16383,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,16383,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,32767,0.009658666948477427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,32767,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,32767,0.025775998830795288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,32767,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,65535,0.00915733352303505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,65535,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,65535,0.030410667260487873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,65535,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,131071,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,131071,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,131071,0.03473066786924998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,1,0.008912000184257826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,131071,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,1,0.00921066664159298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,1,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,1,0.009114666531483332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,3,0.009093333035707474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,3,0.010575999816258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,3,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,3,0.00933333362142245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,7,0.010250666489203772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,7,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,15,0.010426666587591171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,15,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,31,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,31,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,63,0.010138666878143946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,63,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,127,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,127,0.009269333134094873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,127,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,127,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,255,0.009269333134094873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,255,0.00915733352303505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,255,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,511,0.008842666943868002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,511,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,511,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,1023,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,1023,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,1023,0.009109333157539368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,1023,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,2047,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,2047,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,2047,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,2047,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,4095,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,4095,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,4095,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,4095,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,8191,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,8191,0.015706667055686314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,8191,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,8191,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,16383,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,16383,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,16383,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,16383,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,32767,0.009216000015536943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,32767,0.021962667504946392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,32767,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,32767,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,65535,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,65535,0.023221333821614582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,65535,0.009370666618148485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,65535,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,131071,0.011658667276302973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,131071,0.026842666169007618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,1,0.008949333180983862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,1,0.008869333192706108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,131071,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,1,0.00884799969693025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,1,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,3,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,3,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,3,0.011989332735538483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,7,0.009882666791478792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,7,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,7,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,7,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,15,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,15,0.010154666379094124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,15,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,15,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,31,0.010090666512648264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,31,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,31,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,131071,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,31,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,63,0.010410666465759277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,63,0.009269333134094873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,127,0.010319999729593595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,127,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,127,0.009328000247478485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,127,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,255,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,255,0.008997333546479544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,255,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,511,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,511,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,511,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,1023,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,1023,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,2047,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,1023,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,2047,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,2047,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,2047,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,4095,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,4095,0.011941333611806234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,4095,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,4095,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,8191,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,8191,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,8191,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,8191,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,16383,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,16383,0.019850666324297588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,16383,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,16383,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,32767,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,32767,0.021536000072956085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,32767,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,32767,0.022266666094462078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,65535,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,65535,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,65535,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,65535,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,131071,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,131071,0.027589333554108936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,1,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,131071,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,1,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,1,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,1,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,3,0.009472000102202097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,3,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,7,0.009178666397929192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,3,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,7,0.009503999724984169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,7,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,15,0.009242666885256767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,131071,0.027221334477265675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,15,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,15,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,15,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,31,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,31,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,31,0.009594666461149851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,63,0.009152000149091085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,63,0.009253333633144697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,63,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,127,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,63,0.011744000017642975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,127,0.009482666850090027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,127,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,255,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,255,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,511,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,511,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,511,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,511,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,1023,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,1023,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,1023,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,1023,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,2047,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,2047,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,2047,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,2047,0.01246400053302447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,4095,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,4095,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,4095,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,4095,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,8191,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,8191,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,8191,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,8191,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,16383,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,16383,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,16383,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,16383,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,32767,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,32767,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,32767,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,32767,0.019674666225910187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,65535,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,65535,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,65535,0.02306666721900304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,65535,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,131071,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,131071,0.027930667002995808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,131071,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,131071,0.027434666951497395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,1,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,1,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,1,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,3,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,1,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,3,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,3,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,7,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,7,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,3,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,7,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,7,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,15,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,15,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,31,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,31,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,31,0.011493333925803503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,31,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,63,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,63,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,63,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,63,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,127,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,127,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,255,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,255,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,255,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,511,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,511,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,511,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,511,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,1023,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,1023,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,1023,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,1023,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,2047,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,2047,0.01578666642308235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,2047,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,2047,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,4095,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,4095,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,4095,0.01332266628742218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,4095,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,8191,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,8191,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,8191,0.01358933374285698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,8191,0.02295999974012375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,16383,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,16383,0.04030933231115341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,16383,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,16383,0.031680000325044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,32767,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,32767,0.06003733476003011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,32767,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,32767,0.051967998345692955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,65535,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,65535,0.1011840005715688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,1,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,1,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,65535,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,1,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,1,0.011589333415031433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,3,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,3,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,65535,0.08714133501052856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,7,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,7,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,7,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,15,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,7,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,15,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,15,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,31,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,31,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,31,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,31,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,63,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,63,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,127,0.010485333700974783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,127,0.010485333700974783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,255,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,127,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,255,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,255,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,255,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,511,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,511,0.011600000162919363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,511,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,1023,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,1023,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,1023,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,1023,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,2047,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,2047,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,2047,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,2047,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,4095,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,4095,0.02294933299223582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,4095,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,4095,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,8191,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,8191,0.03868266691764196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,8191,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,8191,0.029738667110602062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,16383,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,16383,0.05836800237496694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,16383,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,16383,0.05204799771308899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,32767,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,32767,0.10039466619491577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,32767,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,32767,0.08693333466847737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,65535,0.013557333499193192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,65535,0.1832586725552877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,1,0.009461333354314169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,1,0.009392000113924345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,1,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,1,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,65535,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,3,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,3,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,3,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,7,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,7,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,7,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,7,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,15,0.009482666850090027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,15,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,15,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,31,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,31,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,31,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,31,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,63,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,63,0.009098666409651438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,63,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,127,0.009018666421373686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,63,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,65535,0.15657066305478415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,127,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,127,0.010266666611035665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,255,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,255,0.008986666798591614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,255,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,511,0.009525333220760027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,511,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,511,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,511,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,1023,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,1023,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,1023,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,1023,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,2047,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,2047,0.012618667135636011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,2047,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,2047,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,4095,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,4095,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,4095,0.012266666938861212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,4095,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,8191,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,8191,0.0189280000825723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,8191,0.012096000214417776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,8191,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,16383,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,16383,0.02057066683967908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,16383,0.012778667112191519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,16383,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,32767,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,32767,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,32767,0.01257066677014033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,32767,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,65535,0.011594666788975397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,65535,0.023221333821614582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,65535,0.012357333054145178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,65535,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,131071,0.013717333475748697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,131071,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,131071,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,1,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,131071,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,1,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,1,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,1,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,3,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,3,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,3,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,3,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,7,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,7,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,15,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,15,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,15,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,15,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,31,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,31,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,31,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,63,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,63,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,63,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,127,0.009381333366036415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,127,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,255,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,255,0.00914666677514712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,255,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,255,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,511,0.00949866697192192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,511,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,511,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,511,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,1023,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,1023,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,1023,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,1023,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,2047,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,2047,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,2047,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,2047,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,4095,0.012762666990359625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,4095,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,4095,0.011855999628702799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,4095,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,8191,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,8191,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,8191,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,8191,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,16383,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,16383,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,16383,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,16383,0.017530667285124462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,32767,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,32767,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,32767,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,32767,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,65535,0.011893333246310553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,65535,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,65535,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,65535,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,131071,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,131071,0.043103997906049095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,131071,0.014250667144854864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,1,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,1,0.011823999385039011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,1,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,3,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,1,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,3,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,7,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,7,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,3,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,7,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,131071,0.031712000568707786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,15,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,15,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,31,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,15,0.011744000017642975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,31,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,31,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,63,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,63,0.011509332805871964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,63,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,127,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,127,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,255,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,255,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,255,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,255,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,511,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,511,0.011429333438475927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,511,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,511,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,1023,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,1023,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,1023,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,1023,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,2047,0.013877333452304205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,2047,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,2047,0.013962666193644205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,2047,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,4095,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,4095,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,4095,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,4095,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,8191,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,8191,0.03933866570393244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,8191,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,8191,0.030202666918436687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,16383,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,16383,0.058970664938290916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,16383,0.013424000392357508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,16383,0.05219733218352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,32767,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,32767,0.10126933455467224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,1,0.014021333307027817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,1,0.012138667205969492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,32767,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,1,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,1,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,3,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,3,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,3,0.01201066623131434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,3,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,7,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,7,0.011701333026091257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,7,0.011760000139474869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,7,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,15,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,15,0.011530666301647821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,15,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,15,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,32767,0.08706667025883992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,31,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,31,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,31,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,63,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,31,0.01166933278242747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,63,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,127,0.011621333658695221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,127,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,127,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,127,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,255,0.011786667009194693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,255,0.011445333560307821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,255,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,255,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,511,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,511,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,511,0.013461332768201828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,511,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,1023,0.016538667182127636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,1023,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,1023,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,1023,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,2047,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,2047,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,2047,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,2047,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,4095,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,4095,0.039605334401130676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,4095,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,4095,0.031557333966096245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,8191,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,8191,0.061493332187334694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,8191,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,8191,0.05372266471385956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,16383,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,16383,0.10334400335947673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,16383,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,16383,0.08870399991671245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,32767,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,32767,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,32767,0.1885333259900411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,32767,0.1585760017236074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,1,0.012080000092585882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,1,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,1,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,3,0.011770666887362799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,1,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,3,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,3,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,3,0.012223999947309494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,7,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,7,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,7,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,7,0.011989332735538483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,15,0.012698666503032049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,15,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,15,0.011637333780527115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,31,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,31,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,31,0.011429333438475927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,31,0.011978667229413986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,63,0.012698666503032049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,63,0.011616000284751257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,127,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,127,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,127,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,255,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,255,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,255,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,255,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,511,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,511,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,511,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,511,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,1023,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,1023,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,1023,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,1023,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,2047,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,2047,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,2047,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,2047,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,4095,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,4095,0.03977599988381068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,4095,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,4095,0.031712000568707786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,8191,0.06262399752934773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,8191,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,1,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,8191,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,1,0.013669333110253016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,1,0.013733333597580591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,1,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,3,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,3,0.013616000612576803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,8191,0.05273066461086273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,3,0.013573333621025085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,3,0.013354666531085968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,7,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,7,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,7,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,7,0.013872000078360239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,15,0.013663999736309052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,15,0.013605333864688873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,15,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,15,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,31,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,31,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,31,0.01350933313369751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,31,0.01339200014869372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,63,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,63,0.01422400027513504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,63,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,63,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,127,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,127,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,127,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,127,0.014181333283583323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,255,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,255,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,255,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,255,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,511,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,511,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,511,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,511,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,1023,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,1023,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,1023,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,1023,0.02046400060256322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,2047,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,2047,0.03984000037113825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,2047,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,2047,0.03181333343187968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,4095,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,4095,0.062224000692367554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,4095,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,4095,0.052517334620157875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,8191,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,8191,0.1051093339920044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,8191,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,8191,0.0890826682249705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,1,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,1,0.01331199953953425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,1,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,1,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,3,0.013663999736309052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,3,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,3,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,3,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,7,0.01479999969402949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,7,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,7,0.013807999591032663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,15,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,15,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,7,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,15,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,31,0.013445333888133367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,15,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,31,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,31,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,31,0.014725333700577417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,63,0.014741333822409311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,63,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,63,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,63,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,127,0.013376000026861826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,127,0.013466666142145792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,127,0.014346666634082794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,127,0.013338666409254074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,255,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,255,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,255,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,255,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,511,0.0136266661187013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,511,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,511,0.013674666484196981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,511,0.015722667177518208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,1023,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,1023,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,1023,0.01350933313369751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,1023,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,2047,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,2047,0.040661332507928215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,2047,0.017759999881188076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,2047,0.03154666721820831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,4095,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,4095,0.06615466872851054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,4095,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,1,0.017583999782800674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,1,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,4095,0.05218133330345154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,1,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,1,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,3,0.017674667139848072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,3,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,3,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,3,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,7,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,7,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,7,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,7,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,15,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,15,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,15,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,31,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,15,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,31,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,31,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,31,0.016735999534527462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,63,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,63,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,63,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,63,0.01682666689157486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,127,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,127,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,127,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,127,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,255,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,255,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,255,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,255,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,511,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,511,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,511,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,511,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,1023,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,1023,0.03389333436886469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,1023,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,1023,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,2047,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,2047,0.05680533250172933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,2047,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,2047,0.04980800052483877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,4095,0.09574400385220845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,4095,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,4095,0.021514666577180225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,1,0.009130666653315226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,1,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,1,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,4095,0.08074666559696198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,1,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,3,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,3,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,3,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,3,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,7,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,7,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,7,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,15,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,15,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,15,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,15,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,31,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,31,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,31,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,31,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,63,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,63,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,127,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,127,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,255,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,255,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,255,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,511,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,511,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,511,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,511,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,1023,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,1023,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,1023,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,1023,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,2047,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,2047,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,2047,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,2047,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,4095,0.012437333663304647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,4095,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,4095,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,4095,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,8191,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,8191,0.011802667131026586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,8191,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,8191,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,16383,0.011674666156371435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,16383,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,16383,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,16383,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,32767,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,32767,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,32767,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,32767,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,65535,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,65535,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,65535,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,65535,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,131071,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,1,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,131071,0.04321600000063578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,1,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,1,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,1,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,3,0.009098666409651438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,131071,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,3,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,7,0.009466666728258133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,7,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,7,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,7,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,15,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,15,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,15,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,31,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,31,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,31,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,63,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,63,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,63,0.011535999675591787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,63,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,127,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,131071,0.033701332906881966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,127,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,127,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,127,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,255,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,255,0.010634666929642359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,255,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,511,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,511,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,511,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,1023,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,1023,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,1023,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,1023,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,2047,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,2047,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,2047,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,2047,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,4095,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,4095,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,4095,0.012778667112191519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,4095,0.015872000406185787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,8191,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,8191,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,8191,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,8191,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,16383,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,16383,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,16383,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,16383,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,32767,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,32767,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,32767,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,32767,0.023733332753181458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,65535,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,65535,0.040421334405740104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,65535,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,65535,0.03145600110292435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,131071,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,131071,0.06029866635799408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,131071,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,1,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,1,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,1,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,131071,0.053557331363360085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,1,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,3,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,3,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,3,0.01791999985774358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,3,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,7,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,7,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,7,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,15,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,7,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,15,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,15,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,15,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,31,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,31,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,31,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,63,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,31,0.01852799952030182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,63,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,63,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,63,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,127,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,127,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,127,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,127,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,255,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,255,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,255,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,255,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,511,0.018085333208243053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,511,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,511,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,511,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,1023,0.0182239996890227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,1023,0.033626665671666466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,1023,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,1023,0.029674666623274486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,2047,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,2047,0.05817066629727682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,2047,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,2047,0.05030933519204458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,1,0.025568000972270966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,1,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,1,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,1,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,3,0.025610665480295818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,3,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,3,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,3,0.023541333774725597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,7,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,7,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,7,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,7,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,15,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,15,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,15,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,15,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,31,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,31,0.02552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,31,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,31,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,63,0.02737066646416982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,63,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,63,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,63,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,127,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,127,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,127,0.023007998863856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,127,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,255,0.025589334468046825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,255,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,255,0.024693332612514496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,255,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,511,0.025616000096003216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,511,0.0377866675456365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,511,0.024853333830833435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,511,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,1023,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,1023,0.05624533196290334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,1023,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,1023,0.04861866434415182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,2047,0.027664000789324444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,2047,0.0974826713403066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,2047,0.02775466690460841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,1,0.02587733417749405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,2047,0.08313600222269694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,1,0.025914666553338368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,1,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,1,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,3,0.025653332471847534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,3,0.025637333591779072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,3,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,3,0.023610666394233704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,7,0.025077333052953083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,7,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,7,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,7,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,15,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,15,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,15,0.025360000630219776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,15,0.023717333873112995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,31,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,31,0.025583999852339428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,31,0.02378133436044057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,31,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,63,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,63,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,63,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,63,0.02380799998839696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,127,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,127,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,127,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,127,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,255,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,255,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,255,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,255,0.023589332898457844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,511,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,511,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,511,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,511,0.03160000095764796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,1023,0.025498665869235992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,1023,0.05597866574923197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,1023,0.050016000866889954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,1023,0.025733334322770435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,1,0.04014399896065394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,1,0.03792533278465271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,1,0.03581333408753077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,3,0.04026666780312856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,3,0.036144000788529716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,7,0.04025600105524063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,7,0.03999999910593033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,1,0.040149333576361336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,7,0.0359199990828832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,3,0.040149333576361336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,7,0.037733333806196846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,15,0.04001066585381826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,15,0.040063999593257904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,15,0.037690666814645134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,3,0.03839466720819473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,15,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,31,0.039621333281199135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,31,0.03975466638803482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,31,0.0377866675456365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,31,0.037802666425704956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,63,0.03980266551176707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,63,0.04001600046952566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,63,0.03765333443880081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,63,0.037658666570981346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,127,0.037615999579429626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,127,0.03951466580231985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,127,0.035887998839219414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,127,0.03737066686153412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,255,0.04170133173465729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,255,0.042175998290379844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,255,0.03766933331886927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,255,0.0372533326347669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,511,0.04205866654713949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,511,0.06226666768391927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,511,0.03798400113979975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,511,0.056426664193471275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,1023,0.04177600145339966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,1,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,1023,0.09758399923642476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,1,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,1,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,3,0.009226666763424873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,3,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,3,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,3,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,7,0.008976000050703684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,7,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,1023,0.037658666570981346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,1023,0.08316266536712646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,7,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,15,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,15,0.008943999807039896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,15,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,31,0.00926399976015091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,31,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,31,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,63,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,63,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,63,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,127,0.010506667196750641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,127,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,127,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,127,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,255,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,255,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,255,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,255,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,511,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,511,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,511,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,511,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,1023,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,1023,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,1023,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,1023,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,2047,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,2047,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,2047,0.012746666868527731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,2047,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,4095,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,4095,0.016650666793187458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,4095,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,4095,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,8191,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,8191,0.018640000373125076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,8191,0.012762666990359625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,8191,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,16383,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,16383,0.02032533288002014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,16383,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,16383,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,32767,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,32767,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,32767,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,32767,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,65535,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,65535,0.041402667760849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,65535,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,65535,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,131071,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,131071,0.06195199986298879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,1,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,1,0.011525332927703857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,1,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,3,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,131071,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,3,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,3,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,131071,0.05388266841570536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,7,0.009237333511312803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,7,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,7,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,7,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,15,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,15,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,31,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,31,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,63,0.010602666685978571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,63,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,127,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,127,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,127,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,255,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,255,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,255,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,255,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,511,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,511,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,511,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,511,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,1023,0.010405333091815313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,1023,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,1023,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,1023,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,2047,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,2047,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,2047,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,2047,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,4095,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,4095,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,4095,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,4095,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,8191,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,8191,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,8191,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,8191,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,16383,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,16383,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,16383,0.013338666409254074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,16383,0.021370666722456615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,32767,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,32767,0.0386559988061587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,32767,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,32767,0.03183466692765554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,65535,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,65535,0.05839466551939646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,65535,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,65535,0.052373334765434265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,131071,0.014544000228246054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,131071,0.10347732901573181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,1,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,1,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,131071,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,3,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,3,0.012608000387748083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,7,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,7,0.011525332927703857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,15,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,63,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,31,0.011589333415031433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,63,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,127,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,127,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,255,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,511,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,511,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,1023,0.012309333930412928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,1023,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,2047,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,131071,0.088837335507075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,2047,0.017802666872739792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,4095,0.018725333114465077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,4095,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,8191,0.021551998953024547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,8191,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,16383,0.04048000027736028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,16383,0.024186665813128155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,32767,0.038975998759269714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,32767,0.06243733565012614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,65535,0.06011199951171875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,65535,0.10557867089907329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,3,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,7,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,7,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,15,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,15,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,31,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,63,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,127,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,127,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,255,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,255,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,511,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,511,0.01221866657336553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,131071,0.10356799761454265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,131071,0.1923146645228068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,1023,0.013301332791646322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,1023,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,2047,0.018112000077962875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,2047,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,4095,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,4095,0.02147199958562851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,8191,0.040576001008351646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,8191,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,16383,0.06198933223883311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,16383,0.03986666599909464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,32767,0.059952000776926674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,32767,0.10451199611028035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,65535,0.10027733445167542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,65535,0.18849599361419678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,1,0.009797333429257074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,3,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,7,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,7,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,15,0.010650667051474253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,15,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,31,0.00914666677514712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,31,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,63,0.009039999917149544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,127,0.009130666653315226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,63,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,127,0.009888000165422758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,255,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,255,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,131071,0.18158932526906332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,131071,0.3588426510492961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,511,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,1023,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,1023,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,2047,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,2047,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,4095,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,4095,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,8191,0.015717333803574245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,8191,0.015989333391189575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,16383,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,16383,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,32767,0.028234665592511494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,32767,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,65535,0.032245332996050514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,65535,0.032399999598662056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,1,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,1,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,3,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,3,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,131071,0.03369600077470144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,7,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,7,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,15,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,15,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,31,0.010469333579142889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,131071,0.035317334036032356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,31,0.01232533281048139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,63,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,127,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,127,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,255,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,255,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,511,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,511,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,1023,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,1023,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,2047,0.011754666765530905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,2047,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,4095,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,4095,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,8191,0.01584533353646596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,8191,0.016399999459584553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,16383,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,16383,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,32767,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,32767,0.022453332940737408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,65535,0.025888000925381977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,1,0.01044800008336703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,65535,0.023541333774725597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,1,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,3,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,7,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,15,0.01009599988659223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,15,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,31,0.010234666367371878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,31,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,63,0.010527999450763067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,63,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,127,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,131071,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,255,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,255,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,511,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,511,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,1023,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,1023,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,131071,0.02757333219051361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,2047,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,2047,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,4095,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,4095,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,8191,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,8191,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,16383,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,16383,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,32767,0.024314666787783306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,32767,0.021722666919231415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,65535,0.025770666698614757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,65535,0.02759466568628947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,1,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,1,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,3,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,7,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,7,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,15,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,15,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,31,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,31,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,63,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,63,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,131071,0.03154666721820831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,127,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,127,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,131071,0.027376001079877216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,255,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,255,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,511,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,1023,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,1023,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,2047,0.012752000242471695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,2047,0.012122667084137598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,4095,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,4095,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,8191,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,8191,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,16383,0.019786667078733444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,16383,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,32767,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,32767,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,65535,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,65535,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,1,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,3,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,3,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,131071,0.045781334241231285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,7,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,15,0.010501333822806677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,7,0.010496000448862711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,131071,0.029445332785447437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,15,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,31,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,63,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,63,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,255,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,511,0.012223999947309494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,511,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,1023,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,1023,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,2047,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,2047,0.018453333526849747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,4095,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,4095,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,8191,0.04191466669241587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,8191,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,16383,0.06251200040181477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,16383,0.040063999593257904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,32767,0.10505066315333049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,32767,0.059434667229652405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,1,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,1,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,3,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,3,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,7,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,7,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,15,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,31,0.011727999895811081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,65535,0.18928533792495728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,31,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,63,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,65535,0.10124799609184265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,63,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,127,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,127,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,255,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,511,0.012682666381200155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,511,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,1023,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,1023,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,2047,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,2047,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,4095,0.03988266736268997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,4095,0.023887999355793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,8191,0.06066666543483734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,8191,0.03766933331886927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,16383,0.1018453339735667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,16383,0.05824000140031179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,32767,0.09884267052014668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,32767,0.18797866503397623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,1,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,1,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,3,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,3,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,7,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,15,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,31,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,31,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,63,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,127,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,127,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,255,0.012282667060693106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,65535,0.17986132701237997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,511,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,65535,0.3535733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,511,0.01145600030819575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,1023,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,1023,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,2047,0.013594667116800943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,2047,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,4095,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,4095,0.01471466695268949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,8191,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,8191,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,16383,0.021546666820844013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,16383,0.01993600030740102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,32767,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,32767,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,65535,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,65535,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,1,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,1,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,3,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,3,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,131071,0.047781333327293396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,7,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,7,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,15,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,31,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,131071,0.02937600016593933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,31,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,63,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,63,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,127,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,127,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,255,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,255,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,511,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,511,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,1023,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,1023,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,2047,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,2047,0.013877333452304205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,4095,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,4095,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,8191,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,8191,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,16383,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,16383,0.019578666736682255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,32767,0.023631999890009563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,32767,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,65535,0.042650664846102394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,1,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,65535,0.02552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,1,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,3,0.011546666423479715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,3,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,7,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,7,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,15,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,15,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,131071,0.04502399762471517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,131071,0.06384533147017162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,31,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,31,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,63,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,63,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,127,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,255,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,255,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,511,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,511,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,1023,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,1023,0.01332266628742218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,2047,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,2047,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,4095,0.040149333576361336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,4095,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,8191,0.06205866734186808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,8191,0.03956266740957896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,16383,0.10439466436704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,16383,0.0588319996992747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,1,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,1,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,3,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,7,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,3,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,7,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,15,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,15,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,31,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,31,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,32767,0.18767466147740683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,63,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,32767,0.10010666648546855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,127,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,127,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,255,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,255,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,511,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,511,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,1023,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,1023,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,2047,0.04019733270009359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,2047,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,4095,0.06275733311971028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,4095,0.04178666571776072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,8191,0.10507733623186748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,8191,0.062080000837643944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,16383,0.18892266352971396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,16383,0.10318932930628459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,1,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,1,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,3,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,32767,0.35863999525705975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,3,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,7,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,32767,0.1837600072224935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,7,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,15,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,15,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,31,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,31,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,63,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,63,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,127,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,255,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,255,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,511,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,127,0.011711999773979187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,511,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,1023,0.0176959993938605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,1023,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,2047,0.04063999901215235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,2047,0.023930666347344715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,4095,0.04053333401679993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,4095,0.06188266475995382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,1,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,1,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,3,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,3,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,8191,0.10498666763305664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,7,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,8191,0.06150400141874949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,7,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,15,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,15,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,31,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,31,0.013450667262077332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,63,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,63,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,127,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,255,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,127,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,255,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,511,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,511,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,1023,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,1023,0.03579733272393545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,2047,0.062319998939832054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,2047,0.04011733333269755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,4095,0.10502933462460835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,4095,0.062224000692367554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,1,0.014746667196353277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,1,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,8191,0.10494400064150493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,8191,0.18948266903559366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,3,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,3,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,7,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,7,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,15,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,15,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,31,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,31,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,63,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,63,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,127,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,127,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,255,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,255,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,511,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,511,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,1023,0.035589332381884255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,1023,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,2047,0.062133332093556724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,2047,0.040778666734695435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,1,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,1,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,4095,0.105295995871226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,4095,0.06227200229962667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,3,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,3,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,7,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,7,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,15,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,15,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,31,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,31,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,63,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,63,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,127,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,127,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,255,0.018917333334684372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,255,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,511,0.03366400053103765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,511,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,1023,0.05420800050099691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,1023,0.033759998778502144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,2047,0.09802132844924927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,2047,0.057775999108950295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,1,0.009578666960199675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,1,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,3,0.009519999846816063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,7,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,4095,0.1784213383992513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,15,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,15,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,31,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,4095,0.09616532921791077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,31,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,63,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,63,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,127,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,127,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,255,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,255,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,511,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,511,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,1023,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,1023,0.011786667009194693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,2047,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,2047,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,4095,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,4095,0.01581866666674614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,8191,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,8191,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,16383,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,16383,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,32767,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,32767,0.02160533269246419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,65535,0.04215999941031138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,1,0.0099093330403169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,65535,0.025770666698614757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,1,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,3,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,3,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,7,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,7,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,15,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,31,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,31,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,63,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,63,0.011626667032639185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,127,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,255,0.009381333366036415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,131071,0.06541866560777028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,127,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,511,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,131071,0.04379733403523763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,511,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,1023,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,1023,0.01145600030819575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,2047,0.016629333297411602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,2047,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,4095,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,4095,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,8191,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,8191,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,16383,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,16383,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,32767,0.04132800052563349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,32767,0.02555199960867564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,65535,0.062133332093556724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,65535,0.040474665661652885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,1,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,1,0.018687999496857326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,131071,0.10293333729108174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,131071,0.062394668658574425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,3,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,3,0.01855466639002164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,7,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,7,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,15,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,15,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,31,0.01974933346112569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,31,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,63,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,63,0.018672000616788864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,127,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,127,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,255,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,255,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,511,0.033973333736260734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,511,0.0230880007147789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,1023,0.054192001620928444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,1023,0.0354666660229365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,2047,0.09940266609191895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,2047,0.058464000622431435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,1,0.02757866680622101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,1,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,3,0.02784000088771184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,3,0.02548266698916753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,7,0.025040000677108765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,7,0.02779199928045273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,15,0.02718399961789449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,15,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,31,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,31,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,63,0.027765333652496338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,63,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,127,0.027274665733178455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,255,0.03425066669782003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,127,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,255,0.02430933217207591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,511,0.05409066875775655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,511,0.035989334185918175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,1023,0.09338666995366414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,1023,0.05425600210825602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,2047,0.1769919991493225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,2047,0.09683733185132344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,1,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,1,0.025472000241279602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,3,0.02921066681543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,3,0.025759999950726826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,7,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,7,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,15,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,15,0.02584533393383026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,31,0.029296000798543293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,31,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,63,0.029669334491093952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,63,0.025648000339667004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,127,0.029146666328112285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,127,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,255,0.03571200122435888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,255,0.02553066611289978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,511,0.05532266696294149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,511,0.037589333951473236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,1023,0.09531199932098389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,1,0.0459146648645401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,1,0.039781334499518074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,3,0.0458186666170756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,3,0.03990400085846583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,7,0.04595200220743815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,7,0.039893334110577904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,15,0.04604266583919525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,15,0.0397173340121905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,31,0.04595733185609182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,31,0.0397173340121905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,63,0.04595733185609182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,63,0.039834665755430855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,127,0.04826133449872335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,1023,0.05638400216897329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,127,0.03900266687075297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,255,0.056261335810025535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,255,0.04225599765777588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,511,0.09521599610646565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,511,0.06016000111897787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,1,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,1,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,3,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,7,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,7,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,15,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,15,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,31,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,63,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,1023,0.1725013256072998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,63,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,127,0.010522666076819101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,255,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,511,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,511,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,1023,0.09532800316810608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,1023,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,1023,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,2047,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,2047,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,4095,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,4095,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,8191,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,8191,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,16383,0.024373332659403484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,16383,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,32767,0.042319998145103455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,32767,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,65535,0.041637333730856575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,65535,0.06262399752934773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,1,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,1,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,3,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,3,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,7,0.010224000240365664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,7,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,31,0.012096000214417776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,131071,0.06192533175150553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,131071,0.10498666763305664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,31,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,63,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,127,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,127,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,255,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,255,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,511,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,511,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,1023,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,1023,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,2047,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,2047,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,4095,0.017973333597183228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,4095,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,8191,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,8191,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,16383,0.040778666734695435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,16383,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,32767,0.06193066636721293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,32767,0.03972266614437103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,65535,0.10332266489664714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,65535,0.058965335289637245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,1,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,1,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,1,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,1,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,3,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,3,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,3,0.011968000481526056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,131071,0.1904266675313314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,3,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,7,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,7,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,7,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,131071,0.10097066561381023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,15,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,15,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,15,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,31,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,31,0.00980266680320104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,31,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,31,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,63,0.010224000240365664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,63,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,63,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,127,0.010170666500926018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,127,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,127,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,255,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,255,0.009493333597977957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,255,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,255,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,511,0.010591999938090643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,511,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,511,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,511,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,1023,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,1023,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,1023,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,1023,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,2047,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,2047,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,2047,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,2047,0.01568000018596649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,4095,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,4095,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,4095,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,4095,0.016421332955360413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,8191,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,8191,0.012815999488035837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,8191,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,8191,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,16383,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,16383,0.025077333052953083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,16383,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,16383,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,32767,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,32767,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,32767,0.03968533376852671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,32767,0.031328000128269196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,65535,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,65535,0.013023999830087027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,65535,0.062074666221936546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,65535,0.05217066903909048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,131071,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,131071,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,131071,0.10494933525721233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,1,0.009322666873534521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,1,0.00926399976015091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,3,0.009248000259200731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,3,0.009413333609700203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,3,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,7,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,7,0.012714666624863943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,7,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,15,0.009354666496316591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,15,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,15,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,31,0.009232000137368837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,31,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,31,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,63,0.00943999985853831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,63,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,63,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,127,0.009472000102202097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,127,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,127,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,127,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,255,0.009183999771873156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,255,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,255,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,511,0.009530666594703993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,511,0.011567999919255575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,511,0.010703999549150467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,511,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,1023,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,1023,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,1023,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,1023,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,2047,0.012586666891972223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,2047,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,2047,0.010346666599313417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,2047,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,131071,0.08885332942008972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,4095,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,4095,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,4095,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,4095,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,8191,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,8191,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,8191,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,8191,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,16383,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,16383,0.01951466624935468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,16383,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,16383,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,32767,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,32767,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,32767,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,32767,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,65535,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,65535,0.0310506671667099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,65535,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,65535,0.029535998900731403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,131071,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,131071,0.033471999069054924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,131071,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,131071,0.03175999969244003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,1,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,1,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,3,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,1,0.008863999818762144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,1,0.010682666053374609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,3,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,7,0.009178666397929192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,7,0.010640000303586325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,7,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,15,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,15,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,15,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,31,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,31,0.009253333633144697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,31,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,63,0.010543999572594961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,63,0.009338666374484697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,63,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,63,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,127,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,127,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,127,0.00943999985853831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,255,0.010453333457310995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,255,0.009359999870260557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,255,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,255,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,511,0.009599999835093817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,511,0.010725333044926325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,511,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,511,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,1023,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,1023,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,1023,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,1023,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,2047,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,2047,0.012272000312805176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,2047,0.010821333775917688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,2047,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,4095,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,4095,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,4095,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,4095,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,8191,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,8191,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,8191,0.013573333621025085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,8191,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,16383,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,16383,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,16383,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,16383,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,32767,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,32767,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,32767,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,32767,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,65535,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,65535,0.02382933348417282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,65535,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,65535,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,131071,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,131071,0.027632000545660656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,131071,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,1,0.010762666662534079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,1,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,1,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,3,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,1,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,3,0.01228800043463707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,3,0.010666667173306147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,3,0.012330666184425354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,7,0.010559999694426855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,7,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,7,0.011535999675591787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,7,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,131071,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,15,0.010170666500926018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,15,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,15,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,15,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,31,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,31,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,31,0.011690666278203329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,63,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,63,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,127,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,127,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,127,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,255,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,255,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,255,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,255,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,511,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,511,0.012202666451533636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,511,0.01198400060335795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,511,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,1023,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,1023,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,1023,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,1023,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,2047,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,2047,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,2047,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,2047,0.015520000209410986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,4095,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,4095,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,4095,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,4095,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,8191,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,8191,0.02492800106604894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,8191,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,8191,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,16383,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,16383,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,16383,0.03956266740957896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,16383,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,32767,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,32767,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,32767,0.06071466704209646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,32767,0.05213333169619242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,65535,0.01369599997997284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,65535,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,65535,0.1013706624507904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,65535,0.08742933471997578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,131071,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,131071,0.1854026714960734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,1,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,1,0.009317333499590555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,1,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,1,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,3,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,3,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,3,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,3,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,7,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,7,0.009653333574533463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,7,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,7,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,15,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,15,0.009541333342591921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,15,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,15,0.010687999427318573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,31,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,31,0.009733333562811216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,31,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,31,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,63,0.00926399976015091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,63,0.010202666744589806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,63,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,127,0.009999999776482582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,131071,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,127,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,127,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,255,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,255,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,255,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,255,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,511,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,511,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,511,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,511,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,1023,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,1023,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,1023,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,1023,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,2047,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,131071,0.1597599983215332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,2047,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,2047,0.011989332735538483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,2047,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,4095,0.011445333560307821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,4095,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,4095,0.011962667107582092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,4095,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,8191,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,8191,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,8191,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,8191,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,16383,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,16383,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,16383,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,16383,0.019637333850065868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,32767,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,32767,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,32767,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,32767,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,65535,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,65535,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,65535,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,65535,0.02271466702222824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,131071,0.029663999875386555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,131071,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,131071,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,131071,0.029258665939172108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,1,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,1,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,1,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,1,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,3,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,3,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,7,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,7,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,7,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,15,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,15,0.01044800008336703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,15,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,31,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,31,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,31,0.010842667271693548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,63,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,63,0.010816000401973724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,127,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,127,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,127,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,255,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,255,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,255,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,255,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,511,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,511,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,511,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,511,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,1023,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,1023,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,1023,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,1023,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,2047,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,2047,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,2047,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,2047,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,4095,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,4095,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,4095,0.013381333400805792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,4095,0.021514666577180225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,8191,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,8191,0.03811199963092804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,8191,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,8191,0.02985599885384242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,16383,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,16383,0.05835733314355215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,16383,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,16383,0.0524586687485377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,32767,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,32767,0.10041067004203796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,32767,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,32767,0.08684266606966655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,65535,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,65535,0.1851039926211039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,1,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,1,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,65535,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,65535,0.1569706698258718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,1,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,1,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,3,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,3,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,3,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,3,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,7,0.012842666357755661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,7,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,7,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,7,0.01191466674208641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,15,0.011445333560307821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,15,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,15,0.011472000430027643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,15,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,31,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,31,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,31,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,31,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,63,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,63,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,63,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,63,0.01268799975514412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,127,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,127,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,127,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,127,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,255,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,255,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,255,0.01173866664369901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,255,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,511,0.013381333400805792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,511,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,511,0.011744000017642975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,511,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,1023,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,1023,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,1023,0.011871999750534693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,1023,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,2047,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,2047,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,2047,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,2047,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,4095,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,4095,0.03982933362325033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,4095,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,4095,0.03155199935038885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,8191,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,8191,0.06070933242638906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,8191,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,8191,0.052298665046691895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,16383,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,16383,0.10483200351397197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,16383,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,16383,0.08732799688975017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,1,0.01341333364446958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,1,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,1,0.01341333364446958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,1,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,3,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,3,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,3,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,3,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,7,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,7,0.013823999712864557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,7,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,7,0.014565333724021912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,15,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,15,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,15,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,15,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,31,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,31,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,31,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,31,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,63,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,63,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,63,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,63,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,127,0.013429333766301474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,127,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,127,0.013525333255529404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,127,0.01331199953953425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,255,0.014709333578745524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,255,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,255,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,255,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,511,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,511,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,511,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,511,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,1023,0.013354666531085968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,1023,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,1023,0.014720000326633453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,1023,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,2047,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,2047,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,2047,0.03986666599909464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,2047,0.032069332897663116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,4095,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,4095,0.06237866481145223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,4095,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,4095,0.05421333511670431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,8191,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,8191,0.10558933019638062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,1,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,8191,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,1,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,1,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,1,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,3,0.009226666763424873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,3,0.009354666496316591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,3,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,3,0.010714666297038397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,7,0.009455999980370203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,7,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,7,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,8191,0.08906132976214091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,7,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,15,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,15,0.009162666896979014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,15,0.010533332824707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,31,0.009039999917149544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,31,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,31,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,63,0.009018666421373686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,31,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,63,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,63,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,63,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,127,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,127,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,127,0.010778666784365972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,255,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,255,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,255,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,511,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,511,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,511,0.010661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,511,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,1023,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,1023,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,1023,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,1023,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,2047,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,2047,0.013445333888133367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,2047,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,2047,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,4095,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,4095,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,4095,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,4095,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,8191,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,8191,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,8191,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,8191,0.017530667285124462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,16383,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,16383,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,16383,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,16383,0.018415999909241993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,32767,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,32767,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,32767,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,32767,0.02011200040578842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,65535,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,65535,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,65535,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,65535,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,131071,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,131071,0.044122666120529175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,131071,0.013418667018413544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,1,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,1,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,1,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,131071,0.03363200028737386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,1,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,3,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,3,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,3,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,3,0.017850667238235474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,7,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,7,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,7,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,7,0.017562666287024815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,15,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,15,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,15,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,15,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,31,0.017690667261679966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,31,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,31,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,31,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,63,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,63,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,63,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,63,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,127,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,127,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,127,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,127,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,255,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,255,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,255,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,255,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,511,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,511,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,511,0.021498667697111767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,1023,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,1023,0.03408000121514002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,1023,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,1023,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,2047,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,2047,0.05862399935722351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,2047,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,2047,0.050240000089009605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,4095,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,511,0.021733333667119343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,4095,0.0972213347752889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,1,0.025648000339667004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,4095,0.02021866664290428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,1,0.025087999800841015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,4095,0.08076799909273784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,1,0.023743999501069386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,3,0.025749333202838898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,3,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,3,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,3,0.023669332265853882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,7,0.02476266771554947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,7,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,7,0.02366400013367335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,7,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,15,0.025642665723959606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,15,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,15,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,15,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,1,0.024245334168275196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,31,0.025600001215934753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,31,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,31,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,31,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,63,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,63,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,63,0.02380799998839696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,127,0.024714666108290356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,63,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,127,0.02513599892457326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,127,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,127,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,255,0.025600001215934753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,255,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,255,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,255,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,511,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,511,0.0378506655494372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,511,0.024357333779335022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,511,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,1023,0.02565866708755493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,1023,0.05638400216897329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,1023,0.024218666056791942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,1023,0.04854933420817057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,2047,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,2047,0.09732266267140706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,1,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,1,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,1,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,3,0.011445333560307821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,7,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,7,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,7,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,2047,0.027456000447273254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,15,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,15,0.010512000570694605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,15,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,15,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,31,0.00926399976015091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,31,0.010629333555698395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,31,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,63,0.01002133327225844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,63,0.010645333677530289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,127,0.010656000425418219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,127,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,127,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,255,0.010570666442314783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,255,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,2047,0.08307733138402303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,255,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,255,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,511,0.01071999967098236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,511,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,511,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,511,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,1023,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,1023,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,1023,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,1023,0.0107893335322539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,2047,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,2047,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,2047,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,2047,0.012661332885424295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,4095,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,4095,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,4095,0.012703999876976013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,4095,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,8191,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,8191,0.01268799975514412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,8191,0.018618666877349217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,8191,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,16383,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,16383,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,16383,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,16383,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,32767,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,32767,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,32767,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,32767,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,65535,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,65535,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,65535,0.040448000033696495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,65535,0.03123733401298523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,131071,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,131071,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,131071,0.062368000547091164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,131071,0.05258666475613912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,1,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,3,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,3,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,7,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,7,0.010538666198650995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,15,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,15,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,31,0.010618666807810465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,63,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,63,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,127,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,255,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,255,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,511,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,511,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,1023,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,1023,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,2047,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,2047,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,4095,0.018474667022625606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,4095,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,8191,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,8191,0.019546666493018467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,16383,0.0405173326532046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,16383,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,32767,0.06206400195757548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,32767,0.039781334499518074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,65535,0.10404800375302632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,65535,0.05958400170008341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,1,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,1,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,3,0.01062400018175443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,3,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,7,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,15,0.010469333579142889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,15,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,31,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,31,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,63,0.0106133334338665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,131071,0.19186667601267496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,63,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,127,0.010426666587591171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,255,0.008997333546479544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,255,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,511,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,511,0.012362666428089142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,1023,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,1023,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,2047,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,2047,0.012762666990359625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,4095,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,131071,0.10188266634941101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,4095,0.012709333250919977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,8191,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,8191,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,16383,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,16383,0.01978133370478948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,32767,0.028181334336598713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,32767,0.025792000194390614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,65535,0.0327360009153684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,65535,0.03102933367093404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,1,0.008949333180983862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,1,0.010549332946538925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,3,0.008869333192706108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,3,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,7,0.008986666798591614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,15,0.00903466654320558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,31,0.009173333023985228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,63,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,31,0.010709332923094431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,131071,0.03748800108830134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,127,0.01240533341964086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,131071,0.033039999504884086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,255,0.01051733394463857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,255,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,511,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,511,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,1023,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,1023,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,2047,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,2047,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,4095,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,4095,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,8191,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,8191,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,16383,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,16383,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,32767,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,32767,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,65535,0.025722667574882507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,65535,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,1,0.010608000059922537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,1,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,3,0.010693332801262537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,7,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,15,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,15,0.010746666540702185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,31,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,131071,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,31,0.010565333068370819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,131071,0.02906133234500885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,63,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,63,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,127,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,255,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,127,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,255,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,511,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,511,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,1023,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,1023,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,2047,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,2047,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,4095,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,4095,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,8191,0.039818666875362396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,8191,0.02550933261712392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,16383,0.06364800035953522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,16383,0.03941333293914795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,32767,0.10412266850471497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,32767,0.06066666543483734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,65535,0.18866666158040366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,65535,0.09995733698209126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,1,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,3,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,3,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,7,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,7,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,15,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,15,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,31,0.011792000383138657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,31,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,63,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,63,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,127,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,127,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,255,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,511,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,511,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,1023,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,1023,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,2047,0.014602666099866232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,2047,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,4095,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,4095,0.013642666240533194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,8191,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,8191,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,131071,0.3628480037053426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,16383,0.021674667795499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,16383,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,32767,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,32767,0.02288000037272771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,131071,0.18083200852076212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,65535,0.02699200063943863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,65535,0.02363733450571696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,1,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,1,0.010847999403874079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,3,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,3,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,7,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,7,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,131071,0.048485333720842995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,15,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,15,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,31,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,31,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,63,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,63,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,131071,0.030207999050617218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,127,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,127,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,255,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,255,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,511,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,511,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,1023,0.01421333352724711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,1023,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,2047,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,2047,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,4095,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,4095,0.023631999890009563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,8191,0.0609386662642161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,8191,0.038693333665529885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,16383,0.10384533802668254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,16383,0.059674665331840515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,32767,0.18718934059143066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,32767,0.09875733653704326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,1,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,1,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,3,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,3,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,7,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,7,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,15,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,15,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,31,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,31,0.011658667276302973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,63,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,63,0.011488000551859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,127,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,127,0.012069333344697952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,255,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,65535,0.1791306734085083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,255,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,65535,0.3564213514328003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,511,0.013338666409254074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,511,0.013493333011865616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,1023,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,1023,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,2047,0.040250666439533234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,2047,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,4095,0.06135466694831848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,4095,0.04040000090996424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,8191,0.10458133618036906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,8191,0.06086933116118113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,1,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,1,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,16383,0.1909439961115519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,3,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,16383,0.10290132959683736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,3,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,7,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,7,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,15,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,15,0.014245333770910898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,31,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,31,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,63,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,63,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,127,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,127,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,255,0.014474666366974512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,255,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,511,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,511,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,1023,0.03515733281771342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,1023,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,2047,0.0621013343334198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,2047,0.03998400022586187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,4095,0.10424533486366272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,4095,0.06242666641871134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,1,0.009530666594703993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,1,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,3,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,7,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,15,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,7,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,15,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,31,0.010464000205198923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,31,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,63,0.010677333921194077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,127,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,255,0.010741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,8191,0.19062399864196777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,255,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,511,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,511,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,1023,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,1023,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,2047,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,2047,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,4095,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,8191,0.10372266173362732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,4095,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,8191,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,8191,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,16383,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,16383,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,32767,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,32767,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,65535,0.04271466533342997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,65535,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,1,0.019621333728233974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,1,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,131071,0.044394666949907936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,131071,0.06543466448783875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,3,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,3,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,7,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,7,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,15,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,15,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,31,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,31,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,63,0.018816000471512478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,63,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,127,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,127,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,255,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,255,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,511,0.0341333324710528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,511,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,1023,0.053616002202034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,1023,0.03365333378314972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,2047,0.05853333572546641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,4095,0.09620799620946248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,4095,0.17749333381652832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,1,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,3,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,1,0.025674665967623394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,3,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,7,0.027664000789324444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,7,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,15,0.027376001079877216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,2047,0.0995786686738332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,15,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,31,0.028789333999156952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,31,0.025066666305065155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,63,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,63,0.027647999425729115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,127,0.028629332780838013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,255,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,255,0.034261333445707955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,511,0.03791466603676478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,511,0.05487999816735586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,1023,0.054245332876841225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,1023,0.09533333778381348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,1,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,1,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,3,0.010421333213647207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,3,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,7,0.010597333312034607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,7,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,15,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,31,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,127,0.025994665920734406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,31,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,63,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,63,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,127,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,255,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,255,0.011333333949247995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,511,0.011424000064531961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,2047,0.17824532588322958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,511,0.011429333438475927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,1023,0.01156266654531161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,1023,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,2047,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,2047,0.016127999871969223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,4095,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,4095,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,8191,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,8191,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,2047,0.09754666686058044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,16383,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,16383,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,32767,0.04190933207670847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,32767,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,65535,0.04028266668319702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,65535,0.06233066817124685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,131071,0.10475732882817586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,131071,0.06285866598288219
