framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1,1,0,0.5470
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4,1,0,0.5429
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8,1,0,0.5181
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16,1,0,0.5120
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,32,1,0,0.5134
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,64,1,0,0.5165
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,128,1,0,0.5119
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,256,1,0,0.5233
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,512,1,0,0.5272
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1024,1,0,0.5653
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,2048,1,0,0.7679
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4096,1,0,1.3739
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8192,1,0,3.3048
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16384,1,0,10.9367
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1,1,0,0.5414
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4,1,0,0.5211
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8,1,0,0.5169
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16,1,0,0.5103
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,32,1,0,0.5138
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,64,1,0,0.5164
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,128,1,0,0.5160
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,256,1,0,0.5300
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,512,1,0,0.5589
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1024,1,0,0.7060
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,2048,1,0,1.1487
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4096,1,0,2.4739
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8192,1,0,7.0385
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16384,1,0,21.2674
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1,1,0,0.5343
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4,1,0,0.5105
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8,1,0,0.5082
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16,1,0,0.5139
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,32,1,0,0.5102
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,64,1,0,0.5300
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,128,1,0,0.5208
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,256,1,0,0.5590
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,512,1,0,0.6770
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1024,1,0,1.0327
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,2048,1,0,2.0411
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4096,1,0,5.1478
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8192,1,0,14.2109
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16384,1,0,43.7889
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1,1,0,0.5096
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4,1,0,0.5126
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8,1,0,0.5114
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16,1,0,0.5071
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,32,1,0,0.5164
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,64,1,0,0.5126
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,128,1,0,0.5480
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,256,1,0,0.6597
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,512,1,0,0.9727
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1024,1,0,1.8090
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,2048,1,0,4.0467
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4096,1,0,10.3705
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8192,1,0,29.4082
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16384,1,0,88.7115
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1,1,0,0.5132
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4,1,0,0.5162
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8,1,0,0.5146
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16,1,0,0.5308
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,32,1,0,0.5275
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,64,1,0,0.5617
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,128,1,0,0.6639
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,256,1,0,0.9527
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,512,1,0,1.7014
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1024,1,0,3.5624
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,2048,1,0,8.5264
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4096,1,0,21.2480
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1,1,0,0.5083
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4,1,0,0.5241
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,8,1,0,0.5462
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,16,1,0,0.5444
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,32,1,0,0.6038
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,64,1,0,0.7036
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,128,1,0,0.9533
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,256,1,0,1.6578
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,512,1,0,3.4468
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1024,1,0,7.4128
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,2048,1,0,17.1482
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4096,1,0,42.7103
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1,1,0,0.6046
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4,1,0,0.6220
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,8,1,0,0.6305
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,16,1,0,0.6786
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,32,1,0,0.7830
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,64,1,0,1.0412
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,128,1,0,1.6632
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,256,1,0,3.1725
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,512,1,0,6.9853
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1024,1,0,15.0509
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,2048,1,0,34.6685
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1,1,0,0.7711
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,4,1,0,0.8009
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,8,1,0,0.8531
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,16,1,0,0.9441
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,32,1,0,1.1930
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,64,1,0,1.8590
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,128,1,0,3.2625
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,256,1,0,6.5913
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,512,1,0,13.9942
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1024,1,0,30.6197
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1,1,0,1.1363
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,4,1,0,1.1829
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,8,1,0,1.2683
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,16,1,0,1.4997
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,32,1,0,2.1325
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,64,1,0,3.5795
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,128,1,0,6.6866
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,256,1,0,13.8806
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,512,1,0,28.6646
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,1,1,0,1.8004
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,4,1,0,1.9208
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,8,1,0,2.1537
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,16,1,0,2.7482
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,32,1,0,4.2314
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,64,1,0,7.3575
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,128,1,0,13.7338
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,256,1,0,27.3350
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,1,1,0,3.1345
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,4,1,0,3.4591
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,8,1,0,4.1818
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,16,1,0,5.4704
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,32,1,0,8.8468
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,64,1,0,15.5071
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,128,1,0,28.1885
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1,1,0,0.5471
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4,1,0,0.5347
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8,1,0,0.5324
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16,1,0,0.5270
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,32,1,0,0.5304
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,64,1,0,0.5314
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,128,1,0,0.5291
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,256,1,0,0.5364
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,512,1,0,0.5302
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1024,1,0,0.5486
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,2048,1,0,0.6166
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4096,1,0,0.8922
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8192,1,0,1.8029
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16384,1,0,5.3923
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1,1,0,0.5264
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4,1,0,0.5360
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8,1,0,0.5208
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16,1,0,0.5165
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,32,1,0,0.5309
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,64,1,0,0.5215
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,128,1,0,0.5242
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,256,1,0,0.5322
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,512,1,0,0.5438
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1024,1,0,0.5878
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,2048,1,0,0.7766
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4096,1,0,1.3779
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8192,1,0,3.3303
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16384,1,0,11.0733
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1,1,0,0.5458
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4,1,0,0.5246
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8,1,0,0.5274
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16,1,0,0.5340
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,32,1,0,0.5349
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,64,1,0,0.5256
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,128,1,0,0.5326
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,256,1,0,0.5438
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,512,1,0,0.5725
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1024,1,0,0.7153
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,2048,1,0,1.1456
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4096,1,0,2.4892
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8192,1,0,7.1322
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16384,1,0,21.4386
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1,1,0,0.5380
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4,1,0,0.5285
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8,1,0,0.5342
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16,1,0,0.5301
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,32,1,0,0.5386
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,64,1,0,0.5513
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,128,1,0,0.5551
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,256,1,0,0.5955
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,512,1,0,0.6936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1024,1,0,1.0446
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,2048,1,0,2.0546
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4096,1,0,5.0540
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8192,1,0,14.2655
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16384,1,0,44.4087
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1,1,0,0.5316
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4,1,0,0.5312
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8,1,0,0.5358
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16,1,0,0.5379
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,32,1,0,0.5473
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,64,1,0,0.5431
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,128,1,0,0.5880
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,256,1,0,0.6729
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,512,1,0,0.9813
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1024,1,0,1.8343
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,2048,1,0,4.0991
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4096,1,0,10.5683
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1,1,0,0.5291
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4,1,0,0.5371
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,8,1,0,0.5254
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,16,1,0,0.5413
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,32,1,0,0.5403
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,64,1,0,0.5842
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,128,1,0,0.6763
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,256,1,0,0.9668
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,512,1,0,1.7236
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1024,1,0,3.6562
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,2048,1,0,8.6003
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4096,1,0,21.7513
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1,1,0,0.5506
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4,1,0,0.5626
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,8,1,0,0.5635
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,16,1,0,0.5738
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,32,1,0,0.6224
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,64,1,0,0.7137
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,128,1,0,0.9497
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,256,1,0,1.6777
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,512,1,0,3.3806
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1024,1,0,7.5151
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,2048,1,0,17.3896
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1,1,0,0.6597
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,4,1,0,0.6474
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,8,1,0,0.6762
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,16,1,0,0.7020
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,32,1,0,0.7862
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,64,1,0,1.0297
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,128,1,0,1.6952
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,256,1,0,3.3494
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,512,1,0,7.0834
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1024,1,0,15.4399
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1,1,0,0.8726
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,4,1,0,0.8606
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,8,1,0,0.8736
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,16,1,0,0.9564
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,32,1,0,1.1863
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,64,1,0,1.8556
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,128,1,0,3.3121
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,256,1,0,6.7046
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,512,1,0,14.2030
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,1,1,0,1.3104
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,4,1,0,1.2170
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,8,1,0,1.2788
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,16,1,0,1.4934
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,32,1,0,2.1659
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,64,1,0,3.6614
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,128,1,0,6.8071
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,256,1,0,13.8699
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,1,1,0,1.8220
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,4,1,0,1.9285
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,8,1,0,2.1580
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,16,1,0,2.7758
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,32,1,0,4.2184
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,64,1,0,7.6367
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,128,1,0,14.0192
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1,1,0,0.5470
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4,1,0,0.5437
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8,1,0,0.5366
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16,1,0,0.5356
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,32,1,0,0.5288
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,64,1,0,0.5308
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,128,1,0,0.5231
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,256,1,0,0.5286
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,512,1,0,0.5380
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1024,1,0,0.5370
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,2048,1,0,0.5557
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4096,1,0,0.6855
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8192,1,0,1.1298
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16384,1,0,2.6957
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1,1,0,0.5219
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4,1,0,0.5333
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8,1,0,0.5249
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16,1,0,0.5330
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,32,1,0,0.5294
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,64,1,0,0.5338
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,128,1,0,0.5254
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,256,1,0,0.5334
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,512,1,0,0.5378
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1024,1,0,0.5443
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,2048,1,0,0.6060
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4096,1,0,0.8966
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8192,1,0,1.8213
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16384,1,0,5.3444
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1,1,0,0.5243
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4,1,0,0.5314
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8,1,0,0.5332
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16,1,0,0.5275
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,32,1,0,0.5242
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,64,1,0,0.5222
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,128,1,0,0.5192
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,256,1,0,0.5300
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,512,1,0,0.5384
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1024,1,0,0.5716
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,2048,1,0,0.7746
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4096,1,0,1.3826
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8192,1,0,3.5054
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16384,1,0,11.4928
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1,1,0,0.5283
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4,1,0,0.5303
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8,1,0,0.5277
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16,1,0,0.5231
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,32,1,0,0.5231
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,64,1,0,0.5281
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,128,1,0,0.5269
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,256,1,0,0.5554
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,512,1,0,0.5830
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1024,1,0,0.7177
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,2048,1,0,1.1564
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4096,1,0,2.5785
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8192,1,0,7.1596
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16384,1,0,22.5288
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1,1,0,0.5352
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4,1,0,0.5340
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8,1,0,0.5353
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16,1,0,0.5339
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,32,1,0,0.5371
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,64,1,0,0.5357
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,128,1,0,0.5540
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,256,1,0,0.5845
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,512,1,0,0.6912
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1024,1,0,1.0424
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,2048,1,0,2.1394
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4096,1,0,5.2381
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1,1,0,0.5215
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4,1,0,0.5247
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,8,1,0,0.5168
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,16,1,0,0.5278
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,32,1,0,0.5342
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,64,1,0,0.5467
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,128,1,0,0.5716
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,256,1,0,0.7018
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,512,1,0,0.9817
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1024,1,0,1.9246
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,2048,1,0,4.3379
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4096,1,0,10.9381
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1,1,0,0.5296
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4,1,0,0.5360
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,8,1,0,0.5311
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,16,1,0,0.5431
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,32,1,0,0.5557
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,64,1,0,0.5743
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,128,1,0,0.6757
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,256,1,0,0.9731
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,512,1,0,1.8139
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1024,1,0,3.8085
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,2048,1,0,8.9365
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1,1,0,0.5276
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,4,1,0,0.5495
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,8,1,0,0.5554
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,16,1,0,0.5737
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,32,1,0,0.6190
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,64,1,0,0.7165
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,128,1,0,0.9739
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,256,1,0,1.7714
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,512,1,0,3.6225
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1024,1,0,7.9948
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1,1,0,0.6343
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,4,1,0,0.6394
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,8,1,0,0.6580
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,16,1,0,0.7098
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,32,1,0,0.7892
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,64,1,0,1.0422
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,128,1,0,1.7817
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,256,1,0,3.4725
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,512,1,0,7.3755
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,1,1,0,0.8287
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,4,1,0,0.8442
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,8,1,0,0.8655
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,16,1,0,0.9433
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,32,1,0,1.2015
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,64,1,0,1.9456
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,128,1,0,3.4969
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,256,1,0,7.2130
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,1,1,0,1.1430
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,4,1,0,1.2100
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,8,1,0,1.2890
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,16,1,0,1.5128
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,32,1,0,2.2532
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,64,1,0,3.8713
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,128,1,0,7.3101
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1,1,0,0.5546
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4,1,0,0.5275
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8,1,0,0.5379
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16,1,0,0.5360
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,32,1,0,0.5406
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,64,1,0,0.5283
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,128,1,0,0.5315
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,256,1,0,0.5320
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,512,1,0,0.5322
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1024,1,0,0.5336
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,2048,1,0,0.5310
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4096,1,0,0.5821
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8192,1,0,0.8157
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16384,1,0,1.5790
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1,1,0,0.5256
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4,1,0,0.5304
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8,1,0,0.5222
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16,1,0,0.5194
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,32,1,0,0.5333
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,64,1,0,0.5258
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,128,1,0,0.5334
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,256,1,0,0.5304
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,512,1,0,0.5302
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1024,1,0,0.5408
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,2048,1,0,0.5381
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4096,1,0,0.6854
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8192,1,0,1.1305
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16384,1,0,2.7529
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1,1,0,0.5292
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4,1,0,0.5252
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8,1,0,0.5281
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16,1,0,0.5273
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,32,1,0,0.5330
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,64,1,0,0.5252
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,128,1,0,0.5290
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,256,1,0,0.5301
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,512,1,0,0.5338
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1024,1,0,0.5423
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,2048,1,0,0.6213
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4096,1,0,0.9020
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8192,1,0,1.9042
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16384,1,0,5.4058
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1,1,0,0.5275
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4,1,0,0.5260
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8,1,0,0.5336
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16,1,0,0.5254
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,32,1,0,0.5289
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,64,1,0,0.5351
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,128,1,0,0.5387
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,256,1,0,0.5423
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,512,1,0,0.5425
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1024,1,0,0.5934
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,2048,1,0,0.7864
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4096,1,0,1.4701
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8192,1,0,3.6970
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16384,1,0,11.7139
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1,1,0,0.5304
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4,1,0,0.5379
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8,1,0,0.5364
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16,1,0,0.5402
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,32,1,0,0.5428
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,64,1,0,0.5462
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,128,1,0,0.5345
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,256,1,0,0.5513
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,512,1,0,0.5847
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1024,1,0,0.7356
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,2048,1,0,1.2462
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4096,1,0,2.8331
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1,1,0,0.5255
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4,1,0,0.5340
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,8,1,0,0.5294
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,16,1,0,0.5356
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,32,1,0,0.5328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,64,1,0,0.5439
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,128,1,0,0.5420
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,256,1,0,0.6116
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,512,1,0,0.6844
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1024,1,0,1.1305
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,2048,1,0,2.3877
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4096,1,0,5.6772
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1,1,0,0.8140
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4,1,0,0.5281
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,8,1,0,0.5351
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,16,1,0,0.5255
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,32,1,0,0.5376
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,64,1,0,0.5489
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,128,1,0,0.5811
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,256,1,0,0.6710
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,512,1,0,1.0715
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1024,1,0,2.1663
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,2048,1,0,4.8914
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1,1,0,0.5306
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,4,1,0,0.5260
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,8,1,0,0.5358
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,16,1,0,0.5411
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,32,1,0,0.5403
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,64,1,0,0.5738
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,128,1,0,0.6773
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,256,1,0,1.0493
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,512,1,0,2.0508
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1024,1,0,4.3081
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1,1,0,0.5372
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,4,1,0,0.5424
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,8,1,0,0.5539
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,16,1,0,0.5733
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,32,1,0,0.6191
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,64,1,0,0.7199
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,128,1,0,1.0566
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,256,1,0,2.0101
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,512,1,0,4.0410
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,1,1,0,0.6409
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,4,1,0,0.6327
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,8,1,0,0.6546
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,16,1,0,0.6989
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,32,1,0,0.7956
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,64,1,0,1.1331
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,128,1,0,2.0235
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,256,1,0,3.9708
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,1,1,0,0.8047
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,4,1,0,0.8282
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,8,1,0,0.8688
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,16,1,0,0.9509
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,32,1,0,1.2880
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,64,1,0,2.1875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,128,1,0,4.0591
