framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1,1,0,0.1049
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4,1,0,0.1050
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8,1,0,0.1056
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16,1,0,0.1052
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,32,1,0,0.1052
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,64,1,0,0.1052
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,128,1,0,0.1050
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,256,1,0,0.1082
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,512,1,0,0.1114
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1024,1,0,0.1123
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,2048,1,0,0.1155
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4096,1,0,0.1212
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8192,1,0,0.1225
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16384,1,0,0.1355
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1,1,0,0.1073
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4,1,0,0.1070
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8,1,0,0.1076
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16,1,0,0.1079
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,32,1,0,0.1070
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,64,1,0,0.1068
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,128,1,0,0.1071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,256,1,0,0.1097
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,512,1,0,0.1132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1024,1,0,0.1148
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,2048,1,0,0.1214
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4096,1,0,0.1232
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8192,1,0,0.1302
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16384,1,0,0.1230
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1,1,0,0.1091
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4,1,0,0.1089
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8,1,0,0.1089
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16,1,0,0.1088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,32,1,0,0.1089
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,64,1,0,0.1091
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,128,1,0,0.1088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,256,1,0,0.1111
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,512,1,0,0.1151
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1024,1,0,0.1228
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,2048,1,0,0.1255
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4096,1,0,0.1166
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8192,1,0,0.1203
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16384,1,0,0.1279
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1,1,0,0.0824
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4,1,0,0.0818
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8,1,0,0.0824
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16,1,0,0.0822
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,32,1,0,0.0820
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,64,1,0,0.0818
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,128,1,0,0.0822
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,256,1,0,0.0852
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,512,1,0,0.0937
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1024,1,0,0.0967
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,2048,1,0,0.0887
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4096,1,0,0.0909
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8192,1,0,0.0984
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16384,1,0,0.1118
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1,1,0,0.0867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4,1,0,0.0866
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8,1,0,0.0866
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16,1,0,0.0864
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,32,1,0,0.0864
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,64,1,0,0.0863
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,128,1,0,0.0861
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,256,1,0,0.0908
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,512,1,0,0.0947
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1024,1,0,0.0890
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,2048,1,0,0.0927
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4096,1,0,0.0999
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8192,1,0,0.1133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16384,1,0,0.1398
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1,1,0,0.0824
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4,1,0,0.0826
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,8,1,0,0.0823
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,16,1,0,0.0823
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,32,1,0,0.0822
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,64,1,0,0.0823
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,128,1,0,0.0831
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,256,1,0,0.0908
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,512,1,0,0.0927
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1024,1,0,0.0958
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,2048,1,0,0.1030
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4096,1,0,0.1169
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1,1,0,0.0883
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4,1,0,0.0885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,8,1,0,0.0887
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,16,1,0,0.0883
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,32,1,0,0.0885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,64,1,0,0.0885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,128,1,0,0.0906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,256,1,0,0.0917
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,512,1,0,0.0953
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1024,1,0,0.1018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,2048,1,0,0.1169
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4096,1,0,0.1431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1,1,0,0.1041
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,4,1,0,0.1045
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,8,1,0,0.1045
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,16,1,0,0.1044
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,32,1,0,0.1039
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,64,1,0,0.1045
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,128,1,0,0.1071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,256,1,0,0.1095
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,512,1,0,0.1173
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1024,1,0,0.1318
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,2048,1,0,0.1595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1,1,0,0.1379
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,4,1,0,0.1374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,8,1,0,0.1375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,16,1,0,0.1376
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,32,1,0,0.1376
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,64,1,0,0.1379
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,128,1,0,0.1431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,256,1,0,0.1521
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,512,1,0,0.1676
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1024,1,0,0.1958
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,1,1,0,0.1897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,4,1,0,0.1906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,8,1,0,0.1907
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,16,1,0,0.1906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,32,1,0,0.1903
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,64,1,0,0.1937
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,128,1,0,0.2040
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,256,1,0,0.2191
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,512,1,0,0.2450
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,1,1,0,0.3429
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,4,1,0,0.3430
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,8,1,0,0.3433
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,16,1,0,0.3431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,32,1,0,0.3437
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,64,1,0,0.3452
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,128,1,0,0.3607
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,256,1,0,0.3933
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1,1,0,0.0702
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4,1,0,0.0701
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8,1,0,0.0701
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16,1,0,0.0703
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,32,1,0,0.0701
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,64,1,0,0.0701
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,128,1,0,0.0701
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,256,1,0,0.0722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,512,1,0,0.0767
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1024,1,0,0.0781
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,2048,1,0,0.0783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4096,1,0,0.0801
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8192,1,0,0.0865
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16384,1,0,0.0906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1,1,0,0.0720
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4,1,0,0.0721
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8,1,0,0.0717
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16,1,0,0.0720
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,32,1,0,0.0716
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,64,1,0,0.0721
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,128,1,0,0.0722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,256,1,0,0.0742
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,512,1,0,0.0781
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1024,1,0,0.0781
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,2048,1,0,0.0803
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4096,1,0,0.0842
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8192,1,0,0.0920
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16384,1,0,0.0991
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1,1,0,0.0721
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4,1,0,0.0721
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8,1,0,0.0722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16,1,0,0.0722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,32,1,0,0.0721
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,64,1,0,0.0722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,128,1,0,0.0721
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,256,1,0,0.0746
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,512,1,0,0.0798
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1024,1,0,0.0805
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,2048,1,0,0.0886
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4096,1,0,0.0914
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8192,1,0,0.0986
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16384,1,0,0.0992
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1,1,0,0.0732
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4,1,0,0.0730
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8,1,0,0.0729
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16,1,0,0.0738
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,32,1,0,0.0730
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,64,1,0,0.0728
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,128,1,0,0.0729
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,256,1,0,0.0757
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,512,1,0,0.0812
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1024,1,0,0.0885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,2048,1,0,0.0925
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4096,1,0,0.0894
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8192,1,0,0.0949
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16384,1,0,0.1106
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1,1,0,0.0617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4,1,0,0.0601
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8,1,0,0.0613
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16,1,0,0.0613
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,32,1,0,0.0608
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,64,1,0,0.0609
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,128,1,0,0.0615
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,256,1,0,0.0643
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,512,1,0,0.0740
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1024,1,0,0.0785
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,2048,1,0,0.0745
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4096,1,0,0.0820
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8192,1,0,0.0958
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16384,1,0,0.1222
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1,1,0,0.0660
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4,1,0,0.0648
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,8,1,0,0.0660
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,16,1,0,0.0656
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,32,1,0,0.0658
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,64,1,0,0.0658
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,128,1,0,0.0670
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,256,1,0,0.0720
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,512,1,0,0.0760
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1024,1,0,0.0744
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,2048,1,0,0.0822
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4096,1,0,0.0984
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1,1,0,0.0632
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4,1,0,0.0637
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,8,1,0,0.0635
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,16,1,0,0.0638
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,32,1,0,0.0638
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,64,1,0,0.0640
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,128,1,0,0.0654
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,256,1,0,0.0749
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,512,1,0,0.0777
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1024,1,0,0.0850
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,2048,1,0,0.1010
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4096,1,0,0.1277
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1,1,0,0.0701
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,4,1,0,0.0702
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,8,1,0,0.0711
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,16,1,0,0.0708
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,32,1,0,0.0701
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,64,1,0,0.0718
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,128,1,0,0.0740
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,256,1,0,0.0779
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,512,1,0,0.0865
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1024,1,0,0.1015
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,2048,1,0,0.1293
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1,1,0,0.0929
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,4,1,0,0.0929
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,8,1,0,0.0931
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,16,1,0,0.0931
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,32,1,0,0.0930
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,64,1,0,0.0943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,128,1,0,0.0994
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,256,1,0,0.1076
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,512,1,0,0.1244
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1024,1,0,0.1556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,1,1,0,0.1244
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,4,1,0,0.1248
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,8,1,0,0.1247
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,16,1,0,0.1247
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,32,1,0,0.1245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,64,1,0,0.1299
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,128,1,0,0.1420
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,256,1,0,0.1571
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,512,1,0,0.1868
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,1,1,0,0.1994
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,4,1,0,0.1994
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,8,1,0,0.1993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,16,1,0,0.1996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,32,1,0,0.1992
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,64,1,0,0.2096
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,128,1,0,0.2284
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,256,1,0,0.2560
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1,1,0,0.0518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4,1,0,0.0516
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8,1,0,0.0517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16,1,0,0.0517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,32,1,0,0.0518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,64,1,0,0.0517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,128,1,0,0.0517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,256,1,0,0.0537
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,512,1,0,0.0557
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1024,1,0,0.0573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,2048,1,0,0.0578
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4096,1,0,0.0579
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8192,1,0,0.0619
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16384,1,0,0.0681
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1,1,0,0.0536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4,1,0,0.0538
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8,1,0,0.0536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16,1,0,0.0538
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,32,1,0,0.0538
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,64,1,0,0.0536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,128,1,0,0.0538
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,256,1,0,0.0557
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,512,1,0,0.0560
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1024,1,0,0.0577
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,2048,1,0,0.0577
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4096,1,0,0.0621
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8192,1,0,0.0657
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16384,1,0,0.0769
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1,1,0,0.0535
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4,1,0,0.0536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8,1,0,0.0537
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16,1,0,0.0535
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,32,1,0,0.0536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,64,1,0,0.0536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,128,1,0,0.0536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,256,1,0,0.0557
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,512,1,0,0.0557
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1024,1,0,0.0579
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,2048,1,0,0.0579
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4096,1,0,0.0638
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8192,1,0,0.0760
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16384,1,0,0.0879
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1,1,0,0.0516
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4,1,0,0.0526
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8,1,0,0.0517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16,1,0,0.0517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,32,1,0,0.0517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,64,1,0,0.0518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,128,1,0,0.0518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,256,1,0,0.0538
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,512,1,0,0.0576
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1024,1,0,0.0579
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,2048,1,0,0.0604
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4096,1,0,0.0722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8192,1,0,0.0830
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16384,1,0,0.1027
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1,1,0,0.0537
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4,1,0,0.0538
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8,1,0,0.0536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16,1,0,0.0537
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,32,1,0,0.0536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,64,1,0,0.0536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,128,1,0,0.0536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,256,1,0,0.0556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,512,1,0,0.0589
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1024,1,0,0.0619
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,2048,1,0,0.0700
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4096,1,0,0.0861
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8192,1,0,0.1068
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16384,1,0,0.1464
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1,1,0,0.0475
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4,1,0,0.0474
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,8,1,0,0.0475
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,16,1,0,0.0474
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,32,1,0,0.0475
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,64,1,0,0.0475
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,128,1,0,0.0475
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,256,1,0,0.0517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,512,1,0,0.0578
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1024,1,0,0.0636
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,2048,1,0,0.0826
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4096,1,0,0.1021
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1,1,0,0.0496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4,1,0,0.0493
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,8,1,0,0.0496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,16,1,0,0.0496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,32,1,0,0.0495
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,64,1,0,0.0509
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,128,1,0,0.0535
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,256,1,0,0.0593
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,512,1,0,0.0667
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1024,1,0,0.0795
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,2048,1,0,0.0986
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4096,1,0,0.1375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1,1,0,0.0579
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,4,1,0,0.0579
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,8,1,0,0.0579
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,16,1,0,0.0577
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,32,1,0,0.0576
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,64,1,0,0.0596
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,128,1,0,0.0626
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,256,1,0,0.0759
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,512,1,0,0.0873
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1024,1,0,0.1065
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,2048,1,0,0.1472
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1,1,0,0.0827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,4,1,0,0.0823
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,8,1,0,0.0823
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,16,1,0,0.0824
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,32,1,0,0.0826
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,64,1,0,0.0853
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,128,1,0,0.0861
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,256,1,0,0.1035
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,512,1,0,0.1254
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1024,1,0,0.1651
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,1,1,0,0.1171
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,4,1,0,0.1168
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,8,1,0,0.1167
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,16,1,0,0.1168
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,32,1,0,0.1168
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,64,1,0,0.1173
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,128,1,0,0.1195
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,256,1,0,0.1517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,512,1,0,0.1896
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,1,1,0,0.1824
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,4,1,0,0.1828
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,8,1,0,0.1824
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,16,1,0,0.1827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,32,1,0,0.1827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,64,1,0,0.1867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,128,1,0,0.1902
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,256,1,0,0.2498
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1,1,0,0.0455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,32,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,64,1,0,0.0455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,128,1,0,0.0455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,256,1,0,0.0474
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,512,1,0,0.0496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1024,1,0,0.0495
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,2048,1,0,0.0496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4096,1,0,0.0515
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8192,1,0,0.0517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16384,1,0,0.0577
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8,1,0,0.0455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,32,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,64,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,128,1,0,0.0455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,256,1,0,0.0476
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,512,1,0,0.0495
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1024,1,0,0.0495
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,2048,1,0,0.0495
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4096,1,0,0.0496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8192,1,0,0.0559
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16384,1,0,0.0594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1,1,0,0.0466
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4,1,0,0.0469
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8,1,0,0.0475
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16,1,0,0.0475
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,32,1,0,0.0473
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,64,1,0,0.0474
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,128,1,0,0.0474
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,256,1,0,0.0493
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,512,1,0,0.0496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1024,1,0,0.0497
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,2048,1,0,0.0525
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4096,1,0,0.0558
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8192,1,0,0.0579
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16384,1,0,0.0659
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16,1,0,0.0455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,32,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,64,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,128,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,256,1,0,0.0476
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,512,1,0,0.0495
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1024,1,0,0.0496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,2048,1,0,0.0499
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4096,1,0,0.0558
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8192,1,0,0.0619
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16384,1,0,0.0853
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1,1,0,0.0455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,32,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,64,1,0,0.0453
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,128,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,256,1,0,0.0475
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,512,1,0,0.0515
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1024,1,0,0.0515
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,2048,1,0,0.0537
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4096,1,0,0.0606
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8192,1,0,0.0832
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16384,1,0,0.1121
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,8,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,16,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,32,1,0,0.0456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,64,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,128,1,0,0.0455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,256,1,0,0.0485
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,512,1,0,0.0515
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1024,1,0,0.0537
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,2048,1,0,0.0673
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4096,1,0,0.0847
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1,1,0,0.0433
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4,1,0,0.0435
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,8,1,0,0.0433
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,16,1,0,0.0434
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,32,1,0,0.0434
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,64,1,0,0.0434
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,128,1,0,0.0434
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,256,1,0,0.0471
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,512,1,0,0.0533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1024,1,0,0.0662
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,2048,1,0,0.0863
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4096,1,0,0.1113
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1,1,0,0.0444
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,4,1,0,0.0443
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,8,1,0,0.0450
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,16,1,0,0.0444
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,32,1,0,0.0437
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,64,1,0,0.0454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,128,1,0,0.0455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,256,1,0,0.0504
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,512,1,0,0.0617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1024,1,0,0.0823
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,2048,1,0,0.1088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1,1,0,0.0556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,4,1,0,0.0556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,8,1,0,0.0558
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,16,1,0,0.0557
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,32,1,0,0.0557
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,64,1,0,0.0569
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,128,1,0,0.0617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,256,1,0,0.0781
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,512,1,0,0.0921
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1024,1,0,0.1215
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,1,1,0,0.0829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,4,1,0,0.0826
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,8,1,0,0.0833
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,16,1,0,0.0833
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,32,1,0,0.0828
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,64,1,0,0.0879
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,128,1,0,0.0901
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,256,1,0,0.1136
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,512,1,0,0.1412
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,1,1,0,0.1194
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,4,1,0,0.1193
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,8,1,0,0.1189
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,16,1,0,0.1191
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,32,1,0,0.1191
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,64,1,0,0.1271
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,128,1,0,0.1324
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,256,1,0,0.1716
