framework,version,device,op_name,kernel_source,allreduce_dtype,num_gpus,message_size,latency,backend
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,128,0.0054016000032424925,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,128,0.08853983879089355,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,256,0.005206400156021118,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,256,0.08836064338684083,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,512,0.005321919918060303,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,512,0.08307423591613769,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,1024,0.005284799933433533,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,1024,0.0708844804763794,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,2048,0.005583360195159912,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,2048,0.06927775859832763,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,4096,0.005724160075187683,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,4096,0.06903456211090088,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,8192,0.0062217599153518675,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,8192,0.06886303901672364,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,16384,0.007406719923019408,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,16384,0.042523522377014164,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,32768,0.009860799908638,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,32768,0.04284895896911621,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,65536,0.010988479852676392,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,65536,0.042931838035583494,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,131072,0.018051199913024903,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,131072,0.042851839065551754,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,262144,0.03210335969924927,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,262144,0.04300960063934326,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,524288,0.060247039794921874,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,524288,0.0660969591140747,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,1048576,0.11682111740112304,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,1048576,0.12366368293762206,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,2097152,0.22941215515136718,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,2097152,0.238155517578125,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,4194304,0.4499491119384766,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,4194304,0.4548195266723633,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,8388608,0.8918793487548828,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,8388608,0.9022431945800781,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,16777216,1.7769200134277345,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,16777216,1.8459382629394532,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,33554432,3.5416125488281254,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,33554432,3.710653381347656,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,67108864,7.006727905273438,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,67108864,7.45470947265625,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,134217728,14.0616943359375,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,134217728,14.928778076171875,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,268435456,27.988046875,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,268435456,29.820949707031254,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,536870912,55.952304687499996,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,536870912,59.2941748046875,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,128,0.013292479515075683,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,128,0.12166080474853516,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,256,0.013493759632110598,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,256,0.10066847801208496,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,512,0.013512320518493654,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,512,0.09405920028686524,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,1024,0.013742719888687133,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,1024,0.05743135929107666,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,2048,0.014087040424346924,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,2048,0.05659103870391846,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,4096,0.01432096004486084,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,4096,0.057459201812744144,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,8192,0.017511680126190185,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,8192,0.0559548807144165,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,16384,0.026024000644683836,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,16384,0.058361601829528806,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,32768,0.036925439834594724,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,32768,0.05801375865936279,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,65536,0.05962944030761719,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,65536,0.06355487823486328,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,131072,0.11557791709899903,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,131072,0.12006464004516601,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,262144,0.21897632598876954,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,262144,0.22389535903930663,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,524288,0.3606000137329102,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,524288,0.35646209716796873,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,1048576,0.8407574462890626,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,1048576,0.8410050964355469,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,2097152,1.6486845397949217,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,2097152,1.6503170776367189,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,4194304,3.321838989257812,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,4194304,3.3644293212890624,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,8388608,6.676235351562499,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,8388608,6.704022216796875,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,16777216,13.59797607421875,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,16777216,13.62226318359375,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,33554432,27.241481933593754,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,33554432,27.34803466796875,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,67108864,54.406435546875,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,67108864,54.735595703125,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,134217728,108.4784375,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,134217728,109.30738281250001,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,268435456,216.16224609374999,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,268435456,218.29519531249997,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,536870912,430.9239453125,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,536870912,435.1060546875,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,128,0.024985599517822265,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,128,0.10426688194274902,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,256,0.025581440925598144,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,256,0.06822368144989013,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,512,0.025525760650634766,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,512,0.0657263994216919,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,1024,0.026113920211791992,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,1024,0.061339521408081056,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,2048,0.026732800006866453,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,2048,0.059720001220703124,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,4096,0.027623040676116945,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,4096,0.06146944046020508,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,8192,0.027370879650115965,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,8192,0.06110591888427734,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,16384,0.04835616111755371,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,16384,0.057760639190673826,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,32768,0.07288832187652587,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,32768,0.06971136093139649,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,65536,0.1083465576171875,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,65536,0.1083580780029297,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,131072,0.2070822334289551,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,131072,0.209616641998291,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,262144,0.44452865600585934,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,262144,0.4174188613891602,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,524288,0.5826480102539062,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,524288,0.5977417755126953,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,1048576,2.0799452209472653,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,1048576,2.074207611083984,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,2097152,4.014495239257813,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,2097152,4.0323489379882815,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,4194304,8.167540893554687,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,4194304,8.203961791992189,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,8388608,15.805878906250001,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,8388608,15.504874267578126,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,16777216,30.06233642578125,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,16777216,29.48292236328125,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,33554432,60.0729833984375,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,33554432,59.436376953125,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,67108864,120.26708007812499,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,67108864,120.5294921875,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,134217728,240.07373046875,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,134217728,241.034921875,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,268435456,480.823359375,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,268435456,481.7181640625,vllm_eager
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,536870912,961.2289843750001,vllm_graph
vLLM,0.14.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,536870912,963.96390625,vllm_eager
