framework,version,device,op_name,kernel_source,allreduce_dtype,num_gpus,message_size,latency,backend
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,128,0.005178560018539428,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,128,0.08990143775939942,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,256,0.005158720016479492,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,256,0.08836607933044434,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,512,0.0052089601755142215,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,512,0.07040480136871338,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,1024,0.00525439977645874,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,1024,0.0701196813583374,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,2048,0.005401920080184937,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,2048,0.06984384059906006,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,4096,0.005654399991035461,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,4096,0.06970431804656982,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,8192,0.006101120114326477,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,8192,0.06281536102294923,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,16384,0.00738431990146637,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,16384,0.04338272094726563,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,32768,0.009214079976081849,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,32768,0.04413631916046142,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,65536,0.01099616050720215,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,65536,0.043452157974243164,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,131072,0.018053439855575563,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,131072,0.0438921594619751,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,262144,0.03210272073745728,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,262144,0.04670527935028076,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,524288,0.060245118141174316,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,524288,0.06608575820922852,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,1048576,0.1168825626373291,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,1048576,0.12371999740600585,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,2097152,0.2292707252502441,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,2097152,0.23953184127807617,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,4194304,0.4526876831054688,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,4194304,0.45511550903320314,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,8388608,0.8947647857666017,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,8388608,0.9009033966064454,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,16777216,1.7820223999023437,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,16777216,1.8337475585937502,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,33554432,3.5514523315429685,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,33554432,3.7015270996093745,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,67108864,7.0484375,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,67108864,7.435090332031249,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,134217728,14.144284667968751,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,134217728,14.867047119140626,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,268435456,28.14252685546875,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,268435456,29.7725927734375,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,2,536870912,56.4513525390625,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,2,536870912,59.1657275390625,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,128,0.013893760442733766,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,128,0.10321951866149903,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,256,0.014106559753417968,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,256,0.09787839889526367,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,512,0.014098240137100222,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,512,0.06077023983001709,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,1024,0.014300160408020018,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,1024,0.060950398445129395,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,2048,0.014575040340423584,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,2048,0.057985601425170896,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,4096,0.015106879472732542,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,4096,0.05592512130737305,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,8192,0.01804352045059204,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,8192,0.056322240829467775,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,16384,0.026512000560760494,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,16384,0.05823999881744385,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,32768,0.039118080139160155,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,32768,0.05855775833129882,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,65536,0.06352928161621094,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,65536,0.06460000038146972,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,131072,0.11636992454528809,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,131072,0.11869888305664063,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,262144,0.22644447326660155,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,262144,0.22830368041992188,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,524288,0.38085758209228515,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,524288,0.3721027374267578,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,1048576,0.8656301116943359,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,1048576,0.8724806213378906,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,2097152,1.708424072265625,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,2097152,1.7059510803222657,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,4194304,3.394859008789062,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,4194304,3.417503967285156,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,8388608,6.854564819335937,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,8388608,6.873584594726563,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,16777216,13.864631347656251,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,16777216,13.888105468750002,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,33554432,27.705991210937498,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,33554432,27.88364501953125,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,67108864,55.3581689453125,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,67108864,55.7532470703125,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,134217728,110.4824609375,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,134217728,111.3140625,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,268435456,220.16498046875,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,268435456,222.73042968750002,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,4,536870912,438.63359375,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,4,536870912,443.3823046875,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,128,0.02563584089279175,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,128,0.14328319549560548,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,256,0.025929279327392578,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,256,0.09300000190734863,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,512,0.026319360733032225,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,512,0.09329631805419922,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,1024,0.026338560581207277,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,1024,0.06093056201934814,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,2048,0.06301727771759033,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,2048,0.10464032173156738,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,4096,0.03029247999191284,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,4096,0.05890783786773681,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,8192,0.0275929594039917,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,8192,0.05595647811889648,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,16384,0.04765952110290527,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,16384,0.05668384075164795,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,32768,0.08394687652587891,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,32768,0.07773983955383301,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,65536,0.12237279891967774,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,65536,0.12187935829162597,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,131072,0.22209152221679687,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,131072,0.2272175979614258,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,262144,0.43252033233642584,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,262144,0.4326636886596679,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,524288,0.6118918228149414,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,524288,0.580374412536621,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,1048576,2.0805783081054687,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,1048576,2.0676425170898436,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,2097152,4.127549743652343,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,2097152,4.107213745117187,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,4194304,8.1926123046875,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,4194304,8.198350830078125,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,8388608,15.889284667968749,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,8388608,15.543126220703124,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,16777216,30.19278564453125,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,16777216,29.55896728515625,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,33554432,60.3754052734375,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,33554432,60.167431640625,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,67108864,120.53289062500001,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,67108864,121.13933593750001,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,134217728,240.81701171875002,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,134217728,242.90185546875,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,268435456,482.3644140625,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,268435456,485.3579296875,vllm_eager
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_graph,float16,8,536870912,964.48703125,vllm_graph
vLLM,0.12.0,NVIDIA L40S,all_reduce,vLLM_custom_eager,float16,8,536870912,971.450234375,vllm_eager
