framework,version,device,op_name,kernel_source,allreduce_dtype,num_gpus,message_size,latency,backend
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,128,0.006617599725723266,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,128,0.02142080068588257,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,256,0.0069363200664520255,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,256,0.02064863920211792,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,512,0.007006400227546691,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,512,0.020725440979003907,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,1024,0.0070070397853851316,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,1024,0.02094655990600586,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,2048,0.006899200081825256,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,2048,0.020766398906707763,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,4096,0.0070041602849960334,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,4096,0.020661439895629883,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,8192,0.007036160230636597,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,8192,0.019750720262527464,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,16384,0.007023360133171082,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,16384,0.01967455983161926,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,32768,0.007066239714622498,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,32768,0.019736640453338623,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,65536,0.0073942399024963384,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,65536,0.019667199850082397,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,131072,0.007411199808120728,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,131072,0.019728959798812867,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,262144,0.00947712004184723,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,262144,0.019771840572357178,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,524288,0.012925120592117308,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,524288,0.022752640247344972,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,1048576,0.02461440086364746,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,1048576,0.03544064044952393,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,2097152,0.03343807935714722,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,2097152,0.04347263813018799,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,4194304,0.0421884822845459,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,4194304,0.045228800773620605,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,8388608,0.05401375770568848,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,8388608,0.058442239761352544,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,16777216,0.08445599555969238,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,16777216,0.09661248207092285,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,33554432,0.15146400451660155,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,33554432,0.17649023056030272,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,67108864,0.27839839935302735,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,67108864,0.32268863677978515,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,134217728,0.5229487991333007,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,134217728,0.6067705535888672,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,268435456,1.0045484924316406,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,268435456,1.1750089263916015,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,2,536870912,1.9206410217285157,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,2,536870912,2.250664367675781,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,128,0.006456320285797119,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,128,0.02166368007659912,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,256,0.006628479957580566,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,256,0.021143999099731445,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,512,0.006738560199737549,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,512,0.0209769606590271,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,1024,0.006861760020256043,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,1024,0.02120896100997925,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,2048,0.00660256028175354,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,2048,0.021302399635314943,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,4096,0.006988160014152527,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,4096,0.020550079345703125,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,8192,0.007118719816207886,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,8192,0.020307838916778564,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,16384,0.00702015995979309,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,16384,0.020239360332489013,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,32768,0.007020480036735536,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,32768,0.020110080242156984,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,65536,0.007534719705581665,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,65536,0.029049599170684816,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,131072,0.008317440152168273,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,131072,0.02892447948455811,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,262144,0.01782495975494385,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,262144,0.028734400272369388,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,524288,0.018148159980773924,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,524288,0.02942336082458496,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,1048576,0.022641279697418214,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,1048576,0.05171040058135986,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,2097152,0.028938241004943847,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,2097152,0.05168543815612793,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,4194304,0.04248799800872803,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,4194304,0.05190976142883301,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,8388608,0.06793280124664307,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,8388608,0.12460032463073731,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,16777216,0.12090271949768065,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,16777216,0.13373279571533203,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,33554432,0.18362176895141602,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,33554432,0.20920480728149413,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,67108864,0.35012351989746093,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,67108864,0.39670623779296876,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,134217728,0.6746809387207031,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,134217728,0.7655292510986328,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,268435456,1.2940953063964844,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,268435456,1.465341796875,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,4,536870912,2.5246342468261718,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,4,536870912,2.857269897460937,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,128,0.007091839909553528,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,128,0.03529983997344971,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,256,0.006984959840774537,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,256,0.03586335897445679,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,512,0.0069055998325347895,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,512,0.02153248071670532,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,1024,0.006822720170021057,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,1024,0.021756479740142824,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,2048,0.007228800058364867,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,2048,0.02136928081512451,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,4096,0.007306560277938842,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,4096,0.0209116792678833,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,8192,0.007457600235939025,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,8192,0.02036384105682373,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,16384,0.00784063994884491,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,16384,0.02027071952819824,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,32768,0.00810271978378296,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,32768,0.020448958873748778,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,65536,0.008851199746131896,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,65536,0.020295679569244385,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,131072,0.02512959957122803,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,131072,0.03336447954177856,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,262144,0.025305919647216797,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,262144,0.03560415983200073,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,524288,0.016818560361862182,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,524288,0.036661760807037355,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,1048576,0.019657280445098877,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,1048576,0.03667648077011108,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,2097152,0.025416638851165775,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,2097152,0.036782400608062746,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,4194304,0.038318719863891605,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,4194304,0.04635903835296631,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,8388608,0.0620633602142334,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,8388608,0.07194911956787109,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,16777216,0.11010144233703614,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,16777216,0.1263638401031494,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,33554432,0.22414815902709959,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,33554432,0.2543881607055664,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,67108864,0.39605121612548827,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,67108864,0.42496063232421877,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,134217728,0.7200777435302734,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,134217728,0.7894678497314453,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,268435456,1.3503369140625001,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,268435456,1.5124946594238282,vllm_eager
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_graph,bfloat16,8,536870912,2.6113616943359377,vllm_graph
vLLM,0.16.0,NVIDIA B200,all_reduce,vLLM_custom_eager,bfloat16,8,536870912,2.9403720092773438,vllm_eager
