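# variable_length_memory_efficient_attention — GPU CUDA error 700 (cudaErrorIllegalAddress)
# Root cause: seq_lens or kv_seq_lens contains a 0 value.
#
# NOTE: The real trigger for these cases is seq_lens/kv_seq_lens holding specific
#       values (e.g. [0,1] / [1,0]).
#       tensor_spec can currently only constrain the value distribution range; it
#       cannot pin the exact contents of a tensor.
#       The stats constraint is meant to be dropped here, with the exact values each
#       case needs recorded in the comment above it.
#       NOTE(review): every case below still carries `.stats(low=0, high=2)` on both
#       length tensors, which contradicts "dropped" — confirm which is intended.
#       Follow-up: consider adding an exact_values constraint to IntStats.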

# Case 1: intended inputs are seq_lens=[0,1], kv_seq_lens=[1,1]
# (the zero is in the first seq_lens entry).
# The spec below does not pin these values: both length tensors are declared only
# with .stats(low=0, high=2), so the exact contents are recorded in this comment.
incubate.nn.functional.variable_length_memory_efficient_attention(query=Tensor.float16((1, 1, 31, 64)), key=Tensor.float16((1, 1, 31, 64)), value=Tensor.float16((1, 1, 31, 64)), seq_lens=Tensor.int32((2,)).stats(low=0, high=2), kv_seq_lens=Tensor.int32((2,)).stats(low=0, high=2), mask=Tensor.float16((1, 1, 50, 50)), scale=0.125)

# Case 2: intended inputs are seq_lens=[1,0], kv_seq_lens=[1,1]
# (the zero is in the second seq_lens entry).
# The spec below does not pin these values: both length tensors are declared only
# with .stats(low=0, high=2), so the exact contents are recorded in this comment.
incubate.nn.functional.variable_length_memory_efficient_attention(query=Tensor.float16((1, 1, 31, 64)), key=Tensor.float16((1, 1, 31, 64)), value=Tensor.float16((1, 1, 31, 64)), seq_lens=Tensor.int32((2,)).stats(low=0, high=2), kv_seq_lens=Tensor.int32((2,)).stats(low=0, high=2), mask=Tensor.float16((1, 1, 50, 50)), scale=0.125)

# Case 3: intended inputs are seq_lens=[1,1], kv_seq_lens=[0,1]
# (the zero is in the first kv_seq_lens entry).
# The spec below does not pin these values: both length tensors are declared only
# with .stats(low=0, high=2), so the exact contents are recorded in this comment.
incubate.nn.functional.variable_length_memory_efficient_attention(query=Tensor.float16((1, 1, 31, 64)), key=Tensor.float16((1, 1, 31, 64)), value=Tensor.float16((1, 1, 31, 64)), seq_lens=Tensor.int32((2,)).stats(low=0, high=2), kv_seq_lens=Tensor.int32((2,)).stats(low=0, high=2), mask=Tensor.float16((1, 1, 50, 50)), scale=0.125)

# Case 4: intended inputs are seq_lens=[1,1], kv_seq_lens=[1,0]
# (the zero is in the second kv_seq_lens entry).
# The spec below does not pin these values: both length tensors are declared only
# with .stats(low=0, high=2), so the exact contents are recorded in this comment.
incubate.nn.functional.variable_length_memory_efficient_attention(query=Tensor.float16((1, 1, 31, 64)), key=Tensor.float16((1, 1, 31, 64)), value=Tensor.float16((1, 1, 31, 64)), seq_lens=Tensor.int32((2,)).stats(low=0, high=2), kv_seq_lens=Tensor.int32((2,)).stats(low=0, high=2), mask=Tensor.float16((1, 1, 50, 50)), scale=0.125)
