# kernel.py
import numpy as np

def kernel(a, b):
    """Multiply two dense matrices using naive (left-to-right) summation.

    Computes ``c[i][j] = sum over k of a[i][k] * b[k][j]`` in float64.
    For every output element the products are added one at a time in
    increasing ``k`` order, starting from ``0.0``, which is the same
    arithmetic as the textbook triple loop.

    Args:
        a: nested list of float (or any 2-D array-like), shape (M, K).
        b: nested list of float (or any 2-D array-like), shape (K, N).

    Returns:
        Nested list of float, shape (M, N).

    Raises:
        ValueError: if ``a`` or ``b`` does not convert to a 2-D array.
        AssertionError: if the inner dimensions of ``a`` and ``b`` differ.
    """
    # Convert to numpy float64 (double precision) to match reference
    a_np = np.array(a, dtype=np.float64)
    b_np = np.array(b, dtype=np.float64)

    # Fail with a readable message instead of an opaque tuple-unpacking
    # error when an input is not a matrix.
    if a_np.ndim != 2 or b_np.ndim != 2:
        raise ValueError(
            "kernel expects 2-D inputs, got shapes "
            f"{a_np.shape} and {b_np.shape}"
        )

    M, K = a_np.shape
    K2, N = b_np.shape
    # Raised explicitly rather than via an ``assert`` statement so the check
    # is not stripped under ``python -O``; the exception type and message
    # are the ones callers already see.
    if K != K2:
        raise AssertionError(f"Inner dimensions must match: {K} != {K2}")

    # Accumulate one rank-1 update per k: column k of `a` (M, 1) broadcast
    # against row k of `b` (1, N).  The multiply and the add stay separate
    # element-wise operations applied in increasing k, so each c[i][j] is
    # built up in the same order as a scalar triple loop, without M*N*K
    # Python-level indexing operations.
    c_np = np.zeros((M, N), dtype=np.float64)
    for k in range(K):
        c_np += a_np[:, k:k + 1] * b_np[k:k + 1, :]

    return c_np.tolist()