## to_blur
# Texel layout shared by both device images: RGBA, unsigned 8-bit per channel.
image_format = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8)

# Device-side images, both shaped like ``img.size``.
# input_buf_1 is initialised from the host pixel bytes (COPY_HOST_PTR);
# dest_buf_1 is allocated without host data and, in the code visible here,
# is only referenced by the disabled to_blur call below.
input_flags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
input_buf_1 = cl.Image(
    ctx,
    input_flags,
    image_format,
    shape=img.size,
    hostbuf=img.tobytes(),
)
dest_buf_1 = cl.Image(ctx, cl.mem_flags.READ_WRITE, image_format, shape=img.size)

# Blur mask size, wrapped as a 32-bit integer so it can be handed to a kernel
# as a scalar argument (it is passed to to_blur_buffer further down).
mask_size = 0
np_mask_size = np.int32(mask_size)
# Image-based blur, currently disabled:
# prg.to_blur(queue, (img_size,), (1,), dest_buf_1, input_buf_1, np_mask_size).wait()
## to_gray
# Device buffer sized for one float32 per pixel of the img_width x img_height image.
gray_nbytes = img_width * img_height * np.dtype(np.float32).itemsize
dest_buf_2 = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, gray_nbytes)

# Launch to_gray_buffer over an img_width x img_height global range with 1x1
# work-groups and block until it completes. Presumably the kernel reads the RGBA
# image input_buf_1 and writes grayscale floats into dest_buf_2 -- confirm
# against the kernel source.
prg.to_gray_buffer(queue, (img_width, img_height), (1, 1), dest_buf_2, input_buf_1).wait()

# The result deliberately stays on the device: it is passed straight to the
# blur stage, and the host array ``dest`` is rebuilt from scratch before the
# final readback, so copying dest_buf_2 into it here would be discarded work.
# To inspect this intermediate on the host, use e.g.
#     gray_host = np.empty(img_width * img_height, np.float32)
#     cl.enqueue_copy(queue, gray_host, dest_buf_2)
## blur 2
# Output buffer for the buffer-based blur pass: one float32 slot per pixel.
blur_nbytes = img_width * img_height * np.dtype(np.float32).itemsize
dest_buf_3 = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, blur_nbytes)
# Separable Gaussian variant, currently disabled:
# prg.gaussian_blur_separable_2d_buffer(queue, (img_width, img_height), (1, 1), dest_buf_3, dest_buf_2, 1,10,1).wait()
# Enqueue to_blur_buffer over an img_width x img_height global range (1x1
# work-groups) with dest_buf_3, dest_buf_2 and the int32 mask size as kernel
# arguments, then block until the kernel has finished.
blur_event = prg.to_blur_buffer(
    queue,
    (img_width, img_height),
    (1, 1),
    dest_buf_3,
    dest_buf_2,
    np_mask_size,
)
blur_event.wait()
## gradient
# Every buffer in this stage holds one float32 per pixel; compute the byte size once.
field_nbytes = img_width * img_height * np.dtype(np.float32).itemsize
# All kernels below run over an img_width x img_height global range with 1x1 work-groups.
pixel_grid = (img_width, img_height)
unit_group = (1, 1)

# Six equally sized device buffers. Going by the names, these are the first
# derivatives (dx, dy) and second derivatives (dxx, dxy, dyx, dyy) of the
# blurred image -- confirm against the kernel source.
dx, dy, dxx, dxy, dyx, dyy = (
    cl.Buffer(ctx, cl.mem_flags.READ_WRITE, field_nbytes) for _ in range(6)
)

# First pass: x and y gradient kernels applied to the blurred image dest_buf_3.
prg.gradient_x_2d_buffer(queue, pixel_grid, unit_group, dx, dest_buf_3).wait()
prg.gradient_y_2d_buffer(queue, pixel_grid, unit_group, dy, dest_buf_3).wait()
# Second pass: the same two kernels applied to dx and dy.
prg.gradient_x_2d_buffer(queue, pixel_grid, unit_group, dxx, dx).wait()
prg.gradient_y_2d_buffer(queue, pixel_grid, unit_group, dxy, dx).wait()
# dyx is not passed to mean_curvature below (the original author marked it
# redundant); it is still computed in case code outside this section reads it.
prg.gradient_x_2d_buffer(queue, pixel_grid, unit_group, dyx, dy).wait()
prg.gradient_y_2d_buffer(queue, pixel_grid, unit_group, dyy, dy).wait()

# Buffer handed to mean_curvature as its first argument (presumably the output),
# followed by the derivative buffers dx, dy, dxx, dxy, dyy.
h = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, field_nbytes)
prg.mean_curvature(queue, pixel_grid, unit_group, h, dx, dy, dxx, dxy, dyy).wait()

# read image back to cpu
# A plain linear copy transfers the whole buffer (dest.nbytes == field_nbytes),
# so no origin/region arguments are needed. Copies involving host memory block
# by default, so ``dest`` is fully populated once this call returns.
dest = np.empty(img_width * img_height, np.float32)
cl.enqueue_copy(queue, dest, h)