CLamsee - Bilateral Filter

This tutorial demonstrates the implementation of a bilateral filter using the CLamsee slang. The session for this tutorial is included in the ViSlang binary package, but you can also download it here. CLamsee is a general slang for parallel computing that consists of a subset of OpenCL C; an introduction can be found here.

Implementation

For the purpose of our tutorial we ported the bilateral edge-preserving filter from the openly available OpenCV library to our system. The bilateral filter is implemented in a straightforward way where each work item processes one output pixel. The work item iterates over a 2D neighborhood of radius r and sums up the weighted colors of the input pixels. Intuitively speaking, the weights are the product of a Gaussian spatial weight (small weights for pixels farther away) and a Gaussian range weight (small weights for colors that are very different in the red, green, and blue channels from the pixel under consideration). The parallel OpenCL implementation of the filter is much faster than the CPU implementation, which makes it a potential enabler of many real-time applications.

Device Code

Device.setInstrumentation(true);
using Device;

// Bilateral (edge-preserving) filter: each work item produces one output pixel.
//
// For the pixel at (xcoord, ycoord) the kernel visits every neighbour inside a
// disc of the given radius and accumulates the neighbour colours weighted by
//   w = spaceweight * valueweight
// where spaceweight falls off with the distance to the centre pixel and
// valueweight falls off with the summed absolute RGB difference to the centre
// pixel. The result is the weighted mean of the visited colours.
//
// Parameters read by the body:
//   inputImage, outputImage   source and destination images
//   radius                    neighbourhood radius in pixels
//   dst_rows, dst_cols        image height and width used for bounds checks
//   debug_x, debug_y          pixel processed when single_thread is non-zero
//   gauss_color_coeff         scale of the colour (range) weight exponent
//   single_thread             non-zero: every work item processes only the
//                             debug pixel and writes the individual weighted
//                             neighbour colours instead of the filtered result
//
// The remaining parameters (src_step, src_offset, dst_step, dst_offset,
// local_mem_size, blockDimX, blockDimY, gauss_space_coeff) are accepted but not
// read by this body; the spatial falloff is fixed at exp(-4 * d^2 / radius^2).
kernel bilateral_kernel(read_only image inputImage,
                        write_only image outputImage,
                        int radius,
                        int src_step,
                        int src_offset,
                        int dst_step,
                        int dst_offset,
                        int dst_rows,
                        int dst_cols,
                        int local_mem_size,
                        int blockDimX,
                        int blockDimY,
                        int debug_x,
                        int debug_y,
                        float gauss_space_coeff,
                        float gauss_color_coeff,
                        int single_thread)
{
    // read_imagef with integer coordinates is only defined for a sampler that
    // uses nearest filtering and unnormalized coordinates.
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

    int xcoord = get_global_id(0);
    int ycoord = get_global_id(1);

    // In single-thread (debug) mode the work item id is ignored and the
    // requested debug pixel is processed instead.
    if (single_thread)
    {
        xcoord = debug_x;
        ycoord = debug_y;
    }

    if ((ycoord < dst_rows) & (xcoord < dst_cols))
    {
        int2 image_pos01 = {xcoord, ycoord};
        int2 image_pos02 = {xcoord, ycoord};

        float4 incolor = read_imagef(inputImage, sampler, image_pos01);
        float3 sum = {0, 0, 0};
        float4 clearColor = {0, 0, 0, 1};
        float wsum = 0;

        if (single_thread)
        {
            write_imagef(outputImage, image_pos01, clearColor);
        }

        for (int xOffset = -radius; xOffset <= radius; xOffset = xOffset + 1)
        {
            for (int yOffset = -radius; yOffset <= radius; yOffset = yOffset + 1)
            {
                int2 npos = {xcoord + xOffset, ycoord + yOffset};

                // x is bounded by the column count and y by the row count,
                // matching the guard on (xcoord, ycoord) above.
                if ((npos.x >= 0) & (npos.x < dst_cols) & (npos.y >= 0) & (npos.y < dst_rows))
                {
                    float dx = xOffset;
                    float dy = yOffset;
                    float distance = sqrt(dx * dx + dy * dy);

                    // Restrict the square window to a disc.
                    if (distance <= radius)
                    {
                        // space weight
                        float spaceweight = native_exp(-4 * distance * distance / (radius * radius));

                        // value weight
                        float4 color = read_imagef(inputImage, sampler, npos);
                        float3 diff3 = fabs(color.xyz - incolor.xyz);
                        float diff = diff3.x + diff3.y + diff3.z;
                        float valueweight = native_exp(-1.0 * diff * diff * gauss_color_coeff);

                        // combined weight
                        float w = spaceweight * valueweight;
                        sum += color.xyz * w;
                        wsum += w;

                        @emit float debug_gauss[100];
                        @emit float debug_value_weight[100];
                        @emit float debug_combined[100];

                        // Record the weights along the centre row, centred in
                        // the 100-entry debug arrays. For radius > 50 part of
                        // the row does not fit, so out-of-range slots are
                        // skipped instead of being written out of bounds.
                        int off = (100 - radius * 2) / 2;
                        int slot = off + xOffset + radius;
                        if ((yOffset == 0) & (slot >= 0) & (slot < 100))
                        {
                            debug_gauss[slot] = spaceweight;
                            debug_value_weight[slot] = valueweight;
                            debug_combined[slot] = w;
                        }

                        // Debug mode: visualise each neighbour's weighted
                        // contribution at the neighbour's own position.
                        if (single_thread)
                        {
                            float4 out2 = {color.x * w, color.y * w, color.z * w, 1};
                            write_imagef(outputImage, npos, out2);
                        }
                    }
                }
            }
        }

        // The centre pixel always contributes weight 1, so wsum is non-zero.
        sum /= wsum;

        float4 outColor = {0, 0, 0, 1};
        outColor.xyz = sum;

        if (!single_thread)
        {
            write_imagef(outputImage, image_pos02, outColor);
        }
    }
}

Host Code

// User-adjustable parameters: name, initial value, minimum, maximum.
vs.declareInteger('radius', 15, 1, 127);
vs.declareFloat('gauss_space_coeff', 0.1, 0.00001, 2);
vs.declareFloat('gauss_color_coeff', 1.5, 0.0001, 50);

integer imageSize = 512;
boolean single_thread = false;

// Pixel inspected when single_thread is enabled; starts at the image centre.
vs.declareInteger('debugX', imageSize/2, 0, imageSize-1);
vs.declareInteger('debugY', imageSize/2, 0, imageSize-1);

// Number of additional image_A <-> image_B filter round trips per update.
vs.declareInteger('repeat', 0, 0, 20);

// Plot the three debug arrays emitted by the kernel.
WebView.clearCharts();
WebView.showScatterPlot(debug_gauss, debug_value_weight, debug_combined);

integer local_size_x = 32;
integer local_size_y = 4;
integer local_mem_size = (local_size_x + 2) * (local_size_y + 2);

Device.createGLImage('image_A', imageSize, imageSize);
Device.createGLImage('image_B', imageSize, imageSize);
Device.createGLImage('image_single_thread', imageSize, imageSize);

integer updateView = 0;

// Runs the bilateral filter with the current parameter values.
// The argument t is not read by the body; the triggers below pass radius.
void computeFilter(float t){
    if (single_thread)
    {
        // Refresh the web view only on every third invocation.
        updateView = updateView+1;
        if((updateView%3)==0)
        {
            WebView.update();
        }

        Images.clearBlack(image_single_thread);
        Device.setLocalWorkSize(1, 1);
        // NOTE(review): debugSizeX / debugSizeY are not declared in this
        // listing - confirm they are provided by the session.
        Device.setGlobalWorkSize(debugSizeX,debugSizeY);
        Device.bilateral_kernel(
            lena_small, image_single_thread, radius,
            1,1,1,1,imageSize,imageSize,
            local_mem_size, local_size_x, local_size_y,
            debugX, debugY,
            gauss_space_coeff, gauss_color_coeff,
            single_thread);
    }
    else
    {
        Device.setLocalWorkSize(local_size_x, local_size_y);
        Device.setGlobalWorkSize(imageSize,imageSize);

        // Full-image pass from the source image.
        Device.bilateral_kernel(
            lena_small, image_single_thread, radius,
            1,1,1,1,imageSize,imageSize,
            local_mem_size, local_size_x, local_size_y,
            debugX, debugY,
            gauss_space_coeff, gauss_color_coeff,
            single_thread);

        // Additional passes alternating between image_A and image_B.
        // NOTE(review): nothing in this listing writes image_A before the
        // first iteration reads it - confirm whether the pass above was meant
        // to target image_A.
        for (integer i = 0; i < repeat; i = i+1)
        {
            Device.bilateral_kernel(
                image_A, image_B, radius,
                1,1,1,1,imageSize,imageSize,
                local_mem_size, local_size_x, local_size_y,
                debugX, debugY,
                gauss_space_coeff, gauss_color_coeff,
                single_thread);
            Device.bilateral_kernel(
                image_B, image_A, radius,
                1,1,1,1,imageSize,imageSize,
                local_mem_size, local_size_x, local_size_y,
                debugX, debugY,
                gauss_space_coeff, gauss_color_coeff,
                single_thread);
        }
    }
}

// set trigger
// if any of these variables is changed, the filter is called automatically
repeat -> computeFilter(radius);
single_thread -> computeFilter(radius);
radius -> computeFilter(radius);
debugX -> computeFilter(radius);
debugY -> computeFilter(radius);
gauss_space_coeff-> computeFilter(radius);
gauss_color_coeff-> computeFilter(radius);

// call bilateral filter
computeFilter(radius);

Copyright © 2015-2017 vislang.net