gpufilter
GPU-Efficient Recursive Filtering and Summed-Area Tables
|
Classes | |
struct | gpufilter::_alg_setup |
Algorithm setup to configure the GPU to run. More... | |
Modules | |
GPU Computation functions | |
Functions | |
void | gpufilter::calc_alg_setup (alg_setup &algs, const int &w, const int &h) |
Calculate algorithm setup values. | |
void | gpufilter::calc_alg_setup (alg_setup &algs, const int &w, const int &h, const int &extb) |
Calculate algorithm setup values considering extension blocks. | |
void | gpufilter::up_alg_setup (const alg_setup &algs) |
Upload algorithm setup values. | |
void | gpufilter::up_constants_coefficients1 (const float &b0, const float &a1) |
Upload device constants first-order coefficients. | |
void | gpufilter::up_constants_coefficients2 (const float &b0, const float &a1, const float &a2) |
Upload device constants second-order coefficients. | |
void | gpufilter::prepare_algSAT (alg_setup &algs, dvector< float > &d_inout, dvector< float > &d_ybar, dvector< float > &d_vhat, dvector< float > &d_ysum, const float *h_in, const int &w, const int &h) |
Prepare for Algorithm SAT. | |
void | gpufilter::algSAT (dvector< float > &d_out, dvector< float > &d_ybar, dvector< float > &d_vhat, dvector< float > &d_ysum, const dvector< float > &d_in, const alg_setup &algs) |
Compute Algorithm SAT. | |
void | gpufilter::algSAT (dvector< float > &d_inout, dvector< float > &d_ybar, dvector< float > &d_vhat, dvector< float > &d_ysum, const alg_setup &algs) |
Compute Algorithm SAT. | |
void | gpufilter::algSAT (float *inout, const int &w, const int &h) |
Compute Algorithm SAT. | |
void | gpufilter::prepare_alg4 (alg_setup &algs, alg_setup &algs_transp, dvector< float > &d_out, dvector< float > &d_transp_out, dvector< float2 > &d_transp_pybar, dvector< float2 > &d_transp_ezhat, dvector< float2 > &d_pubar, dvector< float2 > &d_evhat, cudaArray *&a_in, const float *h_in, const int &w, const int &h, const float &b0, const float &a1, const float &a2, const int &extb=0, const initcond &ic=zero) |
Prepare for Algorithm 4. | |
void | gpufilter::alg4 (dvector< float > &d_out, dvector< float > &d_transp_out, dvector< float2 > &d_transp_pybar, dvector< float2 > &d_transp_ezhat, dvector< float2 > &d_pubar, dvector< float2 > &d_evhat, const cudaArray *a_in, const alg_setup &algs, const alg_setup &algs_transp) |
Compute Algorithm 4 (second-order) | |
void | gpufilter::alg4 (float *h_inout, const int &w, const int &h, const float &b0, const float &a1, const float &a2, const int &extb=0, const initcond &ic=zero) |
Compute Algorithm 4 (second-order) | |
void | gpufilter::prepare_alg5 (alg_setup &algs, dvector< float > &d_out, dvector< float > &d_transp_pybar, dvector< float > &d_transp_ezhat, dvector< float > &d_ptucheck, dvector< float > &d_etvtilde, cudaArray *&a_in, const float *h_in, const int &w, const int &h, const float &b0, const float &a1, const int &extb=0, const initcond &ic=zero) |
Prepare for Algorithm 5. | |
void | gpufilter::alg5 (dvector< float > &d_out, dvector< float > &d_transp_pybar, dvector< float > &d_transp_ezhat, dvector< float > &d_ptucheck, dvector< float > &d_etvtilde, const cudaArray *a_in, const alg_setup &algs) |
Compute Algorithm 5 (first-order) | |
void | gpufilter::alg5 (float *h_inout, const int &w, const int &h, const float &b0, const float &a1, const int &extb=0, const initcond &ic=zero) |
Compute Algorithm 5 (first-order) | |
void | gpufilter::gaussian_gpu (float **inout, const int &w, const int &h, const int &d, const float &s, const int &extb=1, const initcond &ic=clamp) |
Gaussian blur an image in the GPU. | |
void | gpufilter::gaussian_gpu (float *inout, const int &w, const int &h, const float &s, const int &extb=1, const initcond &ic=clamp) |
Gaussian blur a single-channel image in the GPU. | |
void | gpufilter::bspline3i_gpu (float **inout, const int &w, const int &h, const int &d, const int &extb=1, const initcond &ic=mirror) |
Compute the Bicubic B-Spline interpolation of an image in the GPU. | |
void | gpufilter::bspline3i_gpu (float *inout, const int &w, const int &h, const int &extb=1, const initcond &ic=mirror) |
Compute the Bicubic B-Spline interpolation of a single-channel image in the GPU. | |
__host__ void | gpufilter::up_texture (cudaArray *&a_in, const float *h_in, const int &w, const int &h, const initcond &ic) |
Upload input image as a texture in device. |
void gpufilter::alg4 | ( | dvector< float > & | d_out, |
dvector< float > & | d_transp_out, | ||
dvector< float2 > & | d_transp_pybar, | ||
dvector< float2 > & | d_transp_ezhat, | ||
dvector< float2 > & | d_pubar, | ||
dvector< float2 > & | d_evhat, | ||
const cudaArray * | a_in, | ||
const alg_setup & | algs, | ||
const alg_setup & | algs_transp | ||
) |
Compute Algorithm 4 (second-order)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
[out] | d_out | The output 2D image allocated in device memory |
[out] | d_transp_out | The transposed output 2D image used in the middle of the computation |
[out] | d_transp_pybar | The $P_{m,n}(\bar{Y})$ allocated in device memory |
[out] | d_transp_ezhat | The $E_{m,n}(\hat{Z})$ allocated in device memory |
[out] | d_pubar | The $P_{m,n}(\bar{U})$ allocated in device memory |
[out] | d_evhat | The $E_{m,n}(\hat{V})$ allocated in device memory |
[in] | a_in | The input 2D image allocated in device memory as cudaArray |
[in] | algs | Algorithm setup to be uploaded to the GPU |
[in] | algs_transp | Algorithm setup transposed to be used in the middle of the computation |
void gpufilter::alg4 | ( | float * | h_inout, |
const int & | w, | ||
const int & | h, | ||
const float & | b0, | ||
const float & | a1, | ||
const float & | a2, | ||
const int & | extb = 0 , |
||
const initcond & | ic = zero |
||
) |
Compute Algorithm 4 (second-order)
This function computes second-order recursive filtering with given feedback and feedforward coefficients of an input 2D image using algorithm 4.
The algorithm 4 is discussed in depth in our paper ([Nehab:2011] cited in alg5() function).
[in,out] | h_inout | The in/output 2D image to compute recursive filtering |
[in] | w | Image width |
[in] | h | Image height |
[in] | b0 | Feedforward coefficient |
[in] | a1 | Feedback first-order coefficient |
[in] | a2 | Feedback second-order coefficient |
[in] | extb | Extension (in blocks) to consider outside image (default 0) |
[in] | ic | Initial condition (for outside access) (default zero) |
void gpufilter::alg5 | ( | dvector< float > & | d_out, |
dvector< float > & | d_transp_pybar, | ||
dvector< float > & | d_transp_ezhat, | ||
dvector< float > & | d_ptucheck, | ||
dvector< float > & | d_etvtilde, | ||
const cudaArray * | a_in, | ||
const alg_setup & | algs | ||
) |
Compute Algorithm 5 (first-order)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
[out] | d_out | The output 2D image allocated in device memory |
[out] | d_transp_pybar | The $P_{m,n}(\bar{Y})$ allocated in device memory |
[out] | d_transp_ezhat | The $E_{m,n}(\hat{Z})$ allocated in device memory |
[out] | d_ptucheck | The $P^T_{m,n}(\check{U})$ allocated in device memory |
[out] | d_etvtilde | The $E^T_{m,n}(\tilde{V})$ allocated in device memory |
[in] | a_in | The input 2D image allocated in device memory as cudaArray |
[in] | algs | Algorithm setup used for this computation |
void gpufilter::alg5 | ( | float * | h_inout, |
const int & | w, | ||
const int & | h, | ||
const float & | b0, | ||
const float & | a1, | ||
const int & | extb = 0 , |
||
const initcond & | ic = zero |
||
) |
Compute Algorithm 5 (first-order)
This function computes first-order recursive filtering with given feedback and feedforward coefficients of an input 2D image using algorithm 5.
The algorithm 5 is discussed in depth in our paper:
@inproceedings{Nehab:2011, title = {{GPU}-{E}fficient {R}ecursive {F}iltering and {S}ummed-{A}rea {T}ables}, author = {{N}ehab, {D}. and {M}aximo, {A}. and {L}ima, {R}. {S}. and {H}oppe, {H}.}, journal = {{ACM} {T}ransactions on {G}raphics ({P}roceedings of the {ACM} {SIGGRAPH} {A}sia 2011)}, year = {2011}, volume = {30}, number = {6}, doi = {}, publisher = {ACM}, address = {{N}ew {Y}ork, {NY}, {USA}} }
[in,out] | h_inout | The in/output 2D image to compute recursive filtering in host memory |
[in] | w | Image width |
[in] | h | Image height |
[in] | b0 | Feedforward coefficient |
[in] | a1 | Feedback coefficient |
[in] | extb | Extension (in blocks) to consider outside image (default 0) |
[in] | ic | Initial condition (for outside access) (default zero) |
void gpufilter::algSAT | ( | dvector< float > & | d_out, |
dvector< float > & | d_ybar, | ||
dvector< float > & | d_vhat, | ||
dvector< float > & | d_ysum, | ||
const dvector< float > & | d_in, | ||
const alg_setup & | algs | ||
) |
Compute Algorithm SAT.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
[out] | d_out | The output 2D image allocated in device memory |
[out] | d_ybar | The $P_{m,n}(\bar{Y})$ allocated in device memory |
[out] | d_vhat | The $P^T_{m,n}(\hat{V})$ allocated in device memory |
[out] | d_ysum | The $s(P_{m,n}(Y))$ allocated in device memory |
[in] | d_in | The input 2D image allocated in device memory |
[in] | algs | Algorithm setup used for this computation |
void gpufilter::algSAT | ( | dvector< float > & | d_inout, |
dvector< float > & | d_ybar, | ||
dvector< float > & | d_vhat, | ||
dvector< float > & | d_ysum, | ||
const alg_setup & | algs | ||
) |
Compute Algorithm SAT.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
[in,out] | d_inout | The in/output 2D image allocated in device memory |
[out] | d_ybar | The $P_{m,n}(\bar{Y})$ allocated in device memory |
[out] | d_vhat | The $P^T_{m,n}(\hat{V})$ allocated in device memory |
[out] | d_ysum | The $s(P_{m,n}(Y))$ allocated in device memory |
[in] | algs | Algorithm setup used for this computation |
void gpufilter::algSAT | ( | float * | inout, |
const int & | w, | ||
const int & | h | ||
) |
Compute Algorithm SAT.
This function computes the summed-area table (SAT) of an input 2D image using algorithm SAT.
The algorithm SAT is discussed in depth in our paper (see [Nehab:2011] in alg5() function) where the following image illustrates the process:
Overlapped summed-area table computation according to algorithm SAT. Stage S.1 reads the input (in gray) then computes and stores incomplete prologues $P_{m,n}(\bar{Y})$ (in red) and $P^T_{m,n}(\hat{V})$ (in blue). Stage S.2 completes prologues $P_{m,n}(Y)$ and computes scalars $s(P_{m-1,n}(Y))$ (in yellow). Stage S.3 completes prologues $P^T_{m,n}(V)$. Finally, stage S.4 reads the input and completed prologues, then computes and stores the final summed-area table.
[in,out] | inout | The in/output 2D image to compute SAT |
[in] | w | Image width |
[in] | h | Image height |
void gpufilter::bspline3i_gpu | ( | float ** | inout, |
const int & | w, | ||
const int & | h, | ||
const int & | d, | ||
const int & | extb = 1 , |
||
const initcond & | ic = mirror |
||
) |
Compute the Bicubic B-Spline interpolation of an image in the GPU.
Given an input 2D image, compute the Bicubic B-Spline interpolation of it by applying a first-order recursive filter, using the initial condition ic (default mirror) for accesses outside the image.
[in,out] | inout | The 2D image to compute the Bicubic B-Spline interpolation |
[in] | w | Width of the input image |
[in] | h | Height of the input image |
[in] | d | Depth of the input image (color channels) |
[in] | extb | Extension (in blocks) to consider outside image (default 1 block) |
[in] | ic | Initial condition (for outside access) (default mirror) |
void gpufilter::bspline3i_gpu | ( | float * | inout, |
const int & | w, | ||
const int & | h, | ||
const int & | extb = 1 , |
||
const initcond & | ic = mirror |
||
) |
Compute the Bicubic B-Spline interpolation of a single-channel image in the GPU.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
[in,out] | inout | The single-channel 2D image to compute the Bicubic B-Spline interpolation |
[in] | w | Width of the input image |
[in] | h | Height of the input image |
[in] | extb | Extension (in blocks) to consider outside image (default 1 block) |
[in] | ic | Initial condition (for outside access) (default mirror) |
void gpufilter::calc_alg_setup | ( | alg_setup & | algs, |
const int & | w, | ||
const int & | h | ||
) |
Calculate algorithm setup values.
Given the dimensions of the 2D work image, calculate the device constant memory size-related values. It returns the setup to run any GPU algorithm.
[out] | algs | Algorithm setup to be uploaded to the GPU |
[in] | w | Width of the work image |
[in] | h | Height of the work image |
void gpufilter::calc_alg_setup | ( | alg_setup & | algs, |
const int & | w, | ||
const int & | h, | ||
const int & | extb | ||
) |
Calculate algorithm setup values considering extension blocks.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. Given the dimensions of the 2D work image, calculate the device constant memory size-related values. The work image is the original image plus extension blocks to run algorithms out-of-bounds. It returns the setup to run any GPU algorithm.
[out] | algs | Algorithm setup to be uploaded to the GPU |
[in] | w | Width of the work image |
[in] | h | Height of the work image |
[in] | extb | Extension (in blocks) to consider outside image |
void gpufilter::gaussian_gpu | ( | float ** | inout, |
const int & | w, | ||
const int & | h, | ||
const int & | d, | ||
const float & | s, | ||
const int & | extb = 1 , |
||
const initcond & | ic = clamp |
||
) |
Gaussian blur an image in the GPU.
Given an input 2D image with d color channels, compute the Gaussian blur of it by applying a first-order recursive filter (using alg5()) followed by a second-order recursive filter (using alg4()), using the initial condition ic (default clamp) for accesses outside the image.
[in,out] | inout | The 2D image to compute Gaussian blur |
[in] | w | Width of the input image |
[in] | h | Height of the input image |
[in] | d | Depth of the input image (color channels) |
[in] | s | Sigma support of Gaussian blur computation |
[in] | extb | Extension (in blocks) to consider outside image (default 1 block) |
[in] | ic | Initial condition (for outside access) (default clamp) |
void gpufilter::gaussian_gpu | ( | float * | inout, |
const int & | w, | ||
const int & | h, | ||
const float & | s, | ||
const int & | extb = 1 , |
||
const initcond & | ic = clamp |
||
) |
Gaussian blur a single-channel image in the GPU.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
[in,out] | inout | The single-channel 2D image to compute Gaussian blur |
[in] | w | Width of the input image |
[in] | h | Height of the input image |
[in] | s | Sigma support of Gaussian blur computation |
[in] | extb | Extension (in blocks) to consider outside image (default 1 block) |
[in] | ic | Initial condition (for outside access) (default clamp) |
void gpufilter::prepare_alg4 | ( | alg_setup & | algs, |
alg_setup & | algs_transp, | ||
dvector< float > & | d_out, | ||
dvector< float > & | d_transp_out, | ||
dvector< float2 > & | d_transp_pybar, | ||
dvector< float2 > & | d_transp_ezhat, | ||
dvector< float2 > & | d_pubar, | ||
dvector< float2 > & | d_evhat, | ||
cudaArray *& | a_in, | ||
const float * | h_in, | ||
const int & | w, | ||
const int & | h, | ||
const float & | b0, | ||
const float & | a1, | ||
const float & | a2, | ||
const int & | extb = 0 , |
||
const initcond & | ic = zero |
||
) |
Prepare for Algorithm 4.
This function prepares the data structures used by the recursive filtering algorithm 4 (order 2) of an input 2D image.
The algorithm 4 is discussed in depth in our paper (see [Nehab:2011] in alg5() function) and it is implemented in alg4() function.
[out] | algs | Algorithm setup computed for initial computation |
[out] | algs_transp | Algorithm setup transposed to be used in the middle of the computation |
[out] | d_out | The output 2D image to be allocated in device memory |
[out] | d_transp_out | The transposed output 2D image used in the middle of the computation |
[out] | d_transp_pybar | The $P_{m,n}(\bar{Y})$ to be allocated in device memory |
[out] | d_transp_ezhat | The $E_{m,n}(\hat{Z})$ to be allocated in device memory |
[out] | d_pubar | The $P_{m,n}(\bar{U})$ to be allocated in device memory |
[out] | d_evhat | The $E_{m,n}(\hat{V})$ to be allocated in device memory |
[out] | a_in | The input 2D image as cudaArray to be allocated and copied to device memory |
[in] | h_in | The input 2D image to compute algorithm 4 in host memory |
[in] | w | Image width |
[in] | h | Image height |
[in] | b0 | Feedforward coefficient |
[in] | a1 | Feedback first-order coefficient |
[in] | a2 | Feedback second-order coefficient |
[in] | extb | Extension (in blocks) to consider outside image (default 0) |
[in] | ic | Initial condition (for outside access) (default zero) |
void gpufilter::prepare_alg5 | ( | alg_setup & | algs, |
dvector< float > & | d_out, | ||
dvector< float > & | d_transp_pybar, | ||
dvector< float > & | d_transp_ezhat, | ||
dvector< float > & | d_ptucheck, | ||
dvector< float > & | d_etvtilde, | ||
cudaArray *& | a_in, | ||
const float * | h_in, | ||
const int & | w, | ||
const int & | h, | ||
const float & | b0, | ||
const float & | a1, | ||
const int & | extb = 0 , |
||
const initcond & | ic = zero |
||
) |
Prepare for Algorithm 5.
This function prepares the data structures used by the recursive filtering algorithm 5 (order 1) of an input 2D image.
The algorithm 5 is discussed in depth in our paper (see [Nehab:2011] in alg5() function) and it is implemented in alg5() function.
[out] | algs | Algorithm setup computed and uploaded to the GPU |
[out] | d_out | The output 2D image to be allocated in device memory |
[out] | d_transp_pybar | The $P_{m,n}(\bar{Y})$ to be allocated in device memory |
[out] | d_transp_ezhat | The $E_{m,n}(\hat{Z})$ to be allocated in device memory |
[out] | d_ptucheck | The $P^T_{m,n}(\check{U})$ to be allocated in device memory |
[out] | d_etvtilde | The $E^T_{m,n}(\tilde{V})$ to be allocated in device memory |
[out] | a_in | The input 2D image as cudaArray to be allocated and copied to device memory |
[in] | h_in | The input 2D image to compute algorithm 5 in host memory |
[in] | w | Image width |
[in] | h | Image height |
[in] | b0 | Feedforward coefficient |
[in] | a1 | Feedback coefficient |
[in] | extb | Extension (in blocks) to consider outside image (default 0) |
[in] | ic | Initial condition (for outside access) (default zero) |
void gpufilter::prepare_algSAT | ( | alg_setup & | algs, |
dvector< float > & | d_inout, | ||
dvector< float > & | d_ybar, | ||
dvector< float > & | d_vhat, | ||
dvector< float > & | d_ysum, | ||
const float * | h_in, | ||
const int & | w, | ||
const int & | h | ||
) |
Prepare for Algorithm SAT.
This function prepares the data structures used by the summed-area table (SAT) algorithm of an input 2D image.
The algorithm SAT is discussed in depth in our paper (see [Nehab:2011] in alg5() function) and it is implemented in algSAT() function.
[out] | algs | Algorithm setup computed and uploaded to the GPU |
[out] | d_inout | The in/output 2D image to be allocated and copied to device memory |
[out] | d_ybar | The $P_{m,n}(\bar{Y})$ to be allocated in device memory |
[out] | d_vhat | The $P^T_{m,n}(\hat{V})$ to be allocated in device memory |
[out] | d_ysum | The $s(P_{m,n}(Y))$ to be allocated in device memory |
[in] | h_in | The input 2D image to compute SAT in host memory |
[in] | w | Image width |
[in] | h | Image height |
void gpufilter::up_alg_setup | ( | const alg_setup & | algs | ) |
Upload algorithm setup values.
Given the algorithm setup, upload the values to the device constant memory.
[in] | algs | Algorithm setup to upload to the GPU |
void gpufilter::up_constants_coefficients1 | ( | const float & | b0, |
const float & | a1 | ||
) |
Upload device constants first-order coefficients.
Given the first-order coefficients of the recursive filter, upload to the device constant memory the coefficients-related values.
[in] | b0 | Feedforward coefficient |
[in] | a1 | Feedback first-order coefficient |
void gpufilter::up_constants_coefficients2 | ( | const float & | b0, |
const float & | a1, | ||
const float & | a2 | ||
) |
Upload device constants second-order coefficients.
Given the second-order coefficients of the recursive filter, upload to the device constant memory the coefficients-related values.
[in] | b0 | Feedforward coefficient |
[in] | a1 | Feedback first-order coefficient |
[in] | a2 | Feedback second-order coefficient |
__host__ void gpufilter::up_texture | ( | cudaArray *& | a_in, |
const float * | h_in, | ||
const int & | w, | ||
const int & | h, | ||
const initcond & | ic | ||
) |
Upload input image as a texture in device.
Given an input image in the host, upload it to the device memory as a texture.
[out] | a_in | The input 2D image as cudaArray to be allocated and copied to device memory |
[in] | h_in | The input 2D image in host memory to be uploaded as a texture |
[in] | w | Image width |
[in] | h | Image height |
[in] | ic | Initial condition (for outside access) |