CUDA atomic function usage with volatile shared memory -

i have cuda kernel needs use atomic function on volatile shared integer memory. however, when try declare shared memory volatile , use in atomic function, error message.

below minimalist code reproduces error. please note following kernel nothing , horribly abuses why ever want declare shared memory volatile (or use shared memory @ all). does reproduce error.

the code uses atomic functions on shared memory, so, run it, need compile "arch12" or higher (in visual studio 2010, right click on project , go "properties -> configuration properties -> cuda c/c++ -> device" , enter "compute_12,sm_12" in "code generation" line). code should otherwise compile is.

#include <cstdlib> #include <cuda_runtime.h>  static int const x_thrds_per_blk = 32; static int const y_thrds_per_blk = 8;  __global__ void kernelwithsharedmemoryandatomicfunction(int * d_array, int numtotx, int numtoty) {               __shared__ int s_blk[y_thrds_per_blk][x_thrds_per_blk]; // compiles    //volatile __shared__ int s_blk[y_thrds_per_blk][x_thrds_per_blk]; // not compile     int tx = threadidx.x;    int ty = threadidx.y;     int mx = blockidx.x*blockdim.x + threadidx.x;    int = blockidx.y*blockdim.y + threadidx.y;    int mi = my*numtotx + mx;     if (mx < numtotx && < numtoty)    {       s_blk[ty][tx] = d_array[mi];        __syncthreads();        atomicmin(&s_blk[ty][tx], 4); // compile volatile shared memory if line commented out        __syncthreads();        d_array[mi] = s_blk[ty][tx];    } }  int main(void) {    // declare , initialize array on host    int const num_tot_x = 4*x_thrds_per_blk;    int const num_tot_y = 6*y_thrds_per_blk;     int * h_array = (int *)malloc(num_tot_x*num_tot_y*sizeof(int));     (int = 0; < num_tot_x*num_tot_y; ++i) h_array[i] = i;     // copy array device    int * d_array;    cudamalloc((void **)&d_array, num_tot_x*num_tot_y*sizeof(int));     cudamemcpy(d_array, h_array, num_tot_x*num_tot_y*sizeof(int), cudamemcpyhosttodevice);     // declare block , thread variables    dim3 thdsperblk;    dim3 blks;     thdsperblk.x = x_thrds_per_blk;    thdsperblk.y = y_thrds_per_blk;    thdsperblk.z = 1;     blks.x = (num_tot_x + x_thrds_per_blk - 1)/x_thrds_per_blk;    blks.y = (num_tot_y + y_thrds_per_blk - 1)/y_thrds_per_blk;    blks.z = 1;     // run kernel    kernelwithsharedmemoryandatomicfunction<<<blks, thdsperblk>>>(d_array, num_tot_x, num_tot_y);     // cleanup    free    (h_array);    cudafree(d_array);     return 0; }

anyway, if comment out "s_blk" declaration towards top of kernel , uncomment commented-out declaration following it, should following error:

error : no instance of overloaded function "atomicmin" matches argument list

i not understand why declaring shared memory volatile affect type, (i think) error message indicating, nor why cannot used atomic operations.

can please provide insight?

thanks,

aaron

just replace
atomicmin(&s_blk[ty][tx], 4);

atomicmin((int *)&s_blk[ty][tx], 4);.

it typecasts &s_blk[ty][tx] matches argument of atomicmin(..).

Search This Blog

Ben

CUDA atomic function usage with volatile shared memory -

Comments

Post a Comment

Popular posts from this blog

sublimetext3 - what keyboard shortcut is to comment/uncomment for this script tag in sublime -

post - imageshack API cURL -

dataset - MPAndroidchart returning no chart Data available -