My CUDA Helper for Visual Studio
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
// CUDA helper header for Visual Studio projects.
//
// It provides three independent conveniences:
//   1. Auto-linking of the CUDA runtime and OpenCV 3.3.0 import libraries
//      through MSVC's `#pragma comment(lib, ...)`.
//   2. KERNEL_ARGn macros that wrap the `<<< ... >>>` launch configuration so
//      a translation unit that is not being compiled by nvcc (no __CUDACC__)
//      never sees the non-standard chevron syntax.
//   3. Declaration-only stand-ins for CUDA device built-ins that are visible
//      only while __INTELLISENSE__ is defined, i.e. they never take part in a
//      real build.
//
// NOTE(review): the stubs below use cudaTextureObject_t, cudaSurfaceObject_t,
// texture<> and cudaSurfaceBoundaryMode; this header does not include anything
// itself, so it presumably expects the CUDA runtime headers to be included
// first - confirm against the including files.
#pragma once

// ---------------------------------------------------------------------------
// 1. Library auto-linking
// ---------------------------------------------------------------------------
#pragma comment(lib, "cudart.lib")
#if _DEBUG
#pragma comment(lib, "opencv_world330d.lib")   // debug build of OpenCV
#else
#pragma comment(lib, "opencv_world330.lib")    // release build of OpenCV
#endif

// ---------------------------------------------------------------------------
// 2. Kernel launch-configuration macros
//
// Usage:  myKernel KERNEL_ARG2(grid, block) (arg0, arg1);
// Under nvcc this becomes  myKernel <<< grid, block >>> (arg0, arg1);
// everywhere else the macro expands to nothing, leaving an ordinary call.
// ---------------------------------------------------------------------------
#ifdef __CUDACC__
#define KERNEL_ARG2(grid, block)                 <<< grid, block >>>
#define KERNEL_ARG3(grid, block, sh_mem)         <<< grid, block, sh_mem >>>
#define KERNEL_ARG4(grid, block, sh_mem, stream) <<< grid, block, sh_mem, stream >>>
#else
#define KERNEL_ARG2(grid, block)
#define KERNEL_ARG3(grid, block, sh_mem)
#define KERNEL_ARG4(grid, block, sh_mem, stream)
#endif

// ---------------------------------------------------------------------------
// 3. IntelliSense-only stubs
//
// These are declarations without definitions; they exist purely so the editor
// can resolve the names and must not be relied on outside __INTELLISENSE__.
// ---------------------------------------------------------------------------
#ifdef __INTELLISENSE__

// Bit reinterpretation / conversion intrinsics.
int   __float_as_int(float in);
float __int_as_float(int in);
short __float2half_rn(float in);

// Atomic increment.
unsigned int atomicInc(unsigned int* address, unsigned int val);

// Compare-and-Swap operation.
int atomicCAS(int* address, int compare, int val);
unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val);
unsigned long long int atomicCAS(unsigned long long int* address,
                                 unsigned long long int compare,
                                 unsigned long long int val);

// Atomic addition.
int atomicAdd(int* address, int val);
unsigned int atomicAdd(unsigned int* address, unsigned int val);
unsigned long long int atomicAdd(unsigned long long int* address, unsigned long long int val);
float atomicAdd(float* address, float val);
double atomicAdd(double* address, double val);

// Atomic subtraction.
int atomicSub(int* address, int val);
unsigned int atomicSub(unsigned int* address, unsigned int val);

// Layered 2D texture fetches (texture-object and texture-reference forms).
template<class T>
T tex2DLayered(cudaTextureObject_t texObj, float x, float y, int layer);
template<class T>
T tex2DLayered(cudaTextureObject_t texObj, int x, int y, int layer);
template<class T>
T tex2DLayered(texture<unsigned short, cudaTextureType2DLayered, cudaReadModeElementType> texObj,
               int x, int y, int layer);

// Layered 2D surface reads. The boundary-mode parameter is spelled with its
// real type (cudaSurfaceBoundaryMode); the CUDA guide abbreviates it to a bare
// `boundaryMode = ...`, which is not a valid declaration on its own.
template<class T>
T surf2DLayeredread(cudaSurfaceObject_t surfObj, int x, int y, int layer,
                    cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap);
// Output-parameter form: the value read is stored through `data`.
template<class T>
void surf2DLayeredread(T* data, cudaSurfaceObject_t surfObj, int x, int y, int layer,
                       cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap);

// 3D surface write.
template<class T>
void surf3Dwrite(T data, cudaSurfaceObject_t surfObj, int x, int y, int z,
                 cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap);

// Barriers collapse to nothing / to their predicate for the editor's parser.
#define __syncthreads()
#define __syncthreads_or(a) a

// 3D texture fetches.
template<class T>
T tex3D(cudaTextureObject_t texObj, float x, float y, float z);
template<class T>
T tex3DLod(cudaTextureObject_t texObj, float x, float y, float z, float level);

#endif  // __INTELLISENSE__