#include <cudaImageList.h>

Classes
struct	KernelInst

Public Member Functions
cudaExtent	getExtent ()

KernelInst	kernelInst ()

void	bind (texture< T, cudaTextureType2DLayered, cudaReadModeElementType > &texref)

void	unbind (texture< T, cudaTextureType2DLayered, cudaReadModeElementType > &texref)

void	bind (surface< void, cudaSurfaceType2DLayered > &surf)

	Image4DCudaArray (int sx, int sy, int numImg, int sL)

int2	getImagePos (int image)

	~Image4DCudaArray ()

void	copyToDevice (T *src, bool async=false, cudaStream_t s=0)

void	copyToHost (T *dst, bool async=false, cudaStream_t s=0)

void	clear ()

void	copyImageToHost (int img, int layer, T *dst, bool async=false, cudaStream_t s=0)

void	copyImageToDevice (int img, int layer, T *src, bool async=false, cudaStream_t s=0)

void	free ()

Public Attributes
cudaArray_t	array

int	imgw

int	imgh

int	layerw

int	layerh

int	nlayers

int	numImg

Detailed Description

template<typename T>
struct Image4DCudaArray< T >

Definition at line 212 of file cudaImageList.h.

Constructor & Destructor Documentation

§ Image4DCudaArray()

template<typename T >

Image4DCudaArray< T >::Image4DCudaArray	(	int	sx,
		int	sy,
		int	numImg,
		int	sL
	)

inline

Definition at line 271 of file cudaImageList.h.

                                                           {
         array = 0;
         int d;
         cudaGetDevice(&d);
         cudaDeviceProp prop;
         cudaGetDeviceProperties(&prop, d);
         
         imgw = sx;
         imgh = sy;
         this->numImg = numImg;
 
 //      layerh = (int)(prop.maxSurface2DLayered[1] / imgh);
         layerh = 2048 / imgh;
         layerw = (numImg + layerh - 1) / layerh;
         nlayers = sL;
 
         dbgprintf("creating image4D: %d layers of %d x %d images of %d x %d (%dx%dx%d)\n", 
             sL, layerw, layerh, imgw, imgh, getExtent().width,getExtent().height,getExtent().depth);
 
         cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
         cudaError_t err = cudaMalloc3DArray(&array, &desc, getExtent(), cudaArrayLayered | cudaArraySurfaceLoadStore);
         //cudaError_t err = cudaMalloc3DArray(&array, &desc, getExtent(), cudaArraySurfaceLoadStore);
         if (err != cudaSuccess) {
             throw std::bad_alloc(SPrintf("CUDA error during cudaSurf2DList(): %s", cudaGetErrorString(err)).c_str());
         }
     }

§ ~Image4DCudaArray()

template<typename T >

Image4DCudaArray< T >::~Image4DCudaArray ( )

inline

Definition at line 303 of file cudaImageList.h.

                         {
         free();
     }

Member Function Documentation

§ bind() [1/2]

template<typename T >

void Image4DCudaArray< T >::bind ( texture< T, cudaTextureType2DLayered, cudaReadModeElementType > & texref )

inline

Definition at line 256 of file cudaImageList.h.

                                                                                      {
         cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
         CheckCUDAError( cudaBindTextureToArray(texref, array, &desc) );
     }

§ bind() [2/2]

template<typename T >

void Image4DCudaArray< T >::bind ( surface< void, cudaSurfaceType2DLayered > & surf )

inline

Definition at line 264 of file cudaImageList.h.

                                                              {
         cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
         CheckCUDAError( cudaBindSurfaceToArray(surf, array) );
     }

§ clear()

template<typename T >

void Image4DCudaArray< T >::clear ( )

inline

Definition at line 323 of file cudaImageList.h.

     {
         // create a new black image in device memory and use to it clear all the layers
         T* d;
         size_t srcpitch;
         CheckCUDAError( cudaMallocPitch(&d, &srcpitch, sizeof(T)*imgw, imgh) );
         CheckCUDAError( cudaMemset2D(d, srcpitch, 0, sizeof(T)*imgw, imgh) );
 
         cudaMemcpy3DParms p = {0};
         p.dstArray = array;
         p.extent = make_cudaExtent(imgw,imgh,1);
         p.kind = cudaMemcpyDeviceToDevice;
         p.srcPtr = make_cudaPitchedPtr(d, srcpitch, sizeof(T)*imgw, imgh);
         for (int l=0;l<nlayers;l++)
             for (int img=0;img<numImg;img++) {
                 int2 imgpos = getImagePos(img);
                 p.dstPos.z = l;
                 p.dstPos.x = imgpos.x;
                 p.dstPos.y = imgpos.y;
                 CheckCUDAError( cudaMemcpy3D(&p) );
             }
         CheckCUDAError( cudaFree(d) );
     }

§ copyImageToDevice()

template<typename T >

void Image4DCudaArray< T >::copyImageToDevice	(	int	img,
		int	layer,
		T *	src,
		bool	async = `false`,
		cudaStream_t	s = `0`
	)

inline

Definition at line 370 of file cudaImageList.h.

     {
         // Memcpy3D needs the right pitch for the source, so we first need to copy it to 2D pitched memory before moving the data to the cuda array
 //      cudaMallocPitch(
 
         cudaMemcpy3DParms p = {0};
         p.dstArray = array;
         int2 imgpos = getImagePos(img);
 
         //The srcPos and dstPos fields are optional offsets into the source and destination objects and are defined in units of each object's elements. 
         // The element for a host or device pointer is assumed to be unsigned char. For CUDA arrays, positions must be in the range [0, 2048) for any dimension. 
         p.dstPos.z = layer;
         p.dstPos.x = imgpos.x;
         p.dstPos.y = imgpos.y;
         p.extent = make_cudaExtent(imgw,imgh,1);
         p.kind = cudaMemcpyHostToDevice;
         p.srcPtr = make_cudaPitchedPtr(src, sizeof(T)*imgw, sizeof(T)*imgw, imgh);
         if (async)
             CheckCUDAError( cudaMemcpy3DAsync(&p, s) );
         else
             CheckCUDAError( cudaMemcpy3D(&p) );
     }

§ copyImageToHost()

template<typename T >

void Image4DCudaArray< T >::copyImageToHost	(	int	img,
		int	layer,
		T *	dst,
		bool	async = `false`,
		cudaStream_t	s = `0`
	)

inline

Definition at line 348 of file cudaImageList.h.

     {
         // According to CUDA docs:
         //      The extent field defines the dimensions of the transferred area in elements. 
         //      If a CUDA array is participating in the copy, the extent is defined in terms of that array's elements. 
         //      If no CUDA array is participating in the copy then the extents are defined in elements of unsigned char.
 
         cudaMemcpy3DParms p = {0};
         p.srcArray = array;
         p.extent = make_cudaExtent(imgw,imgh,1);
         p.kind = cudaMemcpyDeviceToHost;
         p.srcPos.z = layer;
         int2 imgpos = getImagePos(img);
         p.srcPos.x = imgpos.x;
         p.srcPos.y = imgpos.y;
         p.dstPtr = make_cudaPitchedPtr(dst, sizeof(T)*imgw, sizeof(T)*imgw, imgh);
         if (async)
             CheckCUDAError( cudaMemcpy3DAsync(&p, s) );
         else
             CheckCUDAError( cudaMemcpy3D(&p) );
     }

§ copyToDevice()

template<typename T >

void Image4DCudaArray< T >::copyToDevice	(	T *	src,
		bool	async = `false`,
		cudaStream_t	s = `0`
	)

inline

Definition at line 307 of file cudaImageList.h.

     {
         for (int L=0;L<nlayers;L++) {
             for (int i=0;i<numImg;i++)
                 copyImageToDevice(i, L, &src[ imgw * imgh * ( numImg * L + i ) ], async, s);
         }
     }

§ copyToHost()

template<typename T >

void Image4DCudaArray< T >::copyToHost	(	T *	dst,
		bool	async = `false`,
		cudaStream_t	s = `0`
	)

inline

Definition at line 315 of file cudaImageList.h.

     {
         for (int L=0;L<nlayers;L++) {
             for (int i=0;i<numImg;i++)
                 copyImageToHost(i, L, &dst[ imgw * imgh * ( numImg * L + i ) ], async, s);
         }
     }

§ free()

template<typename T >

void Image4DCudaArray< T >::free ( )

inline

Definition at line 393 of file cudaImageList.h.

                 {
         if (array) {
             CheckCUDAError( cudaFreeArray(array) );
             array = 0;
         }
     }

§ getExtent()

template<typename T >

cudaExtent Image4DCudaArray< T >::getExtent ( )

inline

Definition at line 222 of file cudaImageList.h.

                            {
         return make_cudaExtent(imgw * layerw, imgh * layerh, nlayers);
     }

§ getImagePos()

template<typename T >

int2 Image4DCudaArray< T >::getImagePos ( int image )

inline

Definition at line 298 of file cudaImageList.h.

                                 {
         int2 pos = { imgw * ( image % layerw ), imgh * ( image / layerw ) };
         return pos;
     }

§ kernelInst()

template<typename T >

KernelInst Image4DCudaArray< T >::kernelInst ( )

inline

Definition at line 249 of file cudaImageList.h.

                             {
         KernelInst inst;
         inst.imgw = imgw; inst.imgh = imgh;
         inst.layerw = layerw;
         return inst;
     }

§ unbind()

template<typename T >

void Image4DCudaArray< T >::unbind ( texture< T, cudaTextureType2DLayered, cudaReadModeElementType > & texref )

inline

Definition at line 260 of file cudaImageList.h.

                                                                                        {
         cudaUnbindTexture(texref);
     }

Member Data Documentation

§ array

template<typename T >

cudaArray_t Image4DCudaArray< T >::array

Definition at line 214 of file cudaImageList.h.

§ imgh

template<typename T >

int Image4DCudaArray< T >::imgh

Definition at line 215 of file cudaImageList.h.

§ imgw

template<typename T >

int Image4DCudaArray< T >::imgw

Definition at line 215 of file cudaImageList.h.

§ layerh

template<typename T >

int Image4DCudaArray< T >::layerh

Definition at line 216 of file cudaImageList.h.

§ layerw

template<typename T >

int Image4DCudaArray< T >::layerw

Definition at line 216 of file cudaImageList.h.

§ nlayers

template<typename T >

int Image4DCudaArray< T >::nlayers

Definition at line 217 of file cudaImageList.h.

§ numImg

template<typename T >

int Image4DCudaArray< T >::numImg

Definition at line 218 of file cudaImageList.h.

The documentation for this struct was generated from the following file:

cudatrack/cudaImageList.h

Classes

Public Member Functions

Public Attributes

Detailed Description

template<typename T> struct Image4DCudaArray< T >

Constructor & Destructor Documentation

§ Image4DCudaArray()

§ ~Image4DCudaArray()

Member Function Documentation

§ bind() [1/2]

§ bind() [2/2]

§ clear()

§ copyImageToDevice()

§ copyImageToHost()

§ copyToDevice()

§ copyToHost()

§ free()

§ getExtent()

§ getImagePos()

§ kernelInst()

§ unbind()

Member Data Documentation

§ array

§ imgh

§ imgw

§ layerh

§ layerw

§ nlayers

§ numImg

template<typename T>
struct Image4DCudaArray< T >