QTrk
Classes | Public Member Functions | Public Attributes | List of all members
Image4DCudaArray< T > Struct Template Reference

#include <cudaImageList.h>

Classes

struct  KernelInst
 

Public Member Functions

cudaExtent getExtent ()
 
KernelInst kernelInst ()
 
void bind (texture< T, cudaTextureType2DLayered, cudaReadModeElementType > &texref)
 
void unbind (texture< T, cudaTextureType2DLayered, cudaReadModeElementType > &texref)
 
void bind (surface< void, cudaSurfaceType2DLayered > &surf)
 
 Image4DCudaArray (int sx, int sy, int numImg, int sL)
 
int2 getImagePos (int image)
 
 ~Image4DCudaArray ()
 
void copyToDevice (T *src, bool async=false, cudaStream_t s=0)
 
void copyToHost (T *dst, bool async=false, cudaStream_t s=0)
 
void clear ()
 
void copyImageToHost (int img, int layer, T *dst, bool async=false, cudaStream_t s=0)
 
void copyImageToDevice (int img, int layer, T *src, bool async=false, cudaStream_t s=0)
 
void free ()
 

Public Attributes

cudaArray_t array
 
int imgw
 
int imgh
 
int layerw
 
int layerh
 
int nlayers
 
int numImg
 

Detailed Description

template<typename T>
struct Image4DCudaArray< T >

Definition at line 212 of file cudaImageList.h.

Constructor & Destructor Documentation

§ Image4DCudaArray()

template<typename T >
Image4DCudaArray< T >::Image4DCudaArray ( int  sx,
int  sy,
int  numImg,
int  sL 
)
inline

Definition at line 271 of file cudaImageList.h.

271  {
272  array = 0;
273  int d;
274  cudaGetDevice(&d);
275  cudaDeviceProp prop;
276  cudaGetDeviceProperties(&prop, d);
277 
278  imgw = sx;
279  imgh = sy;
280  this->numImg = numImg;
281 
282 // layerh = (int)(prop.maxSurface2DLayered[1] / imgh);
283  layerh = 2048 / imgh;
284  layerw = (numImg + layerh - 1) / layerh;
285  nlayers = sL;
286 
287  dbgprintf("creating image4D: %d layers of %d x %d images of %d x %d (%dx%dx%d)\n",
288  sL, layerw, layerh, imgw, imgh, getExtent().width,getExtent().height,getExtent().depth);
289 
290  cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
291  cudaError_t err = cudaMalloc3DArray(&array, &desc, getExtent(), cudaArrayLayered | cudaArraySurfaceLoadStore);
292  //cudaError_t err = cudaMalloc3DArray(&array, &desc, getExtent(), cudaArraySurfaceLoadStore);
293  if (err != cudaSuccess) {
294  throw std::bad_alloc(SPrintf("CUDA error during cudaSurf2DList(): %s", cudaGetErrorString(err)).c_str());
295  }
296  }
cudaArray_t array
cudaExtent getExtent()
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149
std::string SPrintf(const char *fmt,...)
Definition: utils.cpp:132

§ ~Image4DCudaArray()

template<typename T >
Image4DCudaArray< T >::~Image4DCudaArray ( )
inline

Definition at line 303 of file cudaImageList.h.

303  {
304  free();
305  }

Member Function Documentation

§ bind() [1/2]

template<typename T >
void Image4DCudaArray< T >::bind ( texture< T, cudaTextureType2DLayered, cudaReadModeElementType > &  texref)
inline

Definition at line 256 of file cudaImageList.h.

256  {
257  cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
258  CheckCUDAError( cudaBindTextureToArray(texref, array, &desc) );
259  }
cudaArray_t array
void CheckCUDAError(cufftResult_t err)
Definition: gpu_utils.h:36

§ bind() [2/2]

template<typename T >
void Image4DCudaArray< T >::bind ( surface< void, cudaSurfaceType2DLayered > &  surf)
inline

Definition at line 264 of file cudaImageList.h.

264  {
265  cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
266  CheckCUDAError( cudaBindSurfaceToArray(surf, array) );
267  }
cudaArray_t array
void CheckCUDAError(cufftResult_t err)
Definition: gpu_utils.h:36

§ clear()

template<typename T >
void Image4DCudaArray< T >::clear ( )
inline

Definition at line 323 of file cudaImageList.h.

324  {
325  // create a new black image in device memory and use it to clear all the layers
326  T* d;
327  size_t srcpitch;
328  CheckCUDAError( cudaMallocPitch(&d, &srcpitch, sizeof(T)*imgw, imgh) );
329  CheckCUDAError( cudaMemset2D(d, srcpitch, 0, sizeof(T)*imgw, imgh) );
330 
331  cudaMemcpy3DParms p = {0};
332  p.dstArray = array;
333  p.extent = make_cudaExtent(imgw,imgh,1);
334  p.kind = cudaMemcpyDeviceToDevice;
335  p.srcPtr = make_cudaPitchedPtr(d, srcpitch, sizeof(T)*imgw, imgh);
336  for (int l=0;l<nlayers;l++)
337  for (int img=0;img<numImg;img++) {
338  int2 imgpos = getImagePos(img);
339  p.dstPos.z = l;
340  p.dstPos.x = imgpos.x;
341  p.dstPos.y = imgpos.y;
342  CheckCUDAError( cudaMemcpy3D(&p) );
343  }
344  CheckCUDAError( cudaFree(d) );
345  }
cudaArray_t array
void CheckCUDAError(cufftResult_t err)
Definition: gpu_utils.h:36
int2 getImagePos(int image)

§ copyImageToDevice()

template<typename T >
void Image4DCudaArray< T >::copyImageToDevice ( int  img,
int  layer,
T *  src,
bool  async = false,
cudaStream_t  s = 0 
)
inline

Definition at line 370 of file cudaImageList.h.

371  {
372  // Memcpy3D needs the right pitch for the source, so we first need to copy it to 2D pitched memory before moving the data to the cuda array
373 // cudaMallocPitch(
374 
375  cudaMemcpy3DParms p = {0};
376  p.dstArray = array;
377  int2 imgpos = getImagePos(img);
378 
379  //The srcPos and dstPos fields are optional offsets into the source and destination objects and are defined in units of each object's elements.
380  // The element for a host or device pointer is assumed to be unsigned char. For CUDA arrays, positions must be in the range [0, 2048) for any dimension.
381  p.dstPos.z = layer;
382  p.dstPos.x = imgpos.x;
383  p.dstPos.y = imgpos.y;
384  p.extent = make_cudaExtent(imgw,imgh,1);
385  p.kind = cudaMemcpyHostToDevice;
386  p.srcPtr = make_cudaPitchedPtr(src, sizeof(T)*imgw, sizeof(T)*imgw, imgh);
387  if (async)
388  CheckCUDAError( cudaMemcpy3DAsync(&p, s) );
389  else
390  CheckCUDAError( cudaMemcpy3D(&p) );
391  }
cudaArray_t array
void CheckCUDAError(cufftResult_t err)
Definition: gpu_utils.h:36
int2 getImagePos(int image)

§ copyImageToHost()

template<typename T >
void Image4DCudaArray< T >::copyImageToHost ( int  img,
int  layer,
T *  dst,
bool  async = false,
cudaStream_t  s = 0 
)
inline

Definition at line 348 of file cudaImageList.h.

349  {
350  // According to CUDA docs:
351  // The extent field defines the dimensions of the transferred area in elements.
352  // If a CUDA array is participating in the copy, the extent is defined in terms of that array's elements.
353  // If no CUDA array is participating in the copy then the extents are defined in elements of unsigned char.
354 
355  cudaMemcpy3DParms p = {0};
356  p.srcArray = array;
357  p.extent = make_cudaExtent(imgw,imgh,1);
358  p.kind = cudaMemcpyDeviceToHost;
359  p.srcPos.z = layer;
360  int2 imgpos = getImagePos(img);
361  p.srcPos.x = imgpos.x;
362  p.srcPos.y = imgpos.y;
363  p.dstPtr = make_cudaPitchedPtr(dst, sizeof(T)*imgw, sizeof(T)*imgw, imgh);
364  if (async)
365  CheckCUDAError( cudaMemcpy3DAsync(&p, s) );
366  else
367  CheckCUDAError( cudaMemcpy3D(&p) );
368  }
cudaArray_t array
void CheckCUDAError(cufftResult_t err)
Definition: gpu_utils.h:36
int2 getImagePos(int image)

§ copyToDevice()

template<typename T >
void Image4DCudaArray< T >::copyToDevice ( T *  src,
bool  async = false,
cudaStream_t  s = 0 
)
inline

Definition at line 307 of file cudaImageList.h.

308  {
309  for (int L=0;L<nlayers;L++) {
310  for (int i=0;i<numImg;i++)
311  copyImageToDevice(i, L, &src[ imgw * imgh * ( numImg * L + i ) ], async, s);
312  }
313  }
void copyImageToDevice(int img, int layer, T *src, bool async=false, cudaStream_t s=0)

§ copyToHost()

template<typename T >
void Image4DCudaArray< T >::copyToHost ( T *  dst,
bool  async = false,
cudaStream_t  s = 0 
)
inline

Definition at line 315 of file cudaImageList.h.

316  {
317  for (int L=0;L<nlayers;L++) {
318  for (int i=0;i<numImg;i++)
319  copyImageToHost(i, L, &dst[ imgw * imgh * ( numImg * L + i ) ], async, s);
320  }
321  }
void copyImageToHost(int img, int layer, T *dst, bool async=false, cudaStream_t s=0)

§ free()

template<typename T >
void Image4DCudaArray< T >::free ( )
inline

Definition at line 393 of file cudaImageList.h.

393  {
394  if (array) {
395  CheckCUDAError( cudaFreeArray(array) );
396  array = 0;
397  }
398  }
cudaArray_t array
void CheckCUDAError(cufftResult_t err)
Definition: gpu_utils.h:36

§ getExtent()

template<typename T >
cudaExtent Image4DCudaArray< T >::getExtent ( )
inline

Definition at line 222 of file cudaImageList.h.

222  {
223  return make_cudaExtent(imgw * layerw, imgh * layerh, nlayers);
224  }

§ getImagePos()

template<typename T >
int2 Image4DCudaArray< T >::getImagePos ( int  image)
inline

Definition at line 298 of file cudaImageList.h.

298  {
299  int2 pos = { imgw * ( image % layerw ), imgh * ( image / layerw ) };
300  return pos;
301  }

§ kernelInst()

template<typename T >
KernelInst Image4DCudaArray< T >::kernelInst ( )
inline

Definition at line 249 of file cudaImageList.h.

249  {
250  KernelInst inst;
251  inst.imgw = imgw; inst.imgh = imgh;
252  inst.layerw = layerw;
253  return inst;
254  }

§ unbind()

template<typename T >
void Image4DCudaArray< T >::unbind ( texture< T, cudaTextureType2DLayered, cudaReadModeElementType > &  texref)
inline

Definition at line 260 of file cudaImageList.h.

260  {
261  cudaUnbindTexture(texref);
262  }

Member Data Documentation

§ array

template<typename T >
cudaArray_t Image4DCudaArray< T >::array

Definition at line 214 of file cudaImageList.h.

§ imgh

template<typename T >
int Image4DCudaArray< T >::imgh

Definition at line 215 of file cudaImageList.h.

§ imgw

template<typename T >
int Image4DCudaArray< T >::imgw

Definition at line 215 of file cudaImageList.h.

§ layerh

template<typename T >
int Image4DCudaArray< T >::layerh

Definition at line 216 of file cudaImageList.h.

§ layerw

template<typename T >
int Image4DCudaArray< T >::layerw

Definition at line 216 of file cudaImageList.h.

§ nlayers

template<typename T >
int Image4DCudaArray< T >::nlayers

Definition at line 217 of file cudaImageList.h.

§ numImg

template<typename T >
int Image4DCudaArray< T >::numImg

Definition at line 218 of file cudaImageList.h.


The documentation for this struct was generated from the following file: