QTrk
Functions
CUDA Kernels

All available CUDA Kernels to run on the GPU. More...

Functions

template<typename T >
static __device__ T interpolate (T a, T b, float x)
 
template<typename TImageSampler >
__device__ float2 BgCorrectedCOM (int idx, cudaImageListf images, float correctionFactor, float *pMean)
 
template<typename TImageSampler >
__global__ void BgCorrectedCOM (int count, cudaImageListf images, float3 *d_com, float bgCorrectionFactor, float *d_imgmeans)
 
__global__ void ZLUT_ProfilesToZLUT (int njobs, cudaImageListf images, ZLUTParams params, float3 *positions, LocalizationParams *locParams, float *profiles)
 
template<typename TImageSampler >
__global__ void ZLUT_RadialProfileKernel (int njobs, cudaImageListf images, ZLUTParams params, float3 *positions, float *profiles, float *means)
 
__global__ void ZLUT_ComputeZ (int njobs, ZLUTParams params, float3 *positions, float *compareScoreBuf)
 
__global__ void ZLUT_ComputeProfileMatchScores (int njobs, ZLUTParams params, float *profiles, float *compareScoreBuf, LocalizationParams *locParams)
 
__global__ void ZLUT_NormalizeProfiles (int njobs, ZLUTParams params, float *profiles)
 
__global__ void ApplyOffsetGain (BaseKernelParams kp, cudaImageListf calib_gain, cudaImageListf calib_offset, float gainFactor, float offsetFactor)
 
template<typename TImageSampler >
__global__ void G2MLE_Compute (BaseKernelParams kp, float sigma, int iterations, float3 *initial, float3 *positions, float *I_bg, float *I_0)
 
template<typename TImageSampler , typename TImageLUT >
__global__ void ImageLUT_Sample (BaseKernelParams kp, float2 ilut_scale, float3 *positions, typename TImageLUT::KernelParams lut)
 
__global__ void ForceCUDAKernelsToLoad ()
 Empty kernel to initialize and get rid of driver overhead before calculations are started. More...
 
template<typename TImageSampler >
__device__ void ComputeQuadrantProfile (cudaImageListf &images, int idx, float *dst, const QIParams &params, int quadrant, float2 center, float mean, int angularSteps)
 
template<typename TImageSampler >
__global__ void QI_ComputeProfile (BaseKernelParams kp, float3 *positions, float *quadrants, float2 *profiles, float2 *reverseProfiles, const QIParams qiParams, float *d_radialweights, int angularSteps)
 
__global__ void QI_MultiplyWithConjugate (int n, cufftComplex *a, cufftComplex *b)
 
__device__ float QI_ComputeAxisOffset (cufftComplex *autoconv, int fftlen, float *shiftbuf)
 
__global__ void QI_OffsetPositions (int njobs, float3 *current, float3 *dst, cufftComplex *autoconv, int fftLength, float2 *offsets, float pixelsPerProfLen, float *shiftbuf)
 
template<typename TImageSampler >
__global__ void QI_ComputeQuadrants (BaseKernelParams kp, float3 *positions, float *dst_quadrants, const QIParams *params, int angularSteps)
 
__global__ void QI_QuadrantsToProfiles (BaseKernelParams kp, float *quadrants, float2 *profiles, float2 *reverseProfiles, float *d_radialweights, const QIParams *params)
 

Detailed Description

All available CUDA Kernels to run on the GPU.

Function Documentation

§ ApplyOffsetGain()

__global__ void ApplyOffsetGain ( BaseKernelParams  kp,
cudaImageListf  calib_gain,
cudaImageListf  calib_offset,
float  gainFactor,
float  offsetFactor 
)

Definition at line 169 of file Kernels.h.

170 {
171  int x = threadIdx.x + blockIdx.x * blockDim.x;
172  int y = threadIdx.y + blockIdx.y * blockDim.y;
173  int jobIdx = threadIdx.z + blockIdx.z * blockDim.z;
174 
175  if (x < kp.images.w && y < kp.images.h && jobIdx < kp.njobs) {
176  int bead = kp.locParams[jobIdx].zlutIndex;
177 
178  float value = kp.images.pixel(x,y,jobIdx);
179  float offset = calib_offset.isEmpty() ? 0 : calib_offset.pixel(x,y,bead);
180  float gain = calib_gain.isEmpty() ? 1 : calib_gain.pixel(x,y,bead);
181  kp.images.pixel(x,y,jobIdx) = (value + offset*offsetFactor) * gain*gainFactor;
182  }
183 }
CUBOTH bool isEmpty()
Definition: cudaImageList.h:33
int zlutIndex
Index of the bead/zlut.
cudaImageListf images
The images for this batch.
LocalizationParams * locParams
Additional localization parameters. Holds the ZLUT/bead index.
CUBOTH T & pixel(int x, int y, int imgIndex)
Definition: cudaImageList.h:64
int njobs
Number of jobs in the batch.

§ BgCorrectedCOM() [1/2]

template<typename TImageSampler >
__device__ float2 BgCorrectedCOM ( int  idx,
cudaImageListf  images,
float  correctionFactor,
float *  pMean 
)

Definition at line 18 of file Kernels.h.

19 {
20  int imgsize = images.w*images.h;
21  float sum=0, sum2=0;
22  float momentX=0;
23  float momentY=0;
24 
25  for (int y=0;y<images.h;y++)
26  for (int x=0;x<images.w;x++) {
27  float v = TImageSampler::Index(images, x, y, idx);
28  sum += v;
29  sum2 += v*v;
30  }
31 
32  float invN = 1.0f/imgsize;
33  float mean = sum * invN;
34  float stdev = sqrtf(sum2 * invN - mean * mean);
35  sum = 0.0f;
36 
37  for (int y=0;y<images.h;y++)
38  for(int x=0;x<images.w;x++)
39  {
40  float v = TImageSampler::Index(images, x,y,idx);
41  v = fabsf(v-mean)-correctionFactor*stdev;
42  if(v<0.0f) v=0.0f;
43  sum += v;
44  momentX += x*v;
45  momentY += y*v;
46  }
47 
48  if (pMean)
49  *pMean = mean;
50 
51  float2 com;
52  com.x = momentX / (float)sum;
53  com.y = momentY / (float)sum;
54  return com;
55 }

§ BgCorrectedCOM() [2/2]

template<typename TImageSampler >
__global__ void BgCorrectedCOM ( int  count,
cudaImageListf  images,
float3 *  d_com,
float  bgCorrectionFactor,
float *  d_imgmeans 
)

Definition at line 58 of file Kernels.h.

59 {
60  int idx = threadIdx.x + blockDim.x * blockIdx.x;
61  if (idx < count) {
62  float mean;
63  float2 com = BgCorrectedCOM<TImageSampler> (idx, images, bgCorrectionFactor, &mean);
64  d_com[idx] = make_float3(com.x,com.y,0.0f);
65  d_imgmeans[idx] = mean;
66  }
67 }

§ ComputeQuadrantProfile()

template<typename TImageSampler >
__device__ void ComputeQuadrantProfile ( cudaImageListf images,
int  idx,
float *  dst,
const QIParams params,
int  quadrant,
float2  center,
float  mean,
int  angularSteps 
)

Definition at line 12 of file QI_impl.h.

13 {
14  const int qmat[] = {
15  1, 1,
16  -1, 1,
17  -1, -1,
18  1, -1 };
19  int mx = qmat[2*quadrant+0];
20  int my = qmat[2*quadrant+1];
21 
22  //for (int i=0;i<params.radialSteps;i++)
23  // dst[i]=0.0f;
24 
25  float asf = (float)params.maxAngularSteps / angularSteps;
26  float rstep = (params.maxRadius - params.minRadius) / params.radialSteps;
27  for (int i=0;i<params.radialSteps; i++) {
28  float sum = 0.0f;
29  float r = params.minRadius + rstep * i;
30  int count=0;
31 
32  for (int a=0;a<angularSteps;a++) {
33  int j = (int)(asf * a);
34  float x = center.x + mx*params.cos_sin_table[j].x * r;
35  float y = center.y + my*params.cos_sin_table[j].y * r;
36  bool outside=false;
37  float v = TImageSampler::Interpolated(images, x,y, idx, outside);
38  if (!outside) {
39  sum += v;
40  count ++;
41  }
42  }
43 
44  dst[i] = count >= MIN_RADPROFILE_SMP_COUNT ? sum/count : mean;
45  }
46 }
int maxAngularSteps
Maximum amount of angular steps. Used to enable usage of QTrkSettings::qi_angstep_factor.
Definition: QI.h:12
float maxRadius
Definition: QI.h:9
int radialSteps
Definition: QI.h:10
#define MIN_RADPROFILE_SMP_COUNT
Minimum number of samples for a profile radial bin. Below this the image mean will be used...
Definition: QueuedTracker.h:74
float minRadius
Definition: QI.h:8
float2 * cos_sin_table
Reference to QI::DeviceInstance::qi_trigtable for use in kernels. Radial sampling points...
Definition: QI.h:13

§ ForceCUDAKernelsToLoad()

__global__ void ForceCUDAKernelsToLoad ( )

Empty kernel to initialize and get rid of driver overhead before calculations are started.

Definition at line 290 of file Kernels.h.

291 {
292 }

§ G2MLE_Compute()

template<typename TImageSampler >
__global__ void G2MLE_Compute ( BaseKernelParams  kp,
float  sigma,
int  iterations,
float3 *  initial,
float3 *  positions,
float *  I_bg,
float *  I_0 
)

Definition at line 187 of file Kernels.h.

188 {
189  int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
190 
191  if (jobIdx >= kp.njobs)
192  return;
193 
194  float2 pos = make_float2(initial[jobIdx].x, initial[jobIdx].y);
195  float mean = kp.imgmeans[jobIdx];
196  float I0 = mean*0.5f*kp.images.w*kp.images.h;
197  float bg = mean*0.5f;
198 
199  const float _1oSq2Sigma = 1.0f / (sqrtf(2) * sigma);
200  const float _1oSq2PiSigma = (1.0f / (sqrtf(2*3.14159265359f))) / sigma;
201  const float _1oSq2PiSigma3 = (1.0f / (sqrtf(2*3.14159265359f))) / (sigma*sigma*sigma);
202 
203  for (int i=0;i<iterations;i++)
204  {
205  float dL_dx = 0.0;
206  float dL_dy = 0.0;
207  float dL_dI0 = 0.0;
208  float dL_dIbg = 0.0;
209  float dL2_dx = 0.0;
210  float dL2_dy = 0.0;
211  float dL2_dI0 = 0.0;
212  float dL2_dIbg = 0.0;
213 
214  for (int y=0;y<kp.images.h;y++)
215  {
216  for (int x=0;x<kp.images.w;x++)
217  {
218  float Xexp0 = (x-pos.x + .5f) * _1oSq2Sigma;
219  float Yexp0 = (y-pos.y + .5f) * _1oSq2Sigma;
220 
221  float Xexp1 = (x-pos.x - .5f) * _1oSq2Sigma;
222  float Yexp1 = (y-pos.y - .5f) * _1oSq2Sigma;
223 
224  float DeltaX = 0.5f * erff(Xexp0) - 0.5f * erff(Xexp1);
225  float DeltaY = 0.5f * erff(Yexp0) - 0.5f * erff(Yexp1);
226  float mu = bg + I0 * DeltaX * DeltaY;
227 
228  float dmu_dx = I0*_1oSq2PiSigma * ( expf(-Xexp1*Xexp1) - expf(-Xexp0*Xexp0)) * DeltaY;
229 
230  float dmu_dy = I0*_1oSq2PiSigma * ( expf(-Yexp1*Yexp1) - expf(-Yexp0*Yexp0)) * DeltaX;
231  float dmu_dI0 = DeltaX*DeltaY;
232  float dmu_dIbg = 1;
233 
234  float smp = TImageSampler::Index(kp.images, x,y, jobIdx);
235  float f = smp / mu - 1;
236  dL_dx += dmu_dx * f;
237  dL_dy += dmu_dy * f;
238  dL_dI0 += dmu_dI0 * f;
239  dL_dIbg += dmu_dIbg * f;
240 
241  float d2mu_dx = I0*_1oSq2PiSigma3 * ( (x - pos.x - .5f) * expf (-Xexp1*Xexp1) - (x - pos.x + .5) * expf(-Xexp0*Xexp0) ) * DeltaY;
242  float d2mu_dy = I0*_1oSq2PiSigma3 * ( (y - pos.y - .5f) * expf (-Yexp1*Yexp1) - (y - pos.y + .5) * expf(-Yexp0*Yexp0) ) * DeltaX;
243  dL2_dx += d2mu_dx * f - dmu_dx*dmu_dx * smp / (mu*mu);
244  dL2_dy += d2mu_dy * f - dmu_dy*dmu_dy * smp / (mu*mu);
245  dL2_dI0 += -dmu_dI0*dmu_dI0 * smp / (mu*mu);
246  dL2_dIbg += -smp / (mu*mu);
247  }
248  }
249 
250  pos.x -= dL_dx / dL2_dx;
251  pos.y -= dL_dy / dL2_dy;
252  I0 -= dL_dI0 / dL2_dI0;
253  bg -= dL_dIbg / dL2_dIbg;
254  }
255 
256 
257  positions[jobIdx].x = pos.x;
258  positions[jobIdx].y = pos.y;
259  if (I_bg) I_bg[jobIdx] = bg;
260  if (I_0) I_0[jobIdx] = I0;
261 }
cudaImageListf images
The images for this batch.
int njobs
Number of jobs in the batch.
float * imgmeans
Array of image means.

§ ImageLUT_Sample()

template<typename TImageSampler , typename TImageLUT >
__global__ void ImageLUT_Sample ( BaseKernelParams  kp,
float2  ilut_scale,
float3 *  positions,
typename TImageLUT::KernelParams  lut 
)

Definition at line 264 of file Kernels.h.

265 {
266  // add sampled image data to
267  int x = threadIdx.x + blockIdx.x * blockDim.x;
268  int y = threadIdx.y + blockIdx.y * blockDim.y;
269  int id = threadIdx.z + blockIdx.z * blockDim.z;
270  if (x < lut.imgw && y < lut.imgh && id < kp.njobs) {
271 
272  float invMean = 1.0f / kp.imgmeans[id];
273 
274  float startx = positions[id].x - lut.imgw/2*ilut_scale.x;
275  float starty = positions[id].y - lut.imgh/2*ilut_scale.y;
276  int2 imgpos = lut.GetImagePos(kp.locParams[id].zlutPlane, kp.locParams[id].zlutIndex);
277 
278  float px = startx + x*ilut_scale.x;
279  float py = starty + y*ilut_scale.y;
280 
281  bool outside=false;
282  float v = TImageSampler::Interpolated(kp.images, px, py, id, outside);
283 
284  float org = TImageLUT::read(lut, x, y, imgpos);
285  TImageLUT::write(org+v*invMean, lut, x, y, imgpos);
286  }
287 }
int zlutIndex
Index of the bead/zlut.
cudaImageListf images
The images for this batch.
LocalizationParams * locParams
Additional localization parameters. Holds the ZLUT/bead index.
int zlutPlane
Plane number for LUT lookup. If <0 then, no zlut for this job. Not used.
int njobs
Number of jobs in the batch.
float * imgmeans
Array of image means.

§ interpolate()

template<typename T >
static __device__ T interpolate ( a,
b,
float  x 
)
static

Definition at line 15 of file Kernels.h.

15 { return a + (b-a)*x; }

§ QI_ComputeAxisOffset()

__device__ float QI_ComputeAxisOffset ( cufftComplex *  autoconv,
int  fftlen,
float *  shiftbuf 
)

Definition at line 115 of file QI_impl.h.

116 {
117  typedef float compute_t;
118  int nr = fftlen/2;
119  for(int x=0;x<fftlen;x++) {
120  shiftbuf[x] = autoconv[(x+nr)%(nr*2)].x;
121  }
122 
124 
125  compute_t maxPos = ComputeMaxInterp<compute_t>::Compute(shiftbuf, fftlen, QIWeights);
126  compute_t offset = (maxPos - nr) * (3.14159265359f / 4);
127  return offset;
128 }
const scalar_t QIWeights[QI_LSQFIT_NWEIGHTS]
Definition: cpu_tracker.cpp:32
#define QI_LSQFIT_WEIGHTS
#define QI_LSQFIT_NWEIGHTS
static CUDA_SUPPORTED_FUNC T Compute(T *data, int len, const T *weights, LsqSqQuadFit< T > *fit=0)

§ QI_ComputeProfile()

template<typename TImageSampler >
__global__ void QI_ComputeProfile ( BaseKernelParams  kp,
float3 *  positions,
float *  quadrants,
float2 *  profiles,
float2 *  reverseProfiles,
const QIParams  qiParams,
float *  d_radialweights,
int  angularSteps 
)

Definition at line 49 of file QI_impl.h.

50 {
51  int idx = threadIdx.x + blockDim.x * blockIdx.x;
52  if (idx < kp.njobs) {
53  const QIParams& qp = qiParams;
54  int fftlen = qp.radialSteps*2;
55  float* img_qdr = &quadrants[ idx * qp.radialSteps * 4 ];
56  for (int q=0;q<4;q++) {
57  ComputeQuadrantProfile<TImageSampler> (kp.images, idx, &img_qdr[q*qp.radialSteps], qp, q,
58  make_float2(positions[idx].x, positions[idx].y), kp.imgmeans[idx], angularSteps);
59  }
60 
61  int nr = qp.radialSteps;
62  qicomplex_t* imgprof = (qicomplex_t*) &profiles[idx * fftlen*2];
63  qicomplex_t* x0 = imgprof;
64  qicomplex_t* x1 = imgprof + nr*1;
65  qicomplex_t* y0 = imgprof + nr*2;
66  qicomplex_t* y1 = imgprof + nr*3;
67 
68  qicomplex_t* revprof = (qicomplex_t*)&reverseProfiles[idx*fftlen*2];
69  qicomplex_t* xrev = revprof;
70  qicomplex_t* yrev = revprof + nr*2;
71 
72  float* q0 = &img_qdr[0];
73  float* q1 = &img_qdr[nr];
74  float* q2 = &img_qdr[nr*2];
75  float* q3 = &img_qdr[nr*3];
76 
77  // Build Ix = qL(-r) || qR(r)
78  // qL = q1 + q2 (concat0)
79  // qR = q0 + q3 (concat1)
80  for(int r=0;r<nr;r++) {
81  float rw = d_radialweights[r];
82  x0[nr-r-1] = make_float2(rw*(q1[r]+q2[r]), 0);
83  x1[r] = make_float2( rw*(q0[r]+q3[r]),0);
84  }
85 
86  // Build Iy = [ qB(-r) qT(r) ]
87  // qT = q0 + q1
88  // qB = q2 + q3
89  for(int r=0;r<nr;r++) {
90  float rw = d_radialweights[r];
91  y1[r] = make_float2( rw * ( q0[r]+q1[r] ),0);
92  y0[nr-r-1] = make_float2( rw * (q2[r]+q3[r]),0);
93  }
94 
95  for(int r=0;r<nr*2;r++)
96  xrev[r] = x0[nr*2-r-1];
97  for(int r=0;r<nr*2;r++)
98  yrev[r] = y0[nr*2-r-1];
99  }
100 }
float2 qicomplex_t
Datatype for representation of complex values. 2D Float.
Definition: QI.h:4
int radialSteps
Definition: QI.h:10
Structure to hold QI settings. See QTrkComputedConfig for more info on most settings.
Definition: QI.h:7
cudaImageListf images
The images for this batch.
int njobs
Number of jobs in the batch.
float * imgmeans
Array of image means.

§ QI_ComputeQuadrants()

template<typename TImageSampler >
__global__ void QI_ComputeQuadrants ( BaseKernelParams  kp,
float3 *  positions,
float *  dst_quadrants,
const QIParams params,
int  angularSteps 
)

Definition at line 162 of file QI_impl.h.

163 {
164  int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
165  int rIdx = threadIdx.y + blockIdx.y * blockDim.y;
166  int quadrant = threadIdx.z;
167 
168  // Ori: dst[i], i = radial index
169  // float* img_qdr = &dst_quadrants[ jobIdx * params->radialSteps * 4 ];
170  // float* dst = &img_qdr[quadrant*params->radialSteps];
171  // dst[rIdx] = rIdx;
172  //count >= MIN_RADPROFILE_SMP_COUNT ? sum/count : kp.imgmeans[jobIdx];
173 
174  if (jobIdx < kp.njobs && rIdx < params->radialSteps && quadrant < 4) {
175  // The variables below could go in shared memory
176  float asf = (float)params->maxAngularSteps / angularSteps;
177  float rstep = (params->maxRadius - params->minRadius) / params->radialSteps;
178  const int qmat[] = {
179  1, 1,
180  -1, 1,
181  -1, -1,
182  1, -1 };
183 
184  // --Stop--
185 
186  int mx = qmat[2*quadrant+0];
187  int my = qmat[2*quadrant+1];
188 
189  // Ori: dst[i], i = radial index
190  // float* img_qdr = &quadrants[ idx * qp.radialSteps * 4 ];
191  // dst = &img_qdr[q*qp.radialSteps]
192  float* qdr = &dst_quadrants[ (jobIdx * 4 + quadrant) * params->radialSteps ];
193 
194  float sum = 0.0f;
195  float r = params->minRadius + rstep * rIdx;
196  float3 pos = positions[jobIdx];
197 // float mean = imgmeans[jobIdx];
198 
199  int count=0;
200  for (int a=0;a<angularSteps;a++) {
201  int j = (int)(asf * a);
202  float x = pos.x + mx*params->cos_sin_table[j].x * r;
203  float y = pos.y + my*params->cos_sin_table[j].y * r;
204  bool outside=false;
205  float v = TImageSampler::Interpolated(kp.images, x,y,jobIdx, outside);
206  if (!outside) {
207  sum += v;
208  count++;
209  }
210  }
211  qdr[rIdx] = count >= MIN_RADPROFILE_SMP_COUNT ? sum/count : kp.imgmeans[jobIdx];
212  }
213 }
int maxAngularSteps
Maximum amount of angular steps. Used to enable usage of QTrkSettings::qi_angstep_factor.
Definition: QI.h:12
float maxRadius
Definition: QI.h:9
int radialSteps
Definition: QI.h:10
#define MIN_RADPROFILE_SMP_COUNT
Minimum number of samples for a profile radial bin. Below this the image mean will be used...
Definition: QueuedTracker.h:74
cudaImageListf images
The images for this batch.
int njobs
Number of jobs in the batch.
float minRadius
Definition: QI.h:8
float * imgmeans
Array of image means.
float2 * cos_sin_table
Reference to QI::DeviceInstance::qi_trigtable for use in kernels. Radial sampling points...
Definition: QI.h:13

§ QI_MultiplyWithConjugate()

__global__ void QI_MultiplyWithConjugate ( int  n,
cufftComplex *  a,
cufftComplex *  b 
)

Definition at line 102 of file QI_impl.h.

103 {
104  //int idx = (threadIdx.y + threadIdx.x << 4) + (blockIdx.x + blockIdx.y *( (int)sqrt( (double)(n / (blockDim.x * blockDim.y)) ) + 1)) << 8;
105  //int idx = (threadIdx.x + threadIdx.y * blockDim.x) + (blockIdx.x + blockIdx.y *( (int)sqrt( (double)(n / (blockDim.x * blockDim.y)) ) + 1)) * blockDim.x * blockDim.y;
106  int idx = threadIdx.x + blockIdx.x * blockDim.x;
107  if (idx < n) {
108  cufftComplex A = a[idx];
109  cufftComplex B = b[idx];
110 
111  a[idx] = make_float2(A.x*B.x + A.y*B.y, A.y*B.x - A.x*B.y); // multiplying with conjugate
112  }
113 }

§ QI_OffsetPositions()

__global__ void QI_OffsetPositions ( int  njobs,
float3 *  current,
float3 *  dst,
cufftComplex *  autoconv,
int  fftLength,
float2 *  offsets,
float  pixelsPerProfLen,
float *  shiftbuf 
)

Definition at line 130 of file QI_impl.h.

131 {
132  int idx = threadIdx.x + blockIdx.x * blockDim.x;
133 
134  if (idx < njobs) {
135  float* shifted = &shiftbuf[ idx * fftLength ];
136 
137  // X
138  cufftComplex* autoconvX = &autoconv[idx * fftLength * 2];
139  float xoffset = QI_ComputeAxisOffset(autoconvX, fftLength, shifted);
140 
141  cufftComplex* autoconvY = autoconvX + fftLength;
142  float yoffset = QI_ComputeAxisOffset(autoconvY, fftLength, shifted);
143 
144  dst[idx].x = current[idx].x + xoffset * pixelsPerProfLen;
145  dst[idx].y = current[idx].y + yoffset * pixelsPerProfLen;
146  dst[idx].z = current[idx].z;
147 
148  if (offsets)
149  offsets[idx] = make_float2( xoffset, yoffset);
150  }
151 }
__device__ float QI_ComputeAxisOffset(cufftComplex *autoconv, int fftlen, float *shiftbuf)
Definition: QI_impl.h:115

§ QI_QuadrantsToProfiles()

__global__ void QI_QuadrantsToProfiles ( BaseKernelParams  kp,
float *  quadrants,
float2 *  profiles,
float2 *  reverseProfiles,
float *  d_radialweights,
const QIParams params 
)

Definition at line 215 of file QI_impl.h.

216 {
217 //ComputeQuadrantProfile(cudaImageListf& images, int idx, float* dst, const QIParams& params, int quadrant, float2 center)
218  int idx = threadIdx.x + blockDim.x * blockIdx.x;
219  if (idx < kp.njobs) {
220  int fftlen = params->radialSteps*2;
221  float* img_qdr = &quadrants[ idx * params->radialSteps * 4 ];
222  // for (int q=0;q<4;q++)
223  //ComputeQuadrantProfile<TImageSampler> (images, idx, &img_qdr[q*params->radialSteps], params, q, img_means[idx], make_float2(positions[idx].x, positions[idx].y));
224 
225  int nr = params->radialSteps;
226  qicomplex_t* imgprof = (qicomplex_t*) &profiles[idx * fftlen*2];
227  qicomplex_t* x0 = imgprof;
228  qicomplex_t* x1 = imgprof + nr*1;
229  qicomplex_t* y0 = imgprof + nr*2;
230  qicomplex_t* y1 = imgprof + nr*3;
231 
232  qicomplex_t* revprof = (qicomplex_t*)&reverseProfiles[idx*fftlen*2];
233  qicomplex_t* xrev = revprof;
234  qicomplex_t* yrev = revprof + nr*2;
235 
236  float* q0 = &img_qdr[0];
237  float* q1 = &img_qdr[nr];
238  float* q2 = &img_qdr[nr*2];
239  float* q3 = &img_qdr[nr*3];
240 
241  // Build Ix = qL(-r) || qR(r)
242  // qL = q1 + q2 (concat0)
243  // qR = q0 + q3 (concat1)
244  for(int r=0;r<nr;r++) {
245  float rw = d_radialweights[r];
246  x0[nr-r-1] = make_float2( rw * (q1[r]+q2[r]), 0);
247  x1[r] = make_float2( rw * (q0[r]+q3[r]),0);
248  }
249  // Build Iy = [ qB(-r) qT(r) ]
250  // qT = q0 + q1
251  // qB = q2 + q3
252  for(int r=0;r<nr;r++) {
253  float rw = d_radialweights[r];
254  y1[r] = make_float2( rw * (q0[r]+q1[r]),0);
255  y0[nr-r-1] = make_float2( rw * (q2[r]+q3[r]),0);
256  }
257 
258  for(int r=0;r<nr*2;r++)
259  xrev[r] = x0[nr*2-r-1];
260  for(int r=0;r<nr*2;r++)
261  yrev[r] = y0[nr*2-r-1];
262  }
263 }
float2 qicomplex_t
Datatype for representation of complex values. 2D Float.
Definition: QI.h:4
int radialSteps
Definition: QI.h:10
int njobs
Number of jobs in the batch.

§ ZLUT_ComputeProfileMatchScores()

__global__ void ZLUT_ComputeProfileMatchScores ( int  njobs,
ZLUTParams  params,
float *  profiles,
float *  compareScoreBuf,
LocalizationParams locParams 
)

Definition at line 121 of file Kernels.h.

122 {
123  int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
124  int zPlaneIdx = threadIdx.y + blockIdx.y * blockDim.y;
125 
126  if (jobIdx >= njobs || zPlaneIdx >= params.planes)
127  return;
128 
129  float* prof = &profiles [jobIdx * params.radialSteps()];
130  auto mapping = locParams[jobIdx];
131  float diffsum = 0.0f;
132  for (int r=0;r<params.radialSteps();r++) {
133  float d = prof[r] - params.img.pixel(r, zPlaneIdx, mapping.zlutIndex);
134  if (params.zcmpwindow)
135  d *= params.zcmpwindow[r];
136  diffsum += d*d;
137  }
138 
139  compareScoreBuf[ params.planes * jobIdx + zPlaneIdx ] = -diffsum;
140 }
CUBOTH int radialSteps()
Number of radial steps in the lookup table.
int planes
The number of planes per lookup table.
cudaImageListf img
The imagelist holding the LUTs.
CUBOTH T & pixel(int x, int y, int imgIndex)
Definition: cudaImageList.h:64
float * zcmpwindow
The radial weights to use for the error curve calculation.

§ ZLUT_ComputeZ()

__global__ void ZLUT_ComputeZ ( int  njobs,
ZLUTParams  params,
float3 *  positions,
float *  compareScoreBuf 
)

Definition at line 108 of file Kernels.h.

109 {
110  int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
111 
112  if (jobIdx < njobs) {
113  float* cmp = &compareScoreBuf [params.planes * jobIdx];
114 
115  const float ZLUTFittingWeights[ZLUT_LSQFIT_NWEIGHTS] = ZLUT_LSQFIT_WEIGHTS;
116  float maxPos = ComputeMaxInterp<float, ZLUT_LSQFIT_NWEIGHTS>::Compute(cmp, params.planes, ZLUTFittingWeights);
117  positions[jobIdx].z = maxPos;
118  }
119 }
int planes
The number of planes per lookup table.
static CUDA_SUPPORTED_FUNC T Compute(T *data, int len, const T *weights, LsqSqQuadFit< T > *fit=0)
#define ZLUT_LSQFIT_NWEIGHTS
#define ZLUT_LSQFIT_WEIGHTS

§ ZLUT_NormalizeProfiles()

__global__ void ZLUT_NormalizeProfiles ( int  njobs,
ZLUTParams  params,
float *  profiles 
)

Definition at line 142 of file Kernels.h.

143 {
144  int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
145 
146  if (jobIdx < njobs) {
147  float* prof = &profiles[params.radialSteps()*jobIdx];
148 
149  // First, subtract mean
150  float mean = 0.0f;
151  for (int i=0;i<params.radialSteps();i++) {
152  mean += prof[i];
153  }
154  mean /= params.radialSteps();
155 
156  float rmsSum2 = 0.0f;
157  for (int i=0;i<params.radialSteps();i++){
158  prof[i] -= mean;
159  rmsSum2 += prof[i]*prof[i];
160  }
161 
162  // And make RMS power equal 1
163  float invTotalRms = 1.0f / sqrt(rmsSum2/params.radialSteps());
164  for (int i=0;i<params.radialSteps();i++)
165  prof[i] *= invTotalRms;
166  }
167 }
CUBOTH int radialSteps()
Number of radial steps in the lookup table.
vector3< T > sqrt(const vector3< T > &a)
Definition: std_incl.h:112

§ ZLUT_ProfilesToZLUT()

__global__ void ZLUT_ProfilesToZLUT ( int  njobs,
cudaImageListf  images,
ZLUTParams  params,
float3 *  positions,
LocalizationParams locParams,
float *  profiles 
)

Definition at line 69 of file Kernels.h.

70 {
71  int idx = threadIdx.x + blockDim.x * blockIdx.x;
72 
73  if (idx < njobs) {
74  auto m = locParams[idx];
75  float* dst = params.GetRadialZLUT(m.zlutIndex, m.zlutPlane );
76 
77  for (int i=0;i<params.radialSteps();i++)
78  dst [i] += profiles [ params.radialSteps()*idx + i ];
79  }
80 }
CUBOTH int radialSteps()
Number of radial steps in the lookup table.
CUBOTH float * GetRadialZLUT(int bead, int plane)
Get the radial profile saved in the LUT for a particular bead and plane.

§ ZLUT_RadialProfileKernel()

template<typename TImageSampler >
__global__ void ZLUT_RadialProfileKernel ( int  njobs,
cudaImageListf  images,
ZLUTParams  params,
float3 *  positions,
float *  profiles,
float *  means 
)

Definition at line 84 of file Kernels.h.

85 {
86  int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
87  int radialIdx = threadIdx.y + blockIdx.y * blockDim.y;
88 
89  if (jobIdx >= njobs || radialIdx >= params.radialSteps())
90  return;
91 
92  float* dstprof = &profiles[params.radialSteps() * jobIdx];
93  float r = params.minRadius + (params.maxRadius-params.minRadius)*radialIdx/params.radialSteps();
94  float sum = 0.0f;
95  int count = 0;
96 
97  for (int i=0;i<params.angularSteps;i++) {
98  float x = positions[jobIdx].x + params.trigtable[i].x * r;
99  float y = positions[jobIdx].y + params.trigtable[i].y * r;
100 
101  bool outside=false;
102  sum += TImageSampler::Interpolated(images, x,y, jobIdx, outside);
103  if (!outside) count++;
104  }
105  dstprof [radialIdx] = count>MIN_RADPROFILE_SMP_COUNT ? sum/count : means[jobIdx];
106 }
CUBOTH int radialSteps()
Number of radial steps in the lookup table.
float2 * trigtable
Array of precomputed radial spoke sampling points (cos,sin pairs)
#define MIN_RADPROFILE_SMP_COUNT
Minimum number of samples for a profile radial bin. Below this the image mean will be used...
Definition: QueuedTracker.h:74
int angularSteps
The number of angular steps used to generate the lookup table.
float maxRadius
Maximum radial distance in pixels of the sampling area.
float minRadius
Radius in pixels of the starting point of the sampling area.