All available CUDA Kernels to run on the GPU. More...

Functions
template<typename T >
static __device__ T	interpolate (T a, T b, float x)

template<typename TImageSampler >
__device__ float2	BgCorrectedCOM (int idx, cudaImageListf images, float correctionFactor, float *pMean)

template<typename TImageSampler >
__global__ void	BgCorrectedCOM (int count, cudaImageListf images, float3 *d_com, float bgCorrectionFactor, float *d_imgmeans)

__global__ void	ZLUT_ProfilesToZLUT (int njobs, cudaImageListf images, ZLUTParams params, float3 *positions, LocalizationParams *locParams, float *profiles)

template<typename TImageSampler >
__global__ void	ZLUT_RadialProfileKernel (int njobs, cudaImageListf images, ZLUTParams params, float3 *positions, float *profiles, float *means)

__global__ void	ZLUT_ComputeZ (int njobs, ZLUTParams params, float3 *positions, float *compareScoreBuf)

__global__ void	ZLUT_ComputeProfileMatchScores (int njobs, ZLUTParams params, float *profiles, float *compareScoreBuf, LocalizationParams *locParams)

__global__ void	ZLUT_NormalizeProfiles (int njobs, ZLUTParams params, float *profiles)

__global__ void	ApplyOffsetGain (BaseKernelParams kp, cudaImageListf calib_gain, cudaImageListf calib_offset, float gainFactor, float offsetFactor)

template<typename TImageSampler >
__global__ void	G2MLE_Compute (BaseKernelParams kp, float sigma, int iterations, float3 *initial, float3 *positions, float *I_bg, float *I_0)

template<typename TImageSampler , typename TImageLUT >
__global__ void	ImageLUT_Sample (BaseKernelParams kp, float2 ilut_scale, float3 *positions, typename TImageLUT::KernelParams lut)

__global__ void	ForceCUDAKernelsToLoad ()
	Empty kernel to initialize and get rid of driver overhead before calculations are started. More...

template<typename TImageSampler >
__device__ void	ComputeQuadrantProfile (cudaImageListf &images, int idx, float *dst, const QIParams &params, int quadrant, float2 center, float mean, int angularSteps)

template<typename TImageSampler >
__global__ void	QI_ComputeProfile (BaseKernelParams kp, float3 *positions, float *quadrants, float2 *profiles, float2 *reverseProfiles, const QIParams qiParams, float *d_radialweights, int angularSteps)

__global__ void	QI_MultiplyWithConjugate (int n, cufftComplex *a, cufftComplex *b)

__device__ float	QI_ComputeAxisOffset (cufftComplex *autoconv, int fftlen, float *shiftbuf)

__global__ void	QI_OffsetPositions (int njobs, float3 *current, float3 *dst, cufftComplex *autoconv, int fftLength, float2 *offsets, float pixelsPerProfLen, float *shiftbuf)

template<typename TImageSampler >
__global__ void	QI_ComputeQuadrants (BaseKernelParams kp, float3 *positions, float *dst_quadrants, const QIParams *params, int angularSteps)

__global__ void	QI_QuadrantsToProfiles (BaseKernelParams kp, float *quadrants, float2 *profiles, float2 *reverseProfiles, float *d_radialweights, const QIParams *params)

Detailed Description

All available CUDA Kernels to run on the GPU.

Function Documentation

§ ApplyOffsetGain()

__global__ void ApplyOffsetGain	(	BaseKernelParams	kp,
		cudaImageListf	calib_gain,
		cudaImageListf	calib_offset,
		float	gainFactor,
		float	offsetFactor
	)

Definition at line 169 of file Kernels.h.

 {
     int x = threadIdx.x + blockIdx.x * blockDim.x;
     int y = threadIdx.y + blockIdx.y * blockDim.y;
     int jobIdx = threadIdx.z + blockIdx.z * blockDim.z;
 
     if (x < kp.images.w && y < kp.images.h && jobIdx < kp.njobs) {
         int bead = kp.locParams[jobIdx].zlutIndex;
 
         float value = kp.images.pixel(x,y,jobIdx);
         float offset = calib_offset.isEmpty() ? 0 : calib_offset.pixel(x,y,bead);
         float gain = calib_gain.isEmpty() ? 1 : calib_gain.pixel(x,y,bead);
         kp.images.pixel(x,y,jobIdx) = (value + offset*offsetFactor) * gain*gainFactor;
     }
 }

§ BgCorrectedCOM() [1/2]

template<typename TImageSampler >

__device__ float2 BgCorrectedCOM	(	int	idx,
		cudaImageListf	images,
		float	correctionFactor,
		float *	pMean
	)

Definition at line 18 of file Kernels.h.

 {
     int imgsize = images.w*images.h;
     float sum=0, sum2=0;
     float momentX=0;
     float momentY=0;
 
     for (int y=0;y<images.h;y++)
         for (int x=0;x<images.w;x++) {
             float v = TImageSampler::Index(images, x, y, idx);
             sum += v;
             sum2 += v*v;
         }
 
     float invN = 1.0f/imgsize;
     float mean = sum * invN;
     float stdev = sqrtf(sum2 * invN - mean * mean);
     sum = 0.0f;
 
     for (int y=0;y<images.h;y++)
         for(int x=0;x<images.w;x++)
         {
             float v = TImageSampler::Index(images, x,y,idx);
             v = fabsf(v-mean)-correctionFactor*stdev;
             if(v<0.0f) v=0.0f;
             sum += v;
             momentX += x*v;
             momentY += y*v;
         }
 
     if (pMean)
         *pMean = mean;
 
     float2 com;
     com.x = momentX / (float)sum;
     com.y = momentY / (float)sum;
     return com;
 }

§ BgCorrectedCOM() [2/2]

template<typename TImageSampler >

__global__ void BgCorrectedCOM	(	int	count,
		cudaImageListf	images,
		float3 *	d_com,
		float	bgCorrectionFactor,
		float *	d_imgmeans
	)

Definition at line 58 of file Kernels.h.

 {
     int idx = threadIdx.x + blockDim.x * blockIdx.x;
     if (idx < count) {
         float mean;
         float2 com = BgCorrectedCOM<TImageSampler> (idx, images, bgCorrectionFactor, &mean);
         d_com[idx] = make_float3(com.x,com.y,0.0f);
         d_imgmeans[idx] = mean;
     }
 }

§ ComputeQuadrantProfile()

template<typename TImageSampler >

__device__ void ComputeQuadrantProfile	(	cudaImageListf &	images,
		int	idx,
		float *	dst,
		const QIParams &	params,
		int	quadrant,
		float2	center,
		float	mean,
		int	angularSteps
	)

Definition at line 12 of file QI_impl.h.

 {
     const int qmat[] = {
         1, 1,
         -1, 1,
         -1, -1,
         1, -1 };
     int mx = qmat[2*quadrant+0];
     int my = qmat[2*quadrant+1];
 
     //for (int i=0;i<params.radialSteps;i++)
     //  dst[i]=0.0f;
     
     float asf = (float)params.maxAngularSteps / angularSteps;
     float rstep = (params.maxRadius - params.minRadius) / params.radialSteps;
     for (int i=0;i<params.radialSteps; i++) {
         float sum = 0.0f;
         float r = params.minRadius + rstep * i;
         int count=0;
 
         for (int a=0;a<angularSteps;a++) {
             int j = (int)(asf * a);
             float x = center.x + mx*params.cos_sin_table[j].x * r;
             float y = center.y + my*params.cos_sin_table[j].y * r;
             bool outside=false;
             float v = TImageSampler::Interpolated(images, x,y, idx, outside);
             if (!outside) {
                 sum += v;
                 count ++;
             }
         }
 
         dst[i] = count >= MIN_RADPROFILE_SMP_COUNT ? sum/count : mean;
     }
 }

§ ForceCUDAKernelsToLoad()

__global__ void ForceCUDAKernelsToLoad ( )

Empty kernel to initialize and get rid of driver overhead before calculations are started.

Definition at line 290 of file Kernels.h.

{
    // Intentionally empty: the launch itself is the point (see the description above).
}

§ G2MLE_Compute()

template<typename TImageSampler >

__global__ void G2MLE_Compute	(	BaseKernelParams	kp,
		float	sigma,
		int	iterations,
		float3 *	initial,
		float3 *	positions,
		float *	I_bg,
		float *	I_0
	)

Definition at line 187 of file Kernels.h.

 {
     int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
 
     if (jobIdx >= kp.njobs)
         return;
 
     float2 pos = make_float2(initial[jobIdx].x, initial[jobIdx].y);
     float mean = kp.imgmeans[jobIdx];
     float I0 = mean*0.5f*kp.images.w*kp.images.h;
     float bg = mean*0.5f;
 
     const float _1oSq2Sigma = 1.0f / (sqrtf(2) * sigma);
     const float _1oSq2PiSigma = (1.0f / (sqrtf(2*3.14159265359f))) / sigma;
     const float _1oSq2PiSigma3 = (1.0f / (sqrtf(2*3.14159265359f))) / (sigma*sigma*sigma);
 
     for (int i=0;i<iterations;i++)
     {
         float dL_dx = 0.0; 
         float dL_dy = 0.0; 
         float dL_dI0 = 0.0;
         float dL_dIbg = 0.0;
         float dL2_dx = 0.0;
         float dL2_dy = 0.0;
         float dL2_dI0 = 0.0;
         float dL2_dIbg = 0.0;
                 
         for (int y=0;y<kp.images.h;y++)
         {
             for (int x=0;x<kp.images.w;x++)
             {
                 float Xexp0 = (x-pos.x + .5f) * _1oSq2Sigma;
                 float Yexp0 = (y-pos.y + .5f) * _1oSq2Sigma;
         
                 float Xexp1 = (x-pos.x - .5f) * _1oSq2Sigma;
                 float Yexp1 = (y-pos.y - .5f) * _1oSq2Sigma;
                 
                 float DeltaX = 0.5f * erff(Xexp0) - 0.5f * erff(Xexp1);
                 float DeltaY = 0.5f * erff(Yexp0) - 0.5f * erff(Yexp1);
                 float mu = bg + I0 * DeltaX * DeltaY;
                 
                 float dmu_dx = I0*_1oSq2PiSigma * ( expf(-Xexp1*Xexp1) - expf(-Xexp0*Xexp0)) * DeltaY;
 
                 float dmu_dy = I0*_1oSq2PiSigma * ( expf(-Yexp1*Yexp1) - expf(-Yexp0*Yexp0)) * DeltaX;
                 float dmu_dI0 = DeltaX*DeltaY;
                 float dmu_dIbg = 1;
         
                 float smp = TImageSampler::Index(kp.images, x,y, jobIdx);
                 float f = smp / mu - 1;
                 dL_dx += dmu_dx * f;
                 dL_dy += dmu_dy * f;
                 dL_dI0 += dmu_dI0 * f;
                 dL_dIbg += dmu_dIbg * f;
 
                 float d2mu_dx = I0*_1oSq2PiSigma3 * ( (x - pos.x - .5f) * expf (-Xexp1*Xexp1) - (x - pos.x + .5) * expf(-Xexp0*Xexp0) ) * DeltaY;
                 float d2mu_dy = I0*_1oSq2PiSigma3 * ( (y - pos.y - .5f) * expf (-Yexp1*Yexp1) - (y - pos.y + .5) * expf(-Yexp0*Yexp0) ) * DeltaX;
                 dL2_dx += d2mu_dx * f - dmu_dx*dmu_dx * smp / (mu*mu);
                 dL2_dy += d2mu_dy * f - dmu_dy*dmu_dy * smp / (mu*mu);
                 dL2_dI0 += -dmu_dI0*dmu_dI0 * smp / (mu*mu);
                 dL2_dIbg += -smp / (mu*mu);
             }
         }
 
         pos.x -= dL_dx / dL2_dx;
         pos.y -= dL_dy / dL2_dy;
         I0 -= dL_dI0 / dL2_dI0;
         bg -= dL_dIbg / dL2_dIbg;
     }
     
 
     positions[jobIdx].x = pos.x;
     positions[jobIdx].y = pos.y;
     if (I_bg) I_bg[jobIdx] = bg;
     if (I_0) I_0[jobIdx] = I0;
 }

§ ImageLUT_Sample()

template<typename TImageSampler , typename TImageLUT >

__global__ void ImageLUT_Sample	(	BaseKernelParams	kp,
		float2	ilut_scale,
		float3 *	positions,
		typename TImageLUT::KernelParams	lut
	)

Definition at line 264 of file Kernels.h.

 {
     // add sampled image data to
     int x = threadIdx.x + blockIdx.x * blockDim.x;
     int y = threadIdx.y + blockIdx.y * blockDim.y;
     int id = threadIdx.z + blockIdx.z * blockDim.z;
     if (x < lut.imgw && y < lut.imgh && id < kp.njobs) {
 
         float invMean = 1.0f / kp.imgmeans[id];
 
         float startx = positions[id].x - lut.imgw/2*ilut_scale.x;
         float starty = positions[id].y - lut.imgh/2*ilut_scale.y;
         int2 imgpos = lut.GetImagePos(kp.locParams[id].zlutPlane, kp.locParams[id].zlutIndex);
 
         float px = startx + x*ilut_scale.x;
         float py = starty + y*ilut_scale.y;
 
         bool outside=false;
         float v = TImageSampler::Interpolated(kp.images, px, py, id, outside);
 
         float org = TImageLUT::read(lut, x, y, imgpos);
         TImageLUT::write(org+v*invMean, lut, x, y, imgpos);
     }
 }

§ interpolate()

template<typename T >

static __device__ T interpolate	(	T	a,
		T	b,
		float	x
	)

static

Definition at line 15 of file Kernels.h.

{ return a + (b-a)*x; } // linear blend: x = 0 yields a, x = 1 yields b

§ QI_ComputeAxisOffset()

__device__ float QI_ComputeAxisOffset	(	cufftComplex *	autoconv,
		int	fftlen,
		float *	shiftbuf
	)

Definition at line 115 of file QI_impl.h.

 {
     typedef float compute_t;
     int nr = fftlen/2;
     for(int x=0;x<fftlen;x++)  {
         shiftbuf[x] = autoconv[(x+nr)%(nr*2)].x;
     }
 
     const float QIWeights[QI_LSQFIT_NWEIGHTS] = QI_LSQFIT_WEIGHTS;
 
     compute_t maxPos = ComputeMaxInterp<compute_t>::Compute(shiftbuf, fftlen, QIWeights);
     compute_t offset = (maxPos - nr) * (3.14159265359f / 4);
     return offset;
 }

§ QI_ComputeProfile()

template<typename TImageSampler >

__global__ void QI_ComputeProfile	(	BaseKernelParams	kp,
		float3 *	positions,
		float *	quadrants,
		float2 *	profiles,
		float2 *	reverseProfiles,
		const QIParams	qiParams,
		float *	d_radialweights,
		int	angularSteps
	)

Definition at line 49 of file QI_impl.h.

 {
     int idx = threadIdx.x + blockDim.x * blockIdx.x;
     if (idx < kp.njobs) {
         const QIParams& qp = qiParams;
         int fftlen = qp.radialSteps*2;
         float* img_qdr = &quadrants[ idx * qp.radialSteps * 4 ];
         for (int q=0;q<4;q++) {
             ComputeQuadrantProfile<TImageSampler> (kp.images, idx, &img_qdr[q*qp.radialSteps], qp, q, 
                 make_float2(positions[idx].x, positions[idx].y), kp.imgmeans[idx], angularSteps);
         }
 
         int nr = qp.radialSteps;
         qicomplex_t* imgprof = (qicomplex_t*) &profiles[idx * fftlen*2];
         qicomplex_t* x0 = imgprof;
         qicomplex_t* x1 = imgprof + nr*1;
         qicomplex_t* y0 = imgprof + nr*2;
         qicomplex_t* y1 = imgprof + nr*3;
 
         qicomplex_t* revprof = (qicomplex_t*)&reverseProfiles[idx*fftlen*2];
         qicomplex_t* xrev = revprof;
         qicomplex_t* yrev = revprof + nr*2;
 
         float* q0 = &img_qdr[0];
         float* q1 = &img_qdr[nr];
         float* q2 = &img_qdr[nr*2];
         float* q3 = &img_qdr[nr*3];
 
         // Build Ix = qL(-r) || qR(r)
         // qL = q1 + q2   (concat0)
         // qR = q0 + q3   (concat1)
         for(int r=0;r<nr;r++) {
             float rw = d_radialweights[r];
             x0[nr-r-1] = make_float2(rw*(q1[r]+q2[r]), 0);
             x1[r] = make_float2( rw*(q0[r]+q3[r]),0);
         }
 
         // Build Iy = [ qB(-r)  qT(r) ]
         // qT = q0 + q1
         // qB = q2 + q3
         for(int r=0;r<nr;r++) {
             float rw = d_radialweights[r];
             y1[r] = make_float2( rw * ( q0[r]+q1[r] ),0);
             y0[nr-r-1] = make_float2( rw * (q2[r]+q3[r]),0);
         }
 
         for(int r=0;r<nr*2;r++) 
             xrev[r] = x0[nr*2-r-1];
         for(int r=0;r<nr*2;r++)
             yrev[r] = y0[nr*2-r-1];
     }
 }

§ QI_ComputeQuadrants()

template<typename TImageSampler >

__global__ void QI_ComputeQuadrants	(	BaseKernelParams	kp,
		float3 *	positions,
		float *	dst_quadrants,
		const QIParams *	params,
		int	angularSteps
	)

Definition at line 162 of file QI_impl.h.

 {
     int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
     int rIdx = threadIdx.y + blockIdx.y * blockDim.y;
     int quadrant = threadIdx.z;
     
     // Ori: dst[i], i = radial index
     // float* img_qdr = &dst_quadrants[ jobIdx * params->radialSteps * 4 ];
     // float* dst = &img_qdr[quadrant*params->radialSteps];
     // dst[rIdx] = rIdx;
     //count >= MIN_RADPROFILE_SMP_COUNT ? sum/count : kp.imgmeans[jobIdx];
 
     if (jobIdx < kp.njobs && rIdx < params->radialSteps && quadrant < 4) {
         // The variables below could go in shared memory
         float asf = (float)params->maxAngularSteps / angularSteps;
         float rstep = (params->maxRadius - params->minRadius) / params->radialSteps;
         const int qmat[] = {
             1, 1,
             -1, 1,
             -1, -1,
             1, -1 };
 
         // --Stop--
 
         int mx = qmat[2*quadrant+0];
         int my = qmat[2*quadrant+1];
 
         // Ori: dst[i], i = radial index
         // float* img_qdr = &quadrants[ idx * qp.radialSteps * 4 ];
         // dst = &img_qdr[q*qp.radialSteps]
         float* qdr = &dst_quadrants[ (jobIdx * 4 + quadrant) * params->radialSteps ];
         
         float sum = 0.0f;
         float r = params->minRadius + rstep * rIdx;
         float3 pos = positions[jobIdx];
 //      float mean = imgmeans[jobIdx];
 
         int count=0;
         for (int a=0;a<angularSteps;a++) {
             int j = (int)(asf * a);
             float x = pos.x + mx*params->cos_sin_table[j].x * r;
             float y = pos.y + my*params->cos_sin_table[j].y * r;
             bool outside=false;
             float v = TImageSampler::Interpolated(kp.images, x,y,jobIdx, outside);
             if (!outside) {
                 sum += v;
                 count++;
             }
         }
         qdr[rIdx] = count >= MIN_RADPROFILE_SMP_COUNT ? sum/count : kp.imgmeans[jobIdx];
     }
 }

§ QI_MultiplyWithConjugate()

__global__ void QI_MultiplyWithConjugate	(	int	n,
		cufftComplex *	a,
		cufftComplex *	b
	)

Definition at line 102 of file QI_impl.h.

 {
     //int idx = (threadIdx.y + threadIdx.x << 4) + (blockIdx.x + blockIdx.y *( (int)sqrt( (double)(n / (blockDim.x * blockDim.y)) ) + 1)) << 8;
     //int idx = (threadIdx.x + threadIdx.y * blockDim.x) + (blockIdx.x + blockIdx.y *( (int)sqrt( (double)(n / (blockDim.x * blockDim.y)) ) + 1)) * blockDim.x * blockDim.y;
     int idx = threadIdx.x + blockIdx.x * blockDim.x;
     if (idx < n) {
         cufftComplex A = a[idx];
         cufftComplex B = b[idx];
         
         a[idx] = make_float2(A.x*B.x + A.y*B.y, A.y*B.x - A.x*B.y); // multiplying with conjugate
     }
 }

§ QI_OffsetPositions()

__global__ void QI_OffsetPositions	(	int	njobs,
		float3 *	current,
		float3 *	dst,
		cufftComplex *	autoconv,
		int	fftLength,
		float2 *	offsets,
		float	pixelsPerProfLen,
		float *	shiftbuf
	)

Definition at line 130 of file QI_impl.h.

 {
     int idx = threadIdx.x + blockIdx.x * blockDim.x;
 
     if (idx < njobs) {
         float* shifted = &shiftbuf[ idx * fftLength ];      
 
         // X
         cufftComplex* autoconvX = &autoconv[idx * fftLength * 2];
         float xoffset = QI_ComputeAxisOffset(autoconvX, fftLength, shifted);
 
         cufftComplex* autoconvY = autoconvX + fftLength;
         float yoffset = QI_ComputeAxisOffset(autoconvY, fftLength, shifted);
 
         dst[idx].x = current[idx].x + xoffset * pixelsPerProfLen;
         dst[idx].y = current[idx].y + yoffset * pixelsPerProfLen;
         dst[idx].z = current[idx].z;
 
         if (offsets) 
             offsets[idx] = make_float2( xoffset, yoffset);
     }
 }

§ QI_QuadrantsToProfiles()

__global__ void QI_QuadrantsToProfiles	(	BaseKernelParams	kp,
		float *	quadrants,
		float2 *	profiles,
		float2 *	reverseProfiles,
		float *	d_radialweights,
		const QIParams *	params
	)

Definition at line 215 of file QI_impl.h.

 {
 //ComputeQuadrantProfile(cudaImageListf& images, int idx, float* dst, const QIParams& params, int quadrant, float2 center)
     int idx = threadIdx.x + blockDim.x * blockIdx.x;
     if (idx < kp.njobs) {
         int fftlen = params->radialSteps*2;
         float* img_qdr = &quadrants[ idx * params->radialSteps * 4 ];
     //  for (int q=0;q<4;q++)
             //ComputeQuadrantProfile<TImageSampler> (images, idx, &img_qdr[q*params->radialSteps], params, q, img_means[idx], make_float2(positions[idx].x, positions[idx].y));
 
         int nr = params->radialSteps;
         qicomplex_t* imgprof = (qicomplex_t*) &profiles[idx * fftlen*2];
         qicomplex_t* x0 = imgprof;
         qicomplex_t* x1 = imgprof + nr*1;
         qicomplex_t* y0 = imgprof + nr*2;
         qicomplex_t* y1 = imgprof + nr*3;
 
         qicomplex_t* revprof = (qicomplex_t*)&reverseProfiles[idx*fftlen*2];
         qicomplex_t* xrev = revprof;
         qicomplex_t* yrev = revprof + nr*2;
 
         float* q0 = &img_qdr[0];
         float* q1 = &img_qdr[nr];
         float* q2 = &img_qdr[nr*2];
         float* q3 = &img_qdr[nr*3];
 
         // Build Ix = qL(-r) || qR(r)
         // qL = q1 + q2   (concat0)
         // qR = q0 + q3   (concat1)
         for(int r=0;r<nr;r++) {
             float rw = d_radialweights[r];
             x0[nr-r-1] = make_float2( rw * (q1[r]+q2[r]), 0);
             x1[r] = make_float2( rw * (q0[r]+q3[r]),0);
         }
         // Build Iy = [ qB(-r)  qT(r) ]
         // qT = q0 + q1
         // qB = q2 + q3
         for(int r=0;r<nr;r++) {
             float rw = d_radialweights[r];
             y1[r] = make_float2( rw * (q0[r]+q1[r]),0);
             y0[nr-r-1] = make_float2( rw * (q2[r]+q3[r]),0);
         }
 
         for(int r=0;r<nr*2;r++)
             xrev[r] = x0[nr*2-r-1];
         for(int r=0;r<nr*2;r++)
             yrev[r] = y0[nr*2-r-1];
     }
 }

§ ZLUT_ComputeProfileMatchScores()

__global__ void ZLUT_ComputeProfileMatchScores	(	int	njobs,
		ZLUTParams	params,
		float *	profiles,
		float *	compareScoreBuf,
		LocalizationParams *	locParams
	)

Definition at line 121 of file Kernels.h.

 {
     int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
     int zPlaneIdx = threadIdx.y + blockIdx.y * blockDim.y;
 
     if (jobIdx >= njobs || zPlaneIdx >= params.planes)
         return;
 
     float* prof = &profiles [jobIdx * params.radialSteps()];
     auto mapping = locParams[jobIdx];
     float diffsum = 0.0f;
     for (int r=0;r<params.radialSteps();r++) {
         float d = prof[r] - params.img.pixel(r, zPlaneIdx, mapping.zlutIndex);
         if (params.zcmpwindow)
             d *= params.zcmpwindow[r];
         diffsum += d*d;
     }
 
     compareScoreBuf[ params.planes * jobIdx + zPlaneIdx ] = -diffsum;
 }

§ ZLUT_ComputeZ()

__global__ void ZLUT_ComputeZ	(	int	njobs,
		ZLUTParams	params,
		float3 *	positions,
		float *	compareScoreBuf
	)

Definition at line 108 of file Kernels.h.

 {
     int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
 
     if (jobIdx < njobs) {
         float* cmp = &compareScoreBuf [params.planes * jobIdx];
 
         const float ZLUTFittingWeights[ZLUT_LSQFIT_NWEIGHTS] = ZLUT_LSQFIT_WEIGHTS;
         float maxPos = ComputeMaxInterp<float, ZLUT_LSQFIT_NWEIGHTS>::Compute(cmp, params.planes, ZLUTFittingWeights);
         positions[jobIdx].z = maxPos;
     }
 }

§ ZLUT_NormalizeProfiles()

__global__ void ZLUT_NormalizeProfiles	(	int	njobs,
		ZLUTParams	params,
		float *	profiles
	)

Definition at line 142 of file Kernels.h.

 {
     int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
 
     if (jobIdx < njobs) {
         float* prof = &profiles[params.radialSteps()*jobIdx];
 
         // First, subtract mean
         float mean = 0.0f;
         for (int i=0;i<params.radialSteps();i++) {
             mean += prof[i];
         }
         mean /= params.radialSteps();
 
         float rmsSum2 = 0.0f;
         for (int i=0;i<params.radialSteps();i++){
             prof[i] -= mean;
             rmsSum2 += prof[i]*prof[i];
         }
 
         // And make RMS power equal 1
         float invTotalRms = 1.0f / sqrt(rmsSum2/params.radialSteps());
         for (int i=0;i<params.radialSteps();i++)
             prof[i] *= invTotalRms;
     }
 }

§ ZLUT_ProfilesToZLUT()

__global__ void ZLUT_ProfilesToZLUT	(	int	njobs,
		cudaImageListf	images,
		ZLUTParams	params,
		float3 *	positions,
		LocalizationParams *	locParams,
		float *	profiles
	)

Definition at line 69 of file Kernels.h.

 {
     int idx = threadIdx.x + blockDim.x * blockIdx.x;
 
     if (idx < njobs) {
         auto m = locParams[idx];
         float* dst = params.GetRadialZLUT(m.zlutIndex, m.zlutPlane );
 
         for (int i=0;i<params.radialSteps();i++)
             dst [i] += profiles [ params.radialSteps()*idx + i ];
     }
 }

§ ZLUT_RadialProfileKernel()

template<typename TImageSampler >

__global__ void ZLUT_RadialProfileKernel	(	int	njobs,
		cudaImageListf	images,
		ZLUTParams	params,
		float3 *	positions,
		float *	profiles,
		float *	means
	)

Definition at line 84 of file Kernels.h.

 {
     int jobIdx = threadIdx.x + blockIdx.x * blockDim.x;
     int radialIdx = threadIdx.y + blockIdx.y * blockDim.y;
 
     if (jobIdx >= njobs || radialIdx >= params.radialSteps()) 
         return;
 
     float* dstprof = &profiles[params.radialSteps() * jobIdx];
     float r = params.minRadius + (params.maxRadius-params.minRadius)*radialIdx/params.radialSteps();
     float sum = 0.0f;
     int count = 0;
     
     for (int i=0;i<params.angularSteps;i++) {
         float x = positions[jobIdx].x + params.trigtable[i].x * r;
         float y = positions[jobIdx].y + params.trigtable[i].y * r;
 
         bool outside=false;
         sum += TImageSampler::Interpolated(images, x,y, jobIdx, outside);
         if (!outside) count++;
     }
     dstprof [radialIdx] = count>MIN_RADPROFILE_SMP_COUNT ? sum/count : means[jobIdx];
 }

Functions

Detailed Description

Function Documentation

§ ApplyOffsetGain()

§ BgCorrectedCOM() [1/2]

§ BgCorrectedCOM() [2/2]

§ ComputeQuadrantProfile()

§ ForceCUDAKernelsToLoad()

§ G2MLE_Compute()

§ ImageLUT_Sample()

§ interpolate()

§ QI_ComputeAxisOffset()

§ QI_ComputeProfile()

§ QI_ComputeQuadrants()

§ QI_MultiplyWithConjugate()

§ QI_OffsetPositions()

§ QI_QuadrantsToProfiles()

§ ZLUT_ComputeProfileMatchScores()

§ ZLUT_ComputeZ()

§ ZLUT_NormalizeProfiles()

§ ZLUT_ProfilesToZLUT()

§ ZLUT_RadialProfileKernel()