QTrk
Classes | Functions | Variables
test.cu File Reference
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "std_incl.h"
#include "utils.h"
#include <cassert>
#include <cstdlib>
#include <stdio.h>
#include <windows.h>
#include <cstdarg>
#include <valarray>
#include "random_distr.h"
#include <stdint.h>
#include "gpu_utils.h"
#include "QueuedCUDATracker.h"
#include "QueuedCPUTracker.h"
#include "../cputrack-test/SharedTests.h"
#include "BenchmarkLUT.h"
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include "FisherMatrix.h"
#include "testutils.h"
#include "ResultManager.h"
#include "ExtractBeadImages.h"

Go to the source code of this file.

Classes

struct  SpeedInfo
 

Functions

void BenchmarkParams ()
 
std::string getPath (const char *file)
 
__device__ float2 mul_conjugate (float2 a, float2 b)
 
void ShowCUDAError ()
 
__device__ float compute (int idx, float *buf, int s)
 
__global__ void testWithGlobal (int n, int s, float *result, float *buf)
 
__global__ void testWithShared (int n, int s, float *result)
 
void TestSharedMem ()
 
void QTrkCompareTest ()
 
void listDevices ()
 
__global__ void SimpleKernel (int N, float *a)
 
void TestAsync ()
 
__global__ void emptyKernel ()
 
float SpeedTest (const QTrkSettings &cfg, QueuedTracker *qtrk, int count, bool haveZLUT, LocMode_t locType, float *scheduleTime, bool gaincorrection=false)
 
int NearestPowerOfTwo (int v)
 
int SmallestPowerOfTwo (int minval)
 
SpeedInfo SpeedCompareTest (int w, LocalizeModeEnum locMode, bool haveZLUT, int qi_iterations=5)
 
void ProfileSpeedVsROI (LocalizeModeEnum locMode, const char *outputcsv, bool haveZLUT, int qi_iterations)
 
void CompareAccuracy (const char *lutfile)
 
void BasicQTrkTest ()
 
void BasicQTrkTest_RM ()
 
void TestGauss2D (bool calib)
 
void TestRadialLUTGradientMethod ()
 
void QICompare (const char *lutfile)
 
void TestBenchmarkLUT ()
 
template<typename T >
void check_arg (const std::vector< std::string > &args, const char *name, T *param)
 
void check_strarg (const std::vector< std::string > &args, const char *name, std::string *param)
 
int CmdLineRun (int argc, char *argv[])
 
void BuildZLUT (std::string folder, outputter *output)
 
int main (int argc, char *argv[])
 

Variables

__shared__ float cudaSharedMem []
 
std::vector< float > cmp_cpu_qi_prof
 
std::vector< float > cmp_gpu_qi_prof
 
std::vector< std::complex< float > > cmp_cpu_qi_fft_out
 
std::vector< std::complex< float > > cmp_gpu_qi_fft_out
 

Function Documentation

§ BasicQTrkTest()

void BasicQTrkTest ( )

Definition at line 535 of file test.cu.

// BasicQTrkTest: throughput and accuracy smoke test for QueuedCUDATracker.
// One synthetic 100x100 image is generated at a known position, queued N times,
// and every returned position is compared with the position that was generated.
// NOTE(review): listing line 537 is absent from this rendering; `cc` is presumably
// declared there (it is used as the tracker config below) - confirm against test.cu.
536 {
538  cc.width = cc.height = 100;
539  cc.Update();
540  QueuedCUDATracker qtrk(cc);
541 
542  float zmin=1,zmax=5;
543  ImageData img = ImageData::alloc(cc.width,cc.height);
544 
// Ground-truth bead position, offset from the ROI centre by (-5, +3) pixels.
545  float pos_x = cc.width/2 - 5;
546  float pos_y = cc.height/2 + 3;
547  GenerateTestImage(img, pos_x, pos_y, (zmin+zmax)/2, 0);
548 
// Debug builds queue ten times fewer images.
549  int N = 100000;
550 #ifdef _DEBUG
551  N = 10000;
552 #endif
// The timed section covers mode selection, queueing, and waiting for all results.
553  double t = GetPreciseTime();
554  qtrk.SetLocalizationMode((LocMode_t)(LT_QI|LT_NormalizeProfile));
555  for (int i=0;i<N;i++)
556  {
// The loop index is passed as the first LocalizationJob argument; it is read
// back below as res.job.frame.
557  LocalizationJob job ( i, 0, 0, 0);
558  qtrk.ScheduleLocalization((uchar*)img.data, sizeof(float)*cc.width, QTrkFloat, &job);
// Progress message every 10% of N (at least every iteration when N is tiny).
559  if(i%std::max(1,(int)(N*0.1))==0) dbgprintf("Queued: %d / %d\n", i, N);
560  }
561  WaitForFinish(&qtrk, N);
562  t = GetPreciseTime() - t;
563  dbgprintf("Speed: %d imgs/s (Only QI, %d iterations)\n", (int)(N / t), cc.qi_iterations);
// Number of results that differ from the true position by more than 0.01 px in x or y.
564  int count = 0;
565 
// Drain the result queue one result at a time.
566  while(qtrk.GetResultCount() != 0){
567  LocalizationResult res;
568  qtrk.FetchResults(&res,1);
569  if( res.pos.x > pos_x + 0.01f || res.pos.x < pos_x - 0.01f || res.pos.y > pos_y + 0.01f || res.pos.y < pos_y - 0.01f ){
// Only the first 100 outliers are printed; all of them are counted.
// NOTE(review): "%02f" sets a minimum field width of 2, not a precision;
// "%.2f" may have been intended.
570  if(count < 100)
571  dbgprintf("Location frame %d: (%02f,%02f)\n",res.job.frame, res.pos.x, res.pos.y);
572  count++;
573  }
574  }
575 
576  dbgprintf("Errors: %d/%d (%f%%)\n", count, N, (float)100*count/N);
577  img.free();
578 }
void GenerateTestImage(ImageData &img, float xp, float yp, float size, float SNratio)
Definition: utils.cpp:162
static double WaitForFinish(QueuedTracker *qtrk, int N)
Definition: SharedTests.h:300
32 bit float
Definition: qtrk_c_api.h:37
int width
Width of regions of interest to be handled. Typically equals height (square ROI). ...
Definition: qtrk_c_api.h:106
Structure for job results.
Definition: qtrk_c_api.h:67
int height
Height of regions of interest to be handled. Typically equals width (square ROI). ...
Definition: qtrk_c_api.h:107
uint frame
Frame number this ROI belongs to.
Definition: qtrk_c_api.h:56
static TImageData alloc(int w, int h)
Definition: utils.h:110
double GetPreciseTime()
Definition: utils.cpp:669
int LocMode_t
Definition: qtrk_c_api.h:30
void Update()
Compute the derived settings.
vector3f pos
Final 3D position found. If no z localization was performed, the value of z will be 0...
Definition: qtrk_c_api.h:69
Normalize found radial profiles.
Definition: qtrk_c_api.h:22
LocalizationJob job
Job metadata. See LocalizationJob.
Definition: qtrk_c_api.h:68
void free()
Definition: utils.h:111
Structure for derived settings computed from base settings in QTrkSettings.
Definition: qtrk_c_api.h:189
COM+QI.
Definition: qtrk_c_api.h:9
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149
int qi_iterations
Number of times to run the QI algorithm, sampling around the last found position. ...
Definition: qtrk_c_api.h:140
CUDA implementation of the QueuedTracker interface.
Structure for region of interest metadata.
Definition: qtrk_c_api.h:49
unsigned char uchar
Definition: std_incl.h:130
T * data
Definition: utils.h:80

§ BasicQTrkTest_RM()

void BasicQTrkTest_RM ( )

Definition at line 580 of file test.cu.

// BasicQTrkTest_RM: same throughput test as BasicQTrkTest, but results are
// collected and written to text files by a ResultManager instead of being
// fetched directly from the tracker.
// NOTE(review): listing line 582 is absent from this rendering; `cc` is presumably
// declared there - confirm against test.cu.
581 {
583  //cc.qi_iterations = 10;
584  cc.width = cc.height = 100;
585  cc.Update();
586  QueuedCUDATracker qtrk(cc);
587 
588  float zmin=1,zmax=5;
589  ImageData img = ImageData::alloc(cc.width,cc.height);
590 
591  // Positions to set
592  float pos_x = cc.width/2 - 5;
593  float pos_y = cc.height/2 + 3;
594  GenerateTestImage(img, pos_x, pos_y, (zmin+zmax)/2, 0);
595 
// NOTE(review): the _DEBUG override assigns the same value as the default,
// so it currently has no effect.
596  int N = 100000;
597 #ifdef _DEBUG
598  N = 100000;
599 #endif
600  qtrk.SetLocalizationMode((LocMode_t)(LT_QI|LT_NormalizeProfile));
601 
// ResultManager settings: one bead per frame, no extra frame-info columns,
// identity scaling, zero offset, text output.
602  ResultManagerConfig RMcfg;
603  RMcfg.numBeads = 1;
604  RMcfg.numFrameInfoColumns = 0;
605  RMcfg.scaling = vector3f(1.0f,1.0f,1.0f);
606  RMcfg.offset = vector3f(0.0f,0.0f,0.0f);
607  RMcfg.writeInterval = 4000;
608  RMcfg.maxFramesInMemory = 0;
609  RMcfg.binaryOutput = false;
610 
// With numFrameInfoColumns == 0 this loop adds no column names.
611  std::vector<std::string> colnames;
612  for(int ii = 0;ii<RMcfg.numFrameInfoColumns;ii++){
613  colnames.push_back(SPrintf("%d",ii));
614  }
615 
616  outputter output(Files+Images);
617 
// Both output files are placed in the outputter's folder.
618  ResultManager RM(
619  SPrintf("%s\\RMOutput.txt",output.folder.c_str()).c_str(),
620  SPrintf("%s\\RMFrameInfo.txt",output.folder.c_str()).c_str(),
621  &RMcfg, colnames);
622 
623  RM.SetTracker(&qtrk);
// The timed section starts here and ends once localizationsDone reaches N.
624  double t = GetPreciseTime();
625  for (int i=0;i<N;i++)
626  {
627  LocalizationJob job ( i, 0, 0, 0);
628  qtrk.ScheduleLocalization((uchar*)img.data, sizeof(float)*cc.width, QTrkFloat, &job);
629  //if(i%std::max(1,N/1000)==0) dbgprintf("Queued: %d / %d\n", i, N);
630  }
631  printf("\nDone queueing!\n");
632  // Tell the tracker to perform the localizations left in the queue regardless of batchSize
633  qtrk.Flush();
634 
635  // Halt the test (=timer) until all localizations are done.
// NOTE(review): empty-body spin loop with no sleep and no timeout; it never
// exits if the counter stops short of N.
636  while(RM.GetFrameCounters().localizationsDone < N);
637  t = GetPreciseTime() - t;
638 
639  // Tell the resultmanager to print the final available results regardless of writeInterval
640  RM.Flush();
// NOTE(review): second spin loop; it exits only when lastSaveFrame equals N exactly.
641  while(RM.GetFrameCounters().lastSaveFrame != N);
642 
643  dbgprintf("Speed: %d imgs/s (Only QI, %d iterations)\n", (int)(N / t), cc.qi_iterations);
644 
645  img.free();
646 }
void GenerateTestImage(ImageData &img, float xp, float yp, float size, float SNratio)
Definition: utils.cpp:162
int numBeads
Number of beads for which to grab results. Should always equal the amount of beads in a single frame...
Definition: ResultManager.h:64
32 bit float
Definition: qtrk_c_api.h:37
int width
Width of regions of interest to be handled. Typically equals height (square ROI). ...
Definition: qtrk_c_api.h:106
vector3f scaling
Scaling factor for each of the three dimensions.
Definition: ResultManager.h:71
uint maxFramesInMemory
Number of frames for which to keep the data in memory. 0 for infinite.
Definition: ResultManager.h:80
vector3f offset
Offset value for each of the three dimensions.
Definition: ResultManager.h:78
int height
Height of regions of interest to be handled. Typically equals width (square ROI). ...
Definition: qtrk_c_api.h:107
Class that handles data gathering and saving from QueuedTracker instances.
Definition: ResultManager.h:91
int numFrameInfoColumns
Number of columns in the frame info metadata file. Additional columns can be added to save more data ...
Definition: ResultManager.h:65
static TImageData alloc(int w, int h)
Definition: utils.h:110
double GetPreciseTime()
Definition: utils.cpp:669
int LocMode_t
Definition: qtrk_c_api.h:30
void Update()
Compute the derived settings.
Normalize found radial profiles.
Definition: qtrk_c_api.h:22
Structure for settings used by ResultManager.
Definition: ResultManager.h:62
void free()
Definition: utils.h:111
void SetTracker(QueuedTracker *qtrk)
Set the tracker from which to fetch results.
Structure for derived settings computed from base settings in QTrkSettings.
Definition: qtrk_c_api.h:189
COM+QI.
Definition: qtrk_c_api.h:9
vector3< float > vector3f
Definition: std_incl.h:114
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149
int qi_iterations
Number of times to run the QI algorithm, sampling around the last found position. ...
Definition: qtrk_c_api.h:140
CUDA implementation of the QueuedTracker interface.
int writeInterval
Interval of number of gathered frames at which to write the data.
Definition: ResultManager.h:79
uint8_t binaryOutput
Flag (boolean) to output a binary file instead of a text file.
Definition: ResultManager.h:81
Structure for region of interest metadata.
Definition: qtrk_c_api.h:49
unsigned char uchar
Definition: std_incl.h:130
std::string SPrintf(const char *fmt,...)
Definition: utils.cpp:132
T * data
Definition: utils.h:80

§ BenchmarkParams()

void BenchmarkParams ( )

Definition at line 120 of file Benchmark.cpp.

121 {
122  /*
123  - Accuracy vs ROIsize
124  - Speed vs ROIsize
125  */
126 #ifdef _DEBUG
127  int n = 50;
128 #else
129  int n = 300;
130 #endif
131 
132  int mpv = 10000;
133  float pixel_size = 120, lutstep = 50;
134 
135  for (int zlutbias=0;zlutbias<2;zlutbias++) {
136  float range_in_nm=0;
137  for (int bias=0;bias<2;bias++) {
138  for (int i=0;i<5;i++)
139  BenchmarkROISizes(SPrintf("roi_qi%d_bias%d_zlutbias%d.txt",i,bias, zlutbias).c_str(), n, mpv, i, 0, range_in_nm, pixel_size, lutstep, zlutbias ? BUILDLUT_BIASCORRECT : 0);
140  // for (int i=0;i<5;i++)
141  // BenchmarkROISizes(SPrintf("roi_qi%d_bias%d_wz.txt",i,bias).c_str(), n, mpv, i, LT_LocalizeZWeighted, range_in_nm, pixel_size, lutstep);
142  BenchmarkROISizes( SPrintf("roi_xcor_bias%d_zlutbias%d.txt", bias, zlutbias).c_str(), n, mpv, 0, LT_XCor1D, range_in_nm, pixel_size, lutstep, zlutbias ? BUILDLUT_BIASCORRECT : 0);
143  // BenchmarkROISizes( SPrintf("roi_xcor_bias%d_wz.txt",bias).c_str(), n, mpv, 0, LT_XCor1D | LT_LocalizeZWeighted, range_in_nm, pixel_size, lutstep);
144  range_in_nm = 200;
145  }
146  }
147 
148  QTrkSettings basecfg;
149  basecfg.width = 80;
150  basecfg.height = 80;
151  basecfg.qi_iterations = 4;
152  basecfg.qi_roi_coverage = 1;
153  basecfg.qi_minradius=1;
154  basecfg.zlut_minradius=1;
155  basecfg.qi_radial_coverage = 2.5f;
156  basecfg.qi_angular_coverage = 0.7f;
157  basecfg.zlut_roi_coverage = 1;
158  basecfg.zlut_radial_coverage = 1.5f;
159  basecfg.com_bgcorrection = 0;
160  basecfg.qi_angstep_factor = 1.1f;
161  basecfg.zlut_angular_coverage = 0.7f;
162 
163  //BenchmarkConfigParamRange (20000, &QTrkSettings::qi_iterations, &basecfg, linspace(1, 6, 6), "qi_iterations_noise", mpv);
164 /*
165  BenchmarkConfigParamRange (n, &QTrkSettings::qi_radial_coverage, &basecfg, linspace(0.2f, 4.0f, 20), "qi_rad_cov_noise", mpv );
166  BenchmarkConfigParamRange (n, &QTrkSettings::zlut_radial_coverage, &basecfg, linspace(0.2f, 4.0f, 20), "zlut_rad_cov_noise", mpv);
167  BenchmarkConfigParamRange (n, &QTrkSettings::qi_iterations, &basecfg, linspace(1, 6, 6), "qi_iterations_noise", mpv);
168  BenchmarkZAccuracy("zpos-noise.txt", n, mpv);
169 
170  BenchmarkROISizes("roi-sizes.txt", n, 0);
171  BenchmarkConfigParamRange (n, &QTrkSettings::qi_radial_coverage, &basecfg, linspace(0.2f, 4.0f, 20), "qi_rad_cov", 0);
172  BenchmarkConfigParamRange (n, &QTrkSettings::zlut_radial_coverage, &basecfg, linspace(0.2f, 4.0f, 20), "zlut_rad_cov", 0);
173  BenchmarkConfigParamRange (n, &QTrkSettings::qi_iterations, &basecfg, linspace(1, 6, 6), "qi_iterations", 0);
174  BenchmarkZAccuracy("zpos.txt", n, 0);*/
175 }
float zlut_minradius
Distance in pixels from the bead center from which to start sampling profiles. Default 1...
Definition: qtrk_c_api.h:135
float qi_minradius
Distance in pixels from the bead center from which to start sampling profiles. Default 1...
Definition: qtrk_c_api.h:141
int width
Width of regions of interest to be handled. Typically equals height (square ROI). ...
Definition: qtrk_c_api.h:106
int height
Height of regions of interest to be handled. Typically equals width (square ROI). ...
Definition: qtrk_c_api.h:107
Structure for the settings used by the algorithms implemented in QueuedTracker.
Definition: qtrk_c_api.h:82
COM+XCor1D.
Definition: qtrk_c_api.h:8
float com_bgcorrection
Background correction factor for COM. Defines the number of standard deviations data needs to be away...
Definition: qtrk_c_api.h:133
float zlut_roi_coverage
Factor of the ROI to include in sampling. Between 0 and 1, default 1. Maxradius = ROI/2*roi_coverage...
Definition: qtrk_c_api.h:138
float qi_radial_coverage
Sampling points per radial pixel. Default 3.0.
Definition: qtrk_c_api.h:142
float zlut_angular_coverage
Factor of the sampling perimeter to cover with angular sampling steps. Between 0 and 1...
Definition: qtrk_c_api.h:137
void BenchmarkROISizes(const char *name, int n, int MaxPixelValue, int qi_iterations, int extraFlags, float range_in_nm, float pixel_size, float lutstep, int buildLUTFlags)
Definition: Benchmark.cpp:14
float qi_roi_coverage
Factor of the ROI to include in sampling. Between 0 and 1, default 1. Maxradius = ROI/2*roi_coverage...
Definition: qtrk_c_api.h:144
int qi_iterations
Number of times to run the QI algorithm, sampling around the last found position. ...
Definition: qtrk_c_api.h:140
float qi_angstep_factor
Factor to reduce angular steps on lower iterations. Default 1.0 (no effect).
Definition: qtrk_c_api.h:157
float zlut_radial_coverage
Sampling points per radial pixel. Default 3.0.
Definition: qtrk_c_api.h:136
float qi_angular_coverage
Factor of the sampling perimeter to cover with angular sampling steps. Between 0 and 1...
Definition: qtrk_c_api.h:143
std::string SPrintf(const char *fmt,...)
Definition: utils.cpp:132
#define BUILDLUT_BIASCORRECT

§ BuildZLUT()

void BuildZLUT ( std::string  folder,
outputter output 
)

Definition at line 944 of file test.cu.

945 {
946  int ROISize = 100;
947  std::vector<BeadPos> beads = read_beadlist(SPrintf("%sbeadlist.txt",folder.c_str()));
948 
949 
950  int numImgInStack = 1218;
951  int numPositions = 1001; // 10nm/frame
952  float range = 10.0f; // total range 25.0 um -> 35.0 um
953  float umPerImg = range/numImgInStack;
954 
955  QTrkComputedConfig cfg;
956  cfg.width=cfg.height = ROISize;
957  cfg.qi_angstep_factor = 1;
958  cfg.qi_iterations = 6;
959  cfg.qi_angular_coverage = 0.7f;
960  cfg.qi_roi_coverage = 1;
961  cfg.qi_radial_coverage = 1.5f;
962  cfg.qi_minradius=0;
963  cfg.zlut_minradius=0;
964  cfg.zlut_angular_coverage = 0.7f;
965  cfg.zlut_roi_coverage = 1;
966  cfg.zlut_radial_coverage = 1.5f;
967  cfg.zlut_minradius = 0;
968  cfg.qi_minradius = 0;
969  cfg.com_bgcorrection = 0;
970  cfg.xc1_profileLength = ROISize*0.8f;
971  cfg.xc1_profileWidth = ROISize*0.2f;
972  cfg.xc1_iterations = 1;
973  cfg.Update();
974  cfg.WriteToFile();
975 
976  int zplanes = 50;
977 
978  QueuedCUDATracker* qtrk = new QueuedCUDATracker(cfg);
979  //qtrk->SetLocalizationMode(LT_NormalizeProfile | LT_QI);
980  qtrk->SetRadialZLUT(0, beads.size(), zplanes);
981  qtrk->BeginLUT(0);
982 
983  int pxPerBead = ROISize*ROISize;
984  int memSizePerBead = pxPerBead*sizeof(float);
985  int startFrame = 400;
986  for(int plane = 0; plane < zplanes; plane++){
987  output->outputString(SPrintf("Frame %d/%d",plane+1,zplanes),true);
988  int frameNum = startFrame+(int)(numImgInStack-startFrame)*((float)plane/zplanes);
989  std::string file = SPrintf("%s\img%05d.jpg",folder.c_str(),frameNum);
990 
991  ImageData frame = ReadJPEGFile(file.c_str());
992 
993  float* data = new float[beads.size()*pxPerBead];
994 
995  for(uint ii = 0; ii < beads.size(); ii++){
996  vector2f pos;
997  pos.x = beads.at(ii).x - ROISize/2;
998  pos.y = beads.at(ii).y - ROISize/2;
999  ImageData crop = CropImage(frame,pos.x,pos.y,ROISize,ROISize);
1000  //output->outputImage(crop,SPrintf("%d-%05d",ii,plane));
1001  memcpy(data+ii*pxPerBead,crop.data,memSizePerBead);
1002  crop.free();
1003  }
1004 
1005  /*
1006  // To verify seperate frame bead stack generation
1007  output->newFile(SPrintf("data-plane-%d",plane));
1008  output->outputArray(data,beads.size()*pxPerBead);
1009 
1010  ImageData allBeads = ImageData(data,ROISize,ROISize*beads.size());
1011  output->outputImage(allBeads,SPrintf("allBeads-%05d",frameNum));//*/
1012 
1013  qtrk->BuildLUT(data, sizeof(float)*ROISize, QTrkFloat, plane);
1014 
1015  frame.free();
1016  delete[] data;
1017  }
1018 
1019  qtrk->FinalizeLUT();
1020  float* luts = new float[beads.size()*(zplanes*cfg.zlut_radialsteps)];
1021  qtrk->GetRadialZLUT(luts);
1022 
1023  for(int ii = 0; ii < beads.size(); ii++){
1024  ImageData lut = ImageData::alloc(cfg.zlut_radialsteps, zplanes);
1025  memcpy(lut.data, &luts[ii*cfg.zlut_radialsteps*zplanes], cfg.zlut_radialsteps*zplanes*sizeof(float));
1026  //memcpy(lut.data,qtrk->GetZLUTByIndex(ii),cfg.zlut_radialsteps*zplanes*sizeof(float));
1027  //output->outputImage(lut,SPrintf("lut%03d,%d",beads.at(ii).x,beads.at(ii).y));
1028  output->outputImage(lut, SPrintf("lut%03d",ii));
1029  lut.free();
1030  }
1031 
1032  qtrk->Flush();
1033  delete qtrk;
1034 }
float zlut_minradius
Distance in pixels from the bead center from which to start sampling profiles. Default 1...
Definition: qtrk_c_api.h:135
32 bit float
Definition: qtrk_c_api.h:37
void Flush() override
Stop waiting for more jobs to do, and just process the current batch.
void outputImage(ImageData img, std::string filename="UsedImage")
Definition: testutils.cpp:82
float qi_minradius
Distance in pixels from the bead center from which to start sampling profiles. Default 1...
Definition: qtrk_c_api.h:141
int width
Width of regions of interest to be handled. Typically equals height (square ROI). ...
Definition: qtrk_c_api.h:106
void BuildLUT(void *data, int pitch, QTRK_PixelDataType pdt, int plane, vector2f *known_pos=0) override
Add a new lookup table plane.
void outputString(std::string out, bool ConsoleOnly=false)
Definition: testutils.cpp:70
unsigned int uint
Definition: std_incl.h:127
int height
Height of regions of interest to be handled. Typically equals width (square ROI). ...
Definition: qtrk_c_api.h:107
void GetRadialZLUT(float *data) override
Get the radial lookup tables used for z tracking.
int zlut_radialsteps
Number of radial steps to sample on.
Definition: qtrk_c_api.h:198
static TImageData alloc(int w, int h)
Definition: utils.h:110
void Update()
Compute the derived settings.
void FinalizeLUT() override
Finalize the lookup tables in memory.
float com_bgcorrection
Background correction factor for COM. Defines the number of standard deviations data needs to be away...
Definition: qtrk_c_api.h:133
ImageData CropImage(ImageData img, int x, int y, int w, int h)
Definition: testutils.cpp:132
void free()
Definition: utils.h:111
void BeginLUT(uint flags) override
Setup to begin building a lookup table.
int ReadJPEGFile(uchar *srcbuf, int srclen, uchar **data, int *width, int *height)
Definition: fastjpg.cpp:12
float zlut_roi_coverage
Factor of the ROI to include in sampling. Between 0 and 1, default 1. Maxradius = ROI/2*roi_coverage...
Definition: qtrk_c_api.h:138
float qi_radial_coverage
Sampling points per radial pixel. Default 3.0.
Definition: qtrk_c_api.h:142
Structure for derived settings computed from base settings in QTrkSettings.
Definition: qtrk_c_api.h:189
float zlut_angular_coverage
Factor of the sampling perimeter to cover with angular sampling steps. Between 0 and 1...
Definition: qtrk_c_api.h:137
void SetRadialZLUT(float *data, int numLUTs, int planes) override
Set the radial lookup tables to be used for z tracking.
float qi_roi_coverage
Factor of the ROI to include in sampling. Between 0 and 1, default 1. Maxradius = ROI/2*roi_coverage...
Definition: qtrk_c_api.h:144
int qi_iterations
Number of times to run the QI algorithm, sampling around the last found position. ...
Definition: qtrk_c_api.h:140
CUDA implementation of the QueuedTracker interface.
int xc1_iterations
Number of times to run the cross correlation algorithm.
Definition: qtrk_c_api.h:161
float qi_angstep_factor
Factor to reduce angular steps on lower iterations. Default 1.0 (no effect).
Definition: qtrk_c_api.h:157
int xc1_profileWidth
Profile width for the cross correlation.
Definition: qtrk_c_api.h:160
void WriteToFile()
Write all settings to specified output file (Jordi, to combine with QTrkSettings.testRun) ...
float zlut_radial_coverage
Sampling points per radial pixel. Default 3.0.
Definition: qtrk_c_api.h:136
float qi_angular_coverage
Factor of the sampling perimeter to cover with angular sampling steps. Between 0 and 1...
Definition: qtrk_c_api.h:143
int xc1_profileLength
Profile length for the cross correlation.
Definition: qtrk_c_api.h:159
std::string SPrintf(const char *fmt,...)
Definition: utils.cpp:132
T * data
Definition: utils.h:80

§ check_arg()

template<typename T >
void check_arg ( const std::vector< std::string > &  args,
const char *  name,
T *  param 
)

Definition at line 743 of file test.cu.

// Looks for the option `name` in `args` and, if it is followed by a value token,
// parses that token with atof() and stores it in *param, converted to T.
//
// args   Command-line tokens (program name already removed).
// name   Option name to search for; the first match wins.
// param  Destination. Left untouched when the option is absent or when it is the
//        last token and therefore has no value after it.
template<typename T>
void check_arg(const std::vector<std::string>& args, const char* name, T* param)
{
    // Stop one short of the end so that args[i + 1] always exists; the original
    // indexed past the end of the vector when `name` was the final token.
    for (size_t i = 0; i + 1 < args.size(); i++) {
        if (args[i] == name) {
            *param = (T)atof(args[i + 1].c_str());
            return;
        }
    }
}
unsigned int uint
Definition: std_incl.h:127

§ check_strarg()

void check_strarg ( const std::vector< std::string > &  args,
const char *  name,
std::string *  param 
)

Definition at line 753 of file test.cu.

// Looks for the option `name` in `args` and, if it is followed by a value token,
// copies that token into *param.
//
// args   Command-line tokens (program name already removed).
// name   Option name to search for; the first match wins.
// param  Destination. Left untouched when the option is absent or when it is the
//        last token and therefore has no value after it.
void check_strarg(const std::vector<std::string>& args, const char* name, std::string* param)
{
    // Stop one short of the end so that args[i + 1] always exists; the original
    // indexed past the end of the vector when `name` was the final token.
    for (size_t i = 0; i + 1 < args.size(); i++) {
        if (args[i] == name) {
            *param = args[i + 1];
            return;
        }
    }
}
unsigned int uint
Definition: std_incl.h:127

§ CmdLineRun()

int CmdLineRun ( int  argc,
char *  argv[] 
)

Definition at line 763 of file test.cu.

// CmdLineRun: command-line driven localization run.
// Reads "<name> <value>" option pairs from argv, builds a CUDA or CPU tracker,
// loads or generates a ZLUT, renders one noisy test image per input position,
// runs the tracker on all of them and writes the found positions (and optionally
// CRLB values) to trace files.
// Returns 0 on success and -1 when no usable LUT could be set up.
// NOTE(review): listing lines 905 and 925 are absent from this rendering; they
// presumably declare `locMode` and `r`, which are used below - confirm against test.cu.
764 {
765  QTrkSettings cfg;
// Copy argv[1..] into args; argv[0] is skipped.
766  std::vector<std::string> args(argc-1);
767  for (int i=0;i<argc-1;i++)
768  args[i]=argv[i+1];
769 
// "roi" sets both ROI dimensions.
770  check_arg(args, "roi", &cfg.width);
771  cfg.height=cfg.width;
772 
773  int count=100;
774  check_arg(args, "count", &count);
775 
776  std::string outputfile, fixlutfile, inputposfile, bmlutfile, rescaledlutfile;
777  std::string radialWeightsFile;
778  check_strarg(args, "output", &outputfile);
779  check_strarg(args, "fixlut", &fixlutfile);
780  check_strarg(args, "bmlut", &bmlutfile);
781  check_strarg(args, "inputpos", &inputposfile);
782  check_strarg(args, "regenlut", &rescaledlutfile);
783  check_strarg(args, "radweights", &radialWeightsFile);
784 
785  std::string crlboutput;
786  check_strarg(args, "crlb", &crlboutput);
787 
// An input position file overrides "count" with the number of positions read.
788  std::vector< vector3f > inputPos;
789  if (!inputposfile.empty()) {
790  inputPos = ReadVector3CSV(inputposfile.c_str());
791  count = inputPos.size();
792  }
793 
794  check_arg(args, "zlut_minradius", &cfg.zlut_minradius);
795  check_arg(args, "zlut_radial_coverage", &cfg.zlut_radial_coverage);
796  check_arg(args, "zlut_angular_coverage", &cfg.zlut_angular_coverage);
797  check_arg(args, "zlut_roi_coverage", &cfg.zlut_roi_coverage);
798 
799  check_arg(args, "qi_iterations", &cfg.qi_iterations);
800  check_arg(args, "qi_minradius", &cfg.qi_minradius);
801  check_arg(args, "qi_radial_coverage", &cfg.qi_radial_coverage);
802  check_arg(args, "qi_angular_coverage", &cfg.qi_angular_coverage);
803  check_arg(args, "qi_roi_coverage", &cfg.qi_roi_coverage);
804  check_arg(args, "qi_angstep_factor", &cfg.qi_angstep_factor);
805  check_arg(args, "downsample", &cfg.downsample);
806 
807  int zlutAlign=0;
808  check_arg(args, "zlutalign", &zlutAlign);
809 
// Passed to SampleFisherMatrix and ApplyPoissonNoise below; a value <= 0 skips the noise step.
810  float pixelmax = 28 * 255;
811  check_arg(args, "pixelmax", &pixelmax);
812 
813  std::string lutsmpfile;
814  check_strarg(args, "lutsmpfile", &lutsmpfile);
815 
// "cuda 0" selects the CPU tracker; the CUDA tracker is the default.
816  int cuda=1;
817  check_arg(args, "cuda", &cuda);
818  QueuedTracker* qtrk;
819 
820  if (cuda) qtrk = new QueuedCUDATracker(cfg);
821  else qtrk = new QueuedCPUTracker(cfg);
822 
823  ImageData lut;
824  BenchmarkLUT bmlut;
825 
// LUT source 1: a fixed LUT image given with "fixlut".
826  if (!fixlutfile.empty())
827  {
828  lut = ReadJPEGFile(fixlutfile.c_str());
829 
830  if(!rescaledlutfile.empty()) {
831  // rescaling allowed
832  ImageData newlut;
833  ResampleLUT(qtrk, &lut, lut.h, &newlut, rescaledlutfile.c_str());
834  lut.free();
835  lut=newlut;
836  }
// Without "regenlut" the LUT width must match the tracker's radial step count.
837  else if (lut.w != qtrk->cfg.zlut_radialsteps) {
838  lut.free();
839  dbgprintf("Invalid LUT size (%d). Expecting %d radialsteps\n", lut.w, qtrk->cfg.zlut_radialsteps);
840  delete qtrk;
841  return -1;
842  }
843 
844  qtrk->SetRadialZLUT(lut.data,1,lut.h);
845  }
// LUT source 2: a benchmark LUT given with "bmlut"; having neither is an error.
846  else
847  {
848  if (bmlutfile.empty()) {
849  delete qtrk;
850  dbgprintf("No lut file\n");
851  return -1;
852  }
853 
854  bmlut.Load(bmlutfile.c_str());
855  lut = ImageData::alloc(qtrk->cfg.zlut_radialsteps, bmlut.lut_h);
856  bmlut.GenerateLUT(&lut);
857 
858  if (!rescaledlutfile.empty())
859  WriteJPEGFile(rescaledlutfile.c_str(), lut);
860 
861  qtrk->SetRadialZLUT(lut.data,1,lut.h);
862  }
863 
// Without an input position file, every image uses the ROI centre and the middle LUT plane.
864  if (inputPos.empty()) {
865  inputPos.resize(count);
866  for (int i=0;i<count;i++){
867  inputPos[i]=vector3f(cfg.width/2,cfg.height/2,lut.h/2);
868  }
869  }
870 
871  if (!radialWeightsFile.empty())
872  {
// NOTE(review): rw is sized from rwd but never filled from it, so
// SetRadialWeights receives value-initialised floats rather than the CSV contents.
873  auto rwd = ReadCSV(radialWeightsFile.c_str());
874  std::vector<float> rw(rwd.size());
875  if (rw.size() == qtrk->cfg.zlut_radialsteps)
876  qtrk->SetRadialWeights(&rw[0]);
877  else {
// NOTE(review): qtrk is deleted here but the function does not return;
// every later use of qtrk is a use-after-delete, and it is deleted again at the end.
878  dbgprintf("Invalid # radial weights");
879  delete qtrk;
880  }
881  }
882 
883  std::vector<ImageData> imgs (inputPos.size());
884 
885  std::vector<vector3f> crlb(inputPos.size());
886 
// Render one test image per input position.
// NOTE(review): the images allocated here are not freed in the lines shown.
887  for (uint i=0;i<inputPos.size();i++) {
888  imgs[i]=ImageData::alloc(cfg.width, cfg.height);
889  //vector3f pos = centerpos + range*vector3f(rand_uniform<float>()-0.5f, rand_uniform<float>()-0.5f, rand_uniform<float>()-0.5f)*2;
890 
891  auto p = inputPos[i];
// bmlut.lut_w is presumably zero when no benchmark LUT was loaded - TODO confirm.
// In that case the image is generated from the fixed LUT.
892  if (!bmlut.lut_w) {
893  GenerateImageFromLUT(&imgs[i], &lut, qtrk->cfg.zlut_minradius, qtrk->cfg.zlut_maxradius, p, false);
// CRLB values are only computed on the fixed-LUT path and only when "crlb" was given.
894  if (!crlboutput.empty()) {
895  SampleFisherMatrix sfm(pixelmax);
896  crlb[i]=sfm.Compute(p, vector3f(1,1,1)*0.001f, lut, qtrk->cfg.width,qtrk->cfg.height, qtrk->cfg.zlut_minradius, qtrk->cfg.zlut_maxradius).Inverse().diag();
897  }
898  } else
899  bmlut.GenerateSample(&imgs[i], p, qtrk->cfg.zlut_minradius, qtrk->cfg.zlut_maxradius);
900  imgs[i].normalize();
901  if (pixelmax > 0) ApplyPoissonNoise(imgs[i], pixelmax, 255);
// Only the first generated image is saved as a sample.
902  if(i==0 && !lutsmpfile.empty()) WriteJPEGFile(lutsmpfile.c_str(), imgs[i]);
903  }
904 
// Add optional localization-mode flags to locMode.
906  if (qtrk->cfg.qi_iterations > 0)
907  locMode |= LT_QI;
908  if (zlutAlign)
909  locMode |= LT_ZLUTAlign;
910 
911  qtrk->SetLocalizationMode((LocMode_t)locMode);
912  double tstart=GetPreciseTime();
913 
// The image index is passed as the first LocalizationJob argument; results are
// stored below by r.job.frame, so they line up with inputPos.
914  for (uint i=0;i<inputPos.size();i++)
915  {
916  LocalizationJob job(i, 0, 0, 0);
917  qtrk->ScheduleImageData(&imgs[i], &job);
918  }
919 
920  WaitForFinish(qtrk, inputPos.size());
// NOTE(review): tstart and tend are recorded but not used in the lines shown.
921  double tend = GetPreciseTime();
922 
923  std::vector<vector3f> results(inputPos.size());
924  for (uint i=0;i<inputPos.size();i++) {
926  qtrk->FetchResults(&r,1);
927  results[r.job.frame]=r.pos;
928  }
929  vector3f meanErr, stdevErr;
930  MeanStDevError(inputPos, results, meanErr, stdevErr);
// NOTE(review): the first "Z=%f" is given meanErr.y, while the second is given
// stdevErr.z; meanErr.z may have been intended.
931  dbgprintf("Mean err X=%f,Z=%f. St deviation: X=%f,Z=%f\n", meanErr.x,meanErr.y,stdevErr.x,stdevErr.z);
932 
933  if (!crlboutput.empty())
934  WriteTrace(crlboutput, &crlb[0], crlb.size());
935 
936  WriteTrace(outputfile, &results[0], inputPos.size());
937 
938  if (lut.data) lut.free();
939  delete qtrk;
940 
941  return 0;
942 }
void GenerateLUT(ImageData *lut)
float zlut_minradius
Distance in pixels from the bead center from which to start sampling profiles. Default 1...
Definition: qtrk_c_api.h:135
void GenerateSample(ImageData *image, vector3f pos, float minRadius, float maxRadius)
static double WaitForFinish(QueuedTracker *qtrk, int N)
Definition: SharedTests.h:300
float qi_minradius
Distance in pixels from the bead center from which to start sampling profiles. Default 1...
Definition: qtrk_c_api.h:141
int width
Width of regions of interest to be handled. Typically equals height (square ROI). ...
Definition: qtrk_c_api.h:106
void ResampleLUT(T *qtrk, ImageData *lut, int zplanes, ImageData *newlut, const char *jpgfile=0, uint buildLUTFlags=0)
Definition: SharedTests.h:35
int downsample
Image downsampling factor. Applied before anything else. 0 = original, 1 = 1x (W=W/2,H=H/2).
Definition: qtrk_c_api.h:166
std::vector< std::vector< float > > ReadCSV(const char *filename, char sep)
Definition: utils.cpp:463
unsigned int uint
Definition: std_incl.h:127
void GenerateImageFromLUT(ImageData *image, ImageData *zlut, float minradius, float maxradius, vector3f pos, bool splineInterp, int oversampleSubdiv)
Definition: utils.cpp:354
void WriteJPEGFile(uchar *data, int w, int h, const char *filename, int quality)
Definition: fastjpg.cpp:89
Structure for job results.
Definition: qtrk_c_api.h:67
int height
Height of regions of interest to be handled. Typically equals width (square ROI). ...
Definition: qtrk_c_api.h:107
void WriteTrace(std::string filename, vector3f *results, int nResults)
Definition: utils.cpp:507
Structure for the settings used by the algorithms implemented in QueuedTracker.
Definition: qtrk_c_api.h:82
int zlut_radialsteps
Number of radial steps to sample on.
Definition: qtrk_c_api.h:198
std::vector< vector3f > ReadVector3CSV(const char *file, char sep)
Definition: utils.cpp:494
virtual void SetLocalizationMode(LocMode_t locType)=0
Select which algorithm is to be used.
uint frame
Frame number this ROI belongs to.
Definition: qtrk_c_api.h:56
static TImageData alloc(int w, int h)
Definition: utils.h:110
double GetPreciseTime()
Definition: utils.cpp:669
int LocMode_t
Definition: qtrk_c_api.h:30
virtual void SetRadialZLUT(float *data, int count, int planes)=0
Set the radial lookup tables to be used for z tracking.
vector3f pos
Final 3D position found. If no z localization was performed, the value of z will be 0...
Definition: qtrk_c_api.h:69
Enable z localization.
Definition: qtrk_c_api.h:21
Normalize found radial profiles.
Definition: qtrk_c_api.h:22
LocalizationJob job
Job metadata. See LocalizationJob.
Definition: qtrk_c_api.h:68
CPU implementation of the QueuedTracker interface.
QTrkComputedConfig cfg
The settings used by this instance of QueuedTracker.
void free()
Definition: utils.h:111
int ReadJPEGFile(uchar *srcbuf, int srclen, uchar **data, int *width, int *height)
Definition: fastjpg.cpp:12
float zlut_roi_coverage
Factor of the ROI to include in sampling. Between 0 and 1, default 1. Maxradius = ROI/2*roi_coverage...
Definition: qtrk_c_api.h:138
static void MeanStDevError(const std::vector< vector3f > &truepos, const std::vector< vector3f > &v, vector3f &meanErr, vector3f &stdev)
Definition: SharedTests.h:318
float qi_radial_coverage
Sampling points per radial pixel. Default 3.0.
Definition: qtrk_c_api.h:142
float zlut_angular_coverage
Factor of the sampling perimeter to cover with angular sampling steps. Between 0 and 1...
Definition: qtrk_c_api.h:137
int h
Definition: utils.h:81
void check_arg(const std::vector< std::string > &args, const char *name, T *param)
Definition: test.cu:743
COM+QI.
Definition: qtrk_c_api.h:9
float qi_roi_coverage
Factor of the ROI to include in sampling. Between 0 and 1, default 1. Maxradius = ROI/2*roi_coverage...
Definition: qtrk_c_api.h:144
vector3< float > vector3f
Definition: std_incl.h:114
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149
void check_strarg(const std::vector< std::string > &args, const char *name, std::string *param)
Definition: test.cu:753
int qi_iterations
Number of times to run the QI algorithm, sampling around the last found position. ...
Definition: qtrk_c_api.h:140
CUDA implementation of the QueuedTracker interface.
Enable ZLUT align.
Definition: qtrk_c_api.h:19
float qi_angstep_factor
Factor to reduce angular steps on lower iterations. Default 1.0 (no effect).
Definition: qtrk_c_api.h:157
void Load(ImageData *lut)
virtual int FetchResults(LocalizationResult *results, int maxResults)=0
Fetch available results.
virtual void SetRadialWeights(float *zcmp)=0
Set radial weights used for comparing LUT profiles.
Abstract tracker interface, implemented by QueuedCUDATracker and QueuedCPUTracker.
Definition: QueuedTracker.h:86
void ScheduleImageData(ImageData *data, const LocalizationJob *jobInfo)
Quick function to schedule a single ROI from an ImageData object.
float zlut_radial_coverage
Sampling points per radial pixel. Default 3.0.
Definition: qtrk_c_api.h:136
float qi_angular_coverage
Factor of the sampling perimeter to cover with angular sampling steps. Between 0 and 1...
Definition: qtrk_c_api.h:143
Structure for region of interest metadata.
Definition: qtrk_c_api.h:49
float zlut_maxradius
Max radius in pixels of the sampling circle.
Definition: qtrk_c_api.h:200
Do a ZLUT lookup with adjusted weights, for testing purposes.
Definition: qtrk_c_api.h:25
T * data
Definition: utils.h:80
int w
Definition: utils.h:81
void ApplyPoissonNoise(ImageData &img, float poissonMax, float maxval)
Definition: utils.cpp:432

§ CompareAccuracy()

void CompareAccuracy ( const char *  lutfile)

Definition at line 459 of file test.cu.

/// Runs the QI localization through both the CPU and the CUDA tracker on the
/// same LUT file and prints the per-result x/y difference between them.
///
/// At most the first 20 results are printed, and only indices that exist in
/// BOTH outputs are compared.
///
/// @param lutfile  Path of the LUT image passed to RunTracker for each tracker.
void CompareAccuracy(const char *lutfile)
{
	QTrkSettings cfg;
	cfg.width = 150;
	cfg.height = 150;
	cfg.numThreads = 1;

	auto cpu = RunTracker<QueuedCPUTracker> (lutfile, &cfg, false, "cpu", LT_QI);
	auto gpu = RunTracker<QueuedCUDATracker>(lutfile, &cfg, false, "gpu", LT_QI);
//	auto cpugc = RunTracker<QueuedCPUTracker>(lutfile, &cfg, true, "cpugc");
//	auto gpugc = RunTracker<QueuedCUDATracker>(lutfile, &cfg, true, "gpugc");

	// Bound the loop by the shorter of the two outputs so that a tracker that
	// produced fewer results cannot cause an out-of-range read.
	const int available = (int)std::min(cpu.output.size(), gpu.output.size());
	const int shown = std::min(available, 20);
	for (int i = 0; i < shown; i++) {
		dbgprintf("CPU-GPU: %f, %f\n",
			cpu.output[i].x - gpu.output[i].x,
			cpu.output[i].y - gpu.output[i].y);
	}

/*	dbgprintf("CPU\tGPU\tCPU(gc)\tGPU(gc)\n");
	dbgprintf("St Dev. : CPU: %.2f\tGPU: %.2f\tCPU(gc)%.2f\tGPU(gc)%.2f\n", StDev(cpu).x, StDev(gpu).x, StDev(cpugc).x, StDev(gpugc).x);
	dbgprintf("Mean err: CPU: %.2f\tGPU: %.2f\tCPU(gc)%.2f\tGPU(gc)%.2f\n", Mean(cpu).x, Mean(gpu).x, Mean(cpugc).x, Mean(gpugc).x);
*/
}
int width
Width of regions of interest to be handled. Typically equals height (square ROI). ...
Definition: qtrk_c_api.h:106
int height
Height of regions of interest to be handled. Typically equals width (square ROI). ...
Definition: qtrk_c_api.h:107
Structure for the settings used by the algorithms implemented in QueuedTracker.
Definition: qtrk_c_api.h:82
COM+QI.
Definition: qtrk_c_api.h:9
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149
int numThreads
Number of threads/streams to use. Defaults differ between CPU and GPU implementations.
Definition: qtrk_c_api.h:114

§ compute()

__device__ float compute ( int  idx,
float *  buf,
int  s 
)

Definition at line 64 of file test.cu.

/// Arbitrary arithmetic used to give the test kernels something to do.
///
/// Fills buf[0..s) with a running sum of cosines, squares the first s/2
/// entries in place, and returns a sum of ratios of neighbouring entries.
///
/// @param idx  Value mixed into the cosine argument (the callers pass their thread index).
/// @param buf  Scratch buffer with room for at least s floats; overwritten.
/// @param s    Number of scratch elements to use.
/// @return     The accumulated ratio sum (0 when s < 2).
__device__ float compute(int idx, float *buf, int s)
{
	// Pass 1: running cosine sum.
	float running = 0.0f;
	int i = 0;
	while (i < s) {
		running += cosf(i*0.1f*idx);
		buf[i] = running;
		++i;
	}

	// Pass 2: square the first half in place.
	const int half = s/2;
	for (int j = 0; j < half; ++j) {
		const float v = buf[j];
		buf[j] = v*v;
	}

	// Pass 3: walk from the back, dividing each element by its successor's
	// magnitude (offset by 0.1 to avoid dividing by zero).
	float total = 0.0f;
	for (int j = s-2; j >= 0; --j) {
		total += buf[j]/(fabsf(buf[j+1])+0.1f);
	}
	return total;
}

§ emptyKernel()

__global__ void emptyKernel ( )

Definition at line 312 of file test.cu.

// Kernel with an empty body: launching it performs no device-side work.
313 {}

§ getPath()

std::string getPath ( const char *  file)

Definition at line 39 of file test.cu.

/// Returns the directory part of a file path: everything before the last
/// '\\' or '/' separator.
///
/// The result has no trailing separator. An empty string is returned when the
/// path contains no separator, when the only separator is its first
/// character, or when the input is empty.
///
/// @param file  NUL-terminated path; must not be null.
std::string getPath(const char *file)
{
	const std::string s = file;

	// find_last_of keeps the index unsigned, so there is no size_t -> int
	// narrowing and the empty-string case needs no special handling.
	const std::string::size_type sep = s.find_last_of("\\/");
	if (sep == std::string::npos)
		return std::string();

	return s.substr(0, sep);
}

§ listDevices()

void listDevices ( )

Definition at line 255 of file test.cu.

/// Prints the name and maximum 2D texture width of every CUDA device.
///
/// Failures of the CUDA runtime queries are reported through dbgprintf
/// instead of being ignored; a failing device query skips only that device.
void listDevices()
{
	// Initialised so that a failing cudaGetDeviceCount cannot leave the loop
	// bound undefined.
	int dc = 0;
	cudaError_t err = cudaGetDeviceCount(&dc);
	if (err != cudaSuccess) {
		dbgprintf("cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
		return;
	}

	for (int k = 0; k < dc; k++) {
		cudaDeviceProp prop;
		err = cudaGetDeviceProperties(&prop, k);
		if (err != cudaSuccess) {
			dbgprintf("cudaGetDeviceProperties(%d) failed: %s\n", k, cudaGetErrorString(err));
			continue;
		}
		dbgprintf("Device[%d] = %s\n", k, prop.name);
		dbgprintf("\tMax texture width: %d\n", prop.maxTexture2D[0]);
	}
}
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149

§ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 1036 of file test.cu.

/// Program entry point.
///
/// With command-line arguments the work is delegated to CmdLineRun and its
/// return value becomes the exit code. Without arguments the currently enabled
/// test (BasicQTrkTest) is run; std::exception is caught and logged, and the
/// console is paused before returning 0.
int main(int argc, char *argv[])
{
	//listDevices();

	// sizeof yields size_t; cast explicitly because "%d" expects int.
	printf("%d, %d\n", (int)sizeof(long), (int)sizeof(int));

	if (argc > 1)
	{
		return CmdLineRun(argc, argv);
	}

	try {
	//	outputter output(Files+Images);
	//	BuildZLUT("C:\\TestImages\\TestMovie150507_2\\images\\jpg\\Zstack\\", &output);
		BasicQTrkTest();
	//	BasicQTrkTest_RM();


	//	TestBenchmarkLUT();
	//	testLinearArray();
	//	TestTextureFetch();
	//	TestGauss2D(true);
	//	MultipleLUTTest();

	//	TestSurfaceReadWrite();
	//	TestImage4D();
	//	TestImage4DMemory();
	//	TestImageLUT("../cputrack-test/lut000.jpg");
	//	TestRadialLUTGradientMethod();

	//	BenchmarkParams();
	//	TestTextureFetch();
	//	QICompare("../cputrack-test/lut000.jpg");
	//	TestCMOSNoiseInfluence<QueuedCUDATracker>("../cputrack-test/lut000.jpg");

	//	CompareAccuracy("../cputrack-test/lut000.jpg");
	//	QTrkCompareTest();
		/*
		ProfileSpeedVsROI(LT_OnlyCOM, "speeds-com.txt", false, 0);
		ProfileSpeedVsROI(LT_OnlyCOM, "speeds-com-z.txt", true, 0);
		ProfileSpeedVsROI(LT_XCor1D, "speeds-xcor.txt", true, 0);
		for (int qi_it=1;qi_it<=4;qi_it++) {
			ProfileSpeedVsROI(LT_QI, SPrintf("speeds-qi-%d-iterations.txt",qi_it).c_str(), true, qi_it);
		}*/

		/*	auto info = SpeedCompareTest(80, false);
		auto infogc = SpeedCompareTest(80, true);
		dbgprintf("[gainc=false] CPU: %f, GPU: %f\n", info.speed_cpu, info.speed_gpu);
		dbgprintf("[gainc=true] CPU: %f, GPU: %f\n", infogc.speed_cpu, infogc.speed_gpu);
		*/
	} catch (const std::exception& e) {
		dbgprintf("Exception: %s\n", e.what());
	}
	system("pause");
	return 0;
}
int CmdLineRun(int argc, char *argv[])
Definition: test.cu:763
void BasicQTrkTest()
Definition: test.cu:535
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149

§ mul_conjugate()

__device__ float2 mul_conjugate ( float2  a,
float2  b 
)
inline

Definition at line 49 of file test.cu.

/// Multiplies complex number a by the complex conjugate of b.
///
/// Both values are stored as float2 with x = real part and y = imaginary part:
///   real = a.x*b.x + a.y*b.y
///   imag = a.y*b.x - a.x*b.y
inline __device__ float2 mul_conjugate(float2 a, float2 b)
{
	const float re = a.x*b.x + a.y*b.y;
	const float im = a.y*b.x - a.x*b.y;
	return make_float2(re, im);
}

§ NearestPowerOfTwo()

int NearestPowerOfTwo ( int  v)

Definition at line 378 of file test.cu.

/// Returns the power of two closest to v.
///
/// - A value exactly halfway between two powers of two resolves to the lower one.
/// - v <= 1 yields 1 (the smallest power of two).
/// - If the closest power of two does not fit in an int, the largest
///   representable power of two is returned instead.
///
/// All comparisons are done in integer arithmetic, so large inputs are not
/// affected by float rounding and no intermediate value can overflow.
int NearestPowerOfTwo(int v)
{
	if (v <= 1)
		return 1;

	// Largest power of two that an int can hold (assumes 8-bit bytes).
	const int kMaxPow2 = 1 << (sizeof(int)*8 - 2);

	// lower = largest power of two <= v. The loop only doubles while
	// lower <= v/2, so the product never exceeds v.
	int lower = 1;
	while (lower <= v/2)
		lower *= 2;

	if (lower == v)
		return v;

	// The upper candidate is 2*lower. It is strictly closer when
	// (2*lower - v) < (v - lower), i.e. 3*lower < 2*v; widen to avoid overflow.
	const bool upperIsCloser = 3LL*lower < 2LL*v;
	if (upperIsCloser && lower != kMaxPow2)
		return lower*2;

	return lower;
}

§ ProfileSpeedVsROI()

void ProfileSpeedVsROI ( LocalizeModeEnum  locMode,
const char *  outputcsv,
bool  haveZLUT,
int  qi_iterations 
)

Definition at line 444 of file test.cu.

/// Runs SpeedCompareTest for ROI sizes 20, 30, ..., 180 and writes one
/// (ROI, CPU speed, CUDA speed) row per size to a CSV file.
///
/// @param locMode        Localization mode forwarded to SpeedCompareTest.
/// @param outputcsv      Path of the CSV file to write.
/// @param haveZLUT       Forwarded to SpeedCompareTest.
/// @param qi_iterations  Forwarded to SpeedCompareTest.
void ProfileSpeedVsROI(LocalizeModeEnum locMode, const char *outputcsv, bool haveZLUT, int qi_iterations)
{
	const int kColumns = 3;
	const char *columnNames[] = { "ROI", "CPU", "CUDA" };

	// Row-major table: kColumns floats per measured ROI size.
	std::vector<float> table;

	int roi = 20; // same range as BenchmarkROIAccuracy()
	while (roi <= 180) {
		const SpeedInfo timing = SpeedCompareTest(roi, locMode, haveZLUT, qi_iterations);
		const float row[kColumns] = { (float)roi, timing.speed_cpu, timing.speed_gpu };
		table.insert(table.end(), row, row + kColumns);
		roi += 10;
	}

	WriteImageAsCSV(outputcsv, &table[0], kColumns, table.size()/kColumns, columnNames);
}
SpeedInfo SpeedCompareTest(int w, LocalizeModeEnum locMode, bool haveZLUT, int qi_iterations=5)
Definition: test.cu:401
float speed_gpu
Definition: test.cu:397
float speed_cpu
Definition: test.cu:397
void WriteImageAsCSV(const char *file, float *d, int w, int h, const char *labels[])
Definition: utils.cpp:551

§ QICompare()

void QICompare ( const char *  lutfile)

Definition at line 674 of file test.cu.

/// Localizes one LUT-generated image with both the CUDA and the CPU tracker in
/// QI mode and prints the differences in position, QI profile and FFT output.
///
/// Also writes the CPU tracker's debug image to "qidbgimg.jpg". The profile
/// and FFT comparisons read the file-level cmp_cpu_* / cmp_gpu_* vectors; only
/// indices present in both the CPU and the GPU vector are compared.
///
/// @param lutfile  Path of the JPEG LUT used to generate the test image.
void QICompare(const char *lutfile)
{
	QTrkSettings cfg;
	cfg.qi_iterations=1;
	cfg.width = 150;
	cfg.height = 150;
	cfg.numThreads=1;
	QueuedCUDATracker gpu(cfg, 1);
	QueuedCPUTracker cpu(cfg);

	ImageData lut=ReadJPEGFile(lutfile);
	ImageData img=ImageData::alloc(cfg.width,cfg.height);

	srand(0);
	const int N=1;
	gpu.SetLocalizationMode(LT_QI);
	cpu.SetLocalizationMode(LT_QI);
	for (int i=0;i<N;i++) {
		LocalizationJob job(i, 0, 0, 0);
		// Centre of the ROI / middle LUT plane, jittered in x and y.
		vector3f pos(cfg.width/2,cfg.height/2, lut.h/2);
		pos.x += rand_uniform<float>();
		pos.y += rand_uniform<float>();
		GenerateImageFromLUT(&img, &lut, 1, cfg.width/2, pos);
		gpu.ScheduleLocalization( (uchar*)img.data, sizeof(float)*img.w, QTrkFloat, &job);
		cpu.ScheduleLocalization( (uchar*)img.data, sizeof(float)*img.w, QTrkFloat, &job);
	}
	gpu.Flush();
	cpu.Flush();
	// Yield while waiting instead of spinning a core at 100%.
	while(cpu.GetResultCount() != N || gpu.GetResultCount() != N )
		Sleep(1);

	ImageData dbgImg = cpu.DebugImage(0);
	FloatToJPEGFile("qidbgimg.jpg", dbgImg.data, dbgImg.w, dbgImg.h);

	auto rcpu = FetchResults(&cpu), rgpu = FetchResults(&gpu);
	// Never index past what either tracker actually returned.
	const int nResults = std::min(N, (int)std::min(rcpu.size(), rgpu.size()));
	for (int i=0;i<nResults;i++) {
		vector3f d=rcpu[i]-rgpu[i];
		dbgprintf("[%d]: CPU: x=%f, y=%f. GPU: x=%f, y=%f.\tGPU-CPU: x:%f, y:%f\n", i, rcpu[i].x, rcpu[i].y, rgpu[i].x, rgpu[i].y, d.x,d.y);
	}

	// Profiles (compare only the range both sides recorded)
	const uint nProf = (uint)std::min(cmp_cpu_qi_prof.size(), cmp_gpu_qi_prof.size());
	for(uint i=0;i<nProf;i++) {
		dbgprintf("QIPROF[%d]. CPU=%f, GPU=%f, Diff: %f\n", i, cmp_cpu_qi_prof[i], cmp_gpu_qi_prof[i], cmp_gpu_qi_prof[i]-cmp_cpu_qi_prof[i]);
	}
	// FFT out (real parts only)
	const uint nFFT = (uint)std::min(cmp_cpu_qi_fft_out.size(), cmp_gpu_qi_fft_out.size());
	for(uint i=0;i<nFFT;i++) {
		dbgprintf("fft-out[%d]. CPU=%f, GPU=%f, Diff: %f\n", i, cmp_cpu_qi_fft_out[i].real(), cmp_gpu_qi_fft_out[i].real(), cmp_gpu_qi_fft_out[i].real()-cmp_cpu_qi_fft_out[i].real());
	}

	// NOTE(review): dbgImg is not freed here, matching the original; confirm
	// who owns the buffer returned by DebugImage().
	img.free();
	lut.free();
}
std::vector< std::complex< float > > cmp_gpu_qi_fft_out
Definition: test.cu:672
32 bit float
Definition: qtrk_c_api.h:37
int width
Width of regions of interest to be handled. Typically equals height (square ROI). ...
Definition: qtrk_c_api.h:106
std::vector< float > cmp_cpu_qi_prof
Definition: test.cu:668
unsigned int uint
Definition: std_incl.h:127
void GenerateImageFromLUT(ImageData *image, ImageData *zlut, float minradius, float maxradius, vector3f pos, bool splineInterp, int oversampleSubdiv)
Definition: utils.cpp:354
std::vector< std::complex< float > > cmp_cpu_qi_fft_out
Definition: test.cu:671
int height
Height of regions of interest to be handled. Typically equals width (square ROI). ...
Definition: qtrk_c_api.h:107
Structure for the settings used by the algorithms implemented in QueuedTracker.
Definition: qtrk_c_api.h:82
static TImageData alloc(int w, int h)
Definition: utils.h:110
CPU implementation of the QueuedTracker interface.
void free()
Definition: utils.h:111
int ReadJPEGFile(uchar *srcbuf, int srclen, uchar **data, int *width, int *height)
Definition: fastjpg.cpp:12
int h
Definition: utils.h:81
static std::vector< vector3f > FetchResults(QueuedTracker *trk)
Definition: SharedTests.h:68
COM+QI.
Definition: qtrk_c_api.h:9
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149
int qi_iterations
Number of times to run the QI algorithm, sampling around the last found position. ...
Definition: qtrk_c_api.h:140
CUDA implementation of the QueuedTracker interface.
std::vector< float > cmp_gpu_qi_prof
Definition: test.cu:669
void FloatToJPEGFile(const char *name, const float *d, int w, int h)
Definition: fastjpg.cpp:189
int numThreads
Number of threads/streams to use. Defaults differ between CPU and GPU implementations.
Definition: qtrk_c_api.h:114
Structure for region of interest metadata.
Definition: qtrk_c_api.h:49
unsigned char uchar
Definition: std_incl.h:130
int w
Definition: utils.h:81
T * data
Definition: utils.h:80

§ QTrkCompareTest()

void QTrkCompareTest ( )

Definition at line 119 of file test.cu.

/// Benchmarks the CUDA tracker on a synthetic image and prints the first
/// results next to those of the CPU tracker for comparison.
///
/// Steps:
///  1. Configure both trackers (small workload when _DEBUG is defined).
///  2. Optionally build a radial ZLUT from generated images (haveZLUT).
///  3. Dump both ZLUTs to "zlut-cpu.txt" / "zlut-gpu.txt" and the CUDA one to
///     "qtrkzlutcuda.jpg".
///  4. Schedule `total` localizations of one generated image on both trackers.
///  5. Fetch up to 20 results from each, sort them by descending frame number
///     and print them, followed by the measured CUDA throughput.
void QTrkCompareTest()
{
	QTrkSettings cfg;
	cfg.width = cfg.height = 40;
	cfg.qi_iterations = 1;
	cfg.xc1_iterations = 2;
	cfg.xc1_profileLength = 64;
	cfg.numThreads = -1;
	cfg.com_bgcorrection = 0.0f;
	bool haveZLUT = false;
#ifdef _DEBUG
	cfg.numThreads = 2;
	cfg.qi_iterations=1;
	int total= 10;
	int batchSize = 2;
	haveZLUT=false;
#else
	cfg.numThreads = 4;
	int total = 10000;
	int batchSize = 512;
#endif

	QueuedCUDATracker qtrk(cfg, batchSize);
	QueuedCPUTracker qtrkcpu(cfg);
	ImageData img = ImageData::alloc(cfg.width,cfg.height);
	bool cpucmp = true;

	qtrk.EnableTextureCache(true);

	srand(1);

	// Generate ZLUT
	int zplanes=100;
	float zmin=0.5,zmax=3;
	qtrk.SetRadialZLUT(0, 1, zplanes);
	if (cpucmp) qtrkcpu.SetRadialZLUT(0, 1, zplanes);
	if (haveZLUT) {
		for (int x=0;x<zplanes;x++) {
			vector2f center ( cfg.width/2, cfg.height/2 );
			float s = zmin + (zmax-zmin) * x/(float)(zplanes-1);
			GenerateTestImage(img, center.x, center.y, s, 0.0f);
			WriteJPEGFile("qtrkzlutimg.jpg", img);

			qtrk.BuildLUT(img.data,img.pitch(),QTrkFloat, 0, (vector2f*)(0));
			if (cpucmp)
				qtrkcpu.BuildLUT(img.data,img.pitch(),QTrkFloat, 0);
		}
		qtrk.FinalizeLUT();
		if (cpucmp) qtrkcpu.FinalizeLUT();
		// wait to finish ZLUT
		while(true) {
			int rc = qtrk.GetResultCount();
			if (rc == zplanes) break;
			Sleep(100);
			dbgprintf(".");
		}
		if (cpucmp) {
			// Yield while polling instead of spinning.
			while(qtrkcpu.GetResultCount() != zplanes)
				Sleep(1);
		}
	}

	// std::vector owns the buffers, so they are released on every exit path.
	std::vector<float> zlut(qtrk.cfg.zlut_radialsteps*zplanes);
	qtrk.GetRadialZLUT(&zlut[0]);
	if (cpucmp) {
		std::vector<float> zlutcpu(qtrkcpu.cfg.zlut_radialsteps*zplanes);
		qtrkcpu.GetRadialZLUT(&zlutcpu[0]);

		WriteImageAsCSV("zlut-cpu.txt", &zlutcpu[0], qtrkcpu.cfg.zlut_radialsteps, zplanes);
		// The GPU table is laid out with the GPU tracker's own row width.
		WriteImageAsCSV("zlut-gpu.txt", &zlut[0], qtrk.cfg.zlut_radialsteps, zplanes);
	}
	qtrk.ClearResults();
	if (cpucmp) qtrkcpu.ClearResults();
	FloatToJPEGFile ("qtrkzlutcuda.jpg", &zlut[0], qtrk.cfg.zlut_radialsteps, zplanes);

	// Schedule images to localize on
	dbgprintf("Benchmarking...\n");
	GenerateTestImage(img, cfg.width/2, cfg.height/2, (zmin+zmax)/2, 0);
	double tstart = GetPreciseTime();
	int rc = 0, displayrc=0;
	// Progress is reported every 10% of the workload; never let the step be 0.
	const int progressStep = std::max(1, total/10);
	LocMode_t flags = (LocMode_t)(LT_NormalizeProfile |LT_QI| (haveZLUT ? LT_LocalizeZ : 0) );
	qtrk.SetLocalizationMode(flags);
	qtrkcpu.SetLocalizationMode(flags);
	for (int n=0;n<total;n++) {
		LocalizationJob jobInfo;
		jobInfo.frame = n;
		jobInfo.zlutIndex = 0;
		qtrk.ScheduleLocalization((uchar*)img.data, cfg.width*sizeof(float), QTrkFloat,&jobInfo);
		if (cpucmp) qtrkcpu.ScheduleLocalization((uchar*)img.data, cfg.width*sizeof(float), QTrkFloat, &jobInfo);
		if (n % 10 == 0) {
			rc = qtrk.GetResultCount();
			while (displayrc<rc) {
				if( displayrc%progressStep==0) dbgprintf("Done: %d / %d\n", displayrc, total);
				displayrc++;
			}
		}
	}
	if (cpucmp) qtrkcpu.Flush();
	WaitForFinish(&qtrk, total);

	// Measure speed (CUDA tracker only; the CPU wait below is not timed)
	double tend = GetPreciseTime();

	if (cpucmp) {
		dbgprintf("waiting for cpu results..\n");
		while (total != qtrkcpu.GetResultCount())
			Sleep(10);
	}


	img.free();

	const int NumResults = 20;
	LocalizationResult results[NumResults], resultscpu[NumResults];
	int rcount = std::min(NumResults,total);
	for (int i=0;i<rcount;i++) {
		qtrk.FetchResults(&results[i], 1);
		if (cpucmp) qtrkcpu.FetchResults(&resultscpu[i], 1);
	}

	// Sort both result sets by descending frame number so that equal indices
	// refer to the same job.
	auto byFrameDescending = [](const LocalizationResult& a, const LocalizationResult& b) -> bool { return a.job.frame > b.job.frame; };
	std::sort(results, results+rcount, byFrameDescending);
	if(cpucmp) std::sort(resultscpu, resultscpu+rcount, byFrameDescending);
	for (int i=0;i<rcount;i++) {
		const LocalizationResult& g = results[i];
		dbgprintf("gpu [%d] x: %f, y: %f. z: %+g, COM: %f, %f\n", i,g.pos.x, g.pos.y, g.pos.z, g.firstGuess.x, g.firstGuess.y);

		if (cpucmp) {
			// Separate reference: assigning through the GPU reference would
			// overwrite results[i] with the CPU result.
			const LocalizationResult& c = resultscpu[i];
			dbgprintf("cpu [%d] x: %f, y: %f. z: %+g, COM: %f, %f\n", i,c.pos.x, c.pos.y, c.pos.z, c.firstGuess.x, c.firstGuess.y);
		}
	}

	dbgprintf("Localization Speed: %d (img/s)\n", (int)( total/(tend-tstart) ));
}
void GenerateTestImage(ImageData &img, float xp, float yp, float size, float SNratio)
Definition: utils.cpp:162
static double WaitForFinish(QueuedTracker *qtrk, int N)
Definition: SharedTests.h:300
32 bit float
Definition: qtrk_c_api.h:37
int width
Width of regions of interest to be handled. Typically equals height (square ROI). ...
Definition: qtrk_c_api.h:106
int zlutIndex
Bead number of this ROI. Used to get the right ZLUT from memory.
Definition: qtrk_c_api.h:58
void WriteJPEGFile(uchar *data, int w, int h, const char *filename, int quality)
Definition: fastjpg.cpp:89
Structure for job results.
Definition: qtrk_c_api.h:67
int height
Height of regions of interest to be handled. Typically equals width (square ROI). ...
Definition: qtrk_c_api.h:107
Structure for the settings used by the algorithms implemented in QueuedTracker.
Definition: qtrk_c_api.h:82
uint frame
Frame number this ROI belongs to.
Definition: qtrk_c_api.h:56
static TImageData alloc(int w, int h)
Definition: utils.h:110
double GetPreciseTime()
Definition: utils.cpp:669
int LocMode_t
Definition: qtrk_c_api.h:30
vector3f pos
Final 3D position found. If no z localization was performed, the value of z will be 0...
Definition: qtrk_c_api.h:69
Enable z localization.
Definition: qtrk_c_api.h:21
float com_bgcorrection
Background correction factor for COM. Defines the number of standard deviations data needs to be away...
Definition: qtrk_c_api.h:133
int pitch() const
Definition: utils.h:100
Normalize found radial profiles.
Definition: qtrk_c_api.h:22
LocalizationJob job
Job metadata. See LocalizationJob.
Definition: qtrk_c_api.h:68
CPU implementation of the QueuedTracker interface.
void free()
Definition: utils.h:111
vector2f firstGuess
(x,y) position found by the COM localization. Used as initial position for the subsequent algorithms...
Definition: qtrk_c_api.h:71
COM+QI.
Definition: qtrk_c_api.h:9
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149
int qi_iterations
Number of times to run the QI algorithm, sampling around the last found position. ...
Definition: qtrk_c_api.h:140
CUDA implementation of the QueuedTracker interface.
int xc1_iterations
Number of times to run the cross correlation algorithm.
Definition: qtrk_c_api.h:161
void FloatToJPEGFile(const char *name, const float *d, int w, int h)
Definition: fastjpg.cpp:189
void WriteImageAsCSV(const char *file, float *d, int w, int h, const char *labels[])
Definition: utils.cpp:551
int numThreads
Number of threads/streams to use. Defaults differ between CPU and GPU implementations.
Definition: qtrk_c_api.h:114
Structure for region of interest metadata.
Definition: qtrk_c_api.h:49
int xc1_profileLength
Profile length for the cross correlation.
Definition: qtrk_c_api.h:159
unsigned char uchar
Definition: std_incl.h:130
T * data
Definition: utils.h:80

§ ShowCUDAError()

void ShowCUDAError ( )

Definition at line 57 of file test.cu.

/// Logs the description of the most recent CUDA runtime error via dbgprintf.
/// Reading the error with cudaGetLastError also resets it.
void ShowCUDAError()
{
	dbgprintf("Cuda error: %s\n", cudaGetErrorString(cudaGetLastError()));
}
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149

§ SimpleKernel()

__global__ void SimpleKernel ( int  N,
float *  a 
)

Definition at line 268 of file test.cu.

/// Busy-work kernel: each thread with a global index below N updates its own
/// element of `a` in place 1000 times with a[idx] = asin(a[idx] + x).
///
/// Launch layout: 1D grid of 1D blocks, one thread per element; surplus
/// threads in the last block exit immediately.
///
/// @param N  Number of elements in a.
/// @param a  Device array of at least N floats, updated in place.
__global__ void SimpleKernel(int N, float *a)
{
	const int idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx >= N)
		return;

	int x = 0;
	while (x < 1000) {
		a[idx] = asin(a[idx]+x);
		++x;
	}
}

§ SmallestPowerOfTwo()

int SmallestPowerOfTwo ( int  minval)

Definition at line 388 of file test.cu.

/// Returns the smallest power of two that is >= minval.
///
/// - minval <= 1 yields 1.
/// - If no power of two >= minval fits in an int, the result saturates at the
///   largest representable power of two. Doubling past that point would
///   overflow a signed int.
int SmallestPowerOfTwo(int minval)
{
	// Largest power of two that an int can hold (assumes 8-bit bytes).
	const int kMaxPow2 = 1 << (sizeof(int)*8 - 2);

	int r = 1;
	while (r < minval && r < kMaxPow2)
		r *= 2;
	return r;
}

§ SpeedCompareTest()

SpeedInfo SpeedCompareTest ( int  w,
LocalizeModeEnum  locMode,
bool  haveZLUT,
int  qi_iterations = 5 
)

Definition at line 401 of file test.cu.

// Runs the same SpeedTest workload on a CPU tracker and on a CUDA tracker and
// returns both measured speeds (and the schedule times SpeedTest reports) in a
// SpeedInfo. w is used for both ROI width and height; locMode is combined with
// LT_NormalizeProfile; haveZLUT and qi_iterations are passed through.
402 {
// Release workload; reduced below when _DEBUG is defined.
403  int cudaBatchSize = 1024;
404  int count = 60000;
405 
406 #ifdef _DEBUG
407  count = 100;
408  cudaBatchSize = 32;
409 #endif
410  LocMode_t locType = (LocMode_t)( locMode|LT_NormalizeProfile );
411 
412  QTrkComputedConfig cfg;
413  cfg.width = cfg.height = w;
414  cfg.qi_iterations = qi_iterations;
415  cfg.qi_radial_coverage = 1.5f;
416  cfg.qi_angstep_factor = 1.5f;
417  cfg.qi_angular_coverage = 0.7f;
418  cfg.zlut_radial_coverage = 2.0f;
419  //std::vector<int> devices(1); devices[0]=1;
420  //SetCUDADevices(devices);
// NOTE(review): this listing jumps from source line 420 to 422. The symbol
// list below the function mentions cuda_device and QTrkCUDA_UseAll, so line
// 421 presumably assigns one to the other - confirm against test.cu.
422  cfg.numThreads = -1;
423  cfg.com_bgcorrection = 0.0f;
// Update() computes the derived settings (qi_maxradius, qi_radialsteps, ...)
// that are printed on the next line.
424  cfg.Update();
425  dbgprintf("Width: %d, QI radius: %f, radialsteps: %d\n", w, cfg.qi_maxradius, cfg.qi_radialsteps);
426 
427  SpeedInfo info;
// The CPU tracker is created, measured and destroyed before the CUDA tracker
// is created, so the two never exist at the same time.
428  QueuedCPUTracker *cputrk = new QueuedCPUTracker(cfg);
429  info.speed_cpu = SpeedTest(cfg, cputrk, count, haveZLUT, locType, &info.sched_cpu, false);
430  delete cputrk;
431 
432  QueuedCUDATracker *cudatrk = new QueuedCUDATracker(cfg, cudaBatchSize);
433  info.speed_gpu = SpeedTest(cfg, cudatrk, count, haveZLUT, locType, &info.sched_gpu, false);
434  //info.speed_gpu = SpeedTest(cfg, cudatrk, count, haveZLUT, locType, &info.sched_gpu);
// The profile report is fetched but not used anywhere in this listing.
435  std::string report = cudatrk->GetProfileReport();
436  delete cudatrk;
437 
438  dbgprintf("CPU tracking speed: %d img/s\n", (int)info.speed_cpu);
439  dbgprintf("GPU tracking speed: %d img/s\n", (int)info.speed_gpu);
440 
441  return info;
442 }
float SpeedTest(const QTrkSettings &cfg, QueuedTracker *qtrk, int count, bool haveZLUT, LocMode_t locType, float *scheduleTime, bool gaincorrection=false)
Definition: test.cu:315
int width
Width of regions of interest to be handled. Typically equals height (square ROI). ...
Definition: qtrk_c_api.h:106
float speed_gpu
Definition: test.cu:397
int height
Height of regions of interest to be handled. Typically equals width (square ROI). ...
Definition: qtrk_c_api.h:107
float sched_gpu
Definition: test.cu:398
int LocMode_t
Definition: qtrk_c_api.h:30
void Update()
Compute the derived settings.
float com_bgcorrection
Background correction factor for COM. Defines the number of standard deviations data needs to be away...
Definition: qtrk_c_api.h:133
std::string GetProfileReport() override
Get the output of performance profiling.
float qi_maxradius
Max radius in pixels of the sampling circle.
Definition: qtrk_c_api.h:204
Normalize found radial profiles.
Definition: qtrk_c_api.h:22
float speed_cpu
Definition: test.cu:397
CPU implementation of the QueuedTracker interface.
int cuda_device
CUDA only. Flag for device selection.
Definition: qtrk_c_api.h:131
float qi_radial_coverage
Sampling points per radial pixel. Default 3.0.
Definition: qtrk_c_api.h:142
#define QTrkCUDA_UseAll
Definition: qtrk_c_api.h:117
Structure for derived settings computed from base settings in QTrkSettings.
Definition: qtrk_c_api.h:189
int qi_radialsteps
Number of radial steps to sample on.
Definition: qtrk_c_api.h:202
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149
int qi_iterations
Number of times to run the QI algorithm, sampling around the last found position. ...
Definition: qtrk_c_api.h:140
CUDA implementation of the QueuedTracker interface.
float qi_angstep_factor
Factor to reduce angular steps on lower iterations. Default 1.0 (no effect).
Definition: qtrk_c_api.h:157
float sched_cpu
Definition: test.cu:398
int numThreads
Number of threads/streams to use. Defaults differ between CPU and GPU implementations.
Definition: qtrk_c_api.h:114
float zlut_radial_coverage
Sampling points per radial pixel. Default 3.0.
Definition: qtrk_c_api.h:136
float qi_angular_coverage
Factor of the sampling perimeter to cover with angular sampling steps. Between 0 and 1...
Definition: qtrk_c_api.h:143

§ SpeedTest()

float SpeedTest ( const QTrkSettings cfg,
QueuedTracker qtrk,
int  count,
bool  haveZLUT,
LocMode_t  locType,
float *  scheduleTime,
bool  gaincorrection = false 
)

qtrk->ScheduleLocalization((uchar*)image, cfg.width*sizeof(float), QTrkFloat, flags, n, 0, 0, 0, 0);

Definition at line 315 of file test.cu.

/// Measures the localization throughput of a tracker on one synthetic image.
///
/// Optionally builds a 100-plane radial ZLUT first, then schedules `count`
/// single-ROI frames, waits for all results and reports schedule-call timing.
///
/// @param cfg             Settings; width/height give the generated image size.
/// @param qtrk            Tracker under test.
/// @param count           Number of frames to schedule. Values <= 0 schedule
///                        nothing and report zero schedule times.
/// @param haveZLUT        Build a ZLUT and add LT_LocalizeZ to the mode.
/// @param locType         Base localization mode.
/// @param scheduleTime    Out: mean duration of one ScheduleFrame call, in the
///                        units of GetPreciseTime().
/// @param gaincorrection  Call EnableGainCorrection on the tracker first.
/// @return                count divided by the elapsed time from the first
///                        schedule call until all results are available.
float SpeedTest(const QTrkSettings& cfg, QueuedTracker* qtrk, int count, bool haveZLUT, LocMode_t locType, float* scheduleTime, bool gaincorrection=false)
{
	ImageData img=ImageData::alloc(cfg.width,cfg.height);
	srand(1);

	// Generate ZLUT
	int zplanes=100;
	float zmin=0.5,zmax=3;
	qtrk->SetRadialZLUT(0, 1, zplanes);
	if (gaincorrection) EnableGainCorrection(qtrk);
	if (haveZLUT) {
		for (int x=0;x<zplanes;x++) {
			vector2f center( cfg.width/2, cfg.height/2 );
			float s = zmin + (zmax-zmin) * x/(float)(zplanes-1);
			GenerateTestImage(img, center.x, center.y, s, 0.0f);
			qtrk->BuildLUT(img.data,img.pitch(),QTrkFloat, 0);
		}
		qtrk->FinalizeLUT();
	}
	qtrk->ClearResults();

	// Schedule images to localize on
	dbgprintf("Benchmarking...\n");
	GenerateTestImage(img, cfg.width/2, cfg.height/2, (zmin+zmax)/2, 0);
	double tstart = GetPreciseTime();
	int rc = 0, displayrc=0;
	double maxScheduleTime = 0.0;
	double sumScheduleTime2 = 0.0;
	double sumScheduleTime = 0.0;
	// Progress is reported every 10% of the workload; for count < 10 the
	// integer division would give 0 and make the modulo below divide by zero.
	const int progressStep = std::max(1, count/10);
	qtrk->SetLocalizationMode(locType| (haveZLUT ? LT_LocalizeZ : 0));
	for (int n=0;n<count;n++) {
		double t0 = GetPreciseTime();
		// Previous call, kept for reference:
		// qtrk->ScheduleLocalization((uchar*)image, cfg.width*sizeof(float), QTrkFloat, flags, n, 0, 0, 0, 0);
		ROIPosition roipos[]={ {0,0} };
		LocalizationJob job(n, 0, 0,0);
		qtrk->ScheduleFrame((uchar*)img.data, cfg.width*sizeof(float),cfg.width,cfg.height, roipos, 1, QTrkFloat, &job);
		double dt = GetPreciseTime() - t0;
		maxScheduleTime = std::max(maxScheduleTime, dt);
		sumScheduleTime += dt;
		sumScheduleTime2 += dt*dt;

		if (n % 10 == 0) {
			rc = qtrk->GetResultCount();
			while (displayrc<rc) {
				if( displayrc%progressStep==0) dbgprintf("Done: %d / %d\n", displayrc, count);
				displayrc++;
			}
		}
	}
	WaitForFinish(qtrk, count);

	// Measure speed
	double tend = GetPreciseTime();
	img.free();

	// Statistics are kept in double, and the variance is clamped at zero:
	// E[x^2] - E[x]^2 can come out slightly negative through rounding, which
	// would turn the standard deviation into NaN.
	double mean = 0.0, stdev = 0.0;
	if (count > 0) {
		mean = sumScheduleTime / count;
		const double variance = sumScheduleTime2 / count - mean * mean;
		stdev = variance > 0.0 ? sqrt(variance) : 0.0;
	}
	dbgprintf("Scheduletime: Avg=%f, Max=%f, Stdev=%f\n", mean*1000, maxScheduleTime*1000, stdev*1000);
	*scheduleTime = (float)mean;

	return count/(tend-tstart);
}
virtual int GetResultCount()=0
Get the number of finished localization jobs (=results) available in memory.
virtual int ScheduleFrame(void *imgptr, int pitch, int width, int height, ROIPosition *positions, int numROI, QTRK_PixelDataType pdt, const LocalizationJob *jobInfo)
Schedule an entire frame at once, allowing for further optimizations.
virtual void BuildLUT(void *data, int pitch, QTRK_PixelDataType pdt, int plane, vector2f *known_pos=0)=0
Add a new lookup table plane.
void GenerateTestImage(ImageData &img, float xp, float yp, float size, float SNratio)
Definition: utils.cpp:162
static double WaitForFinish(QueuedTracker *qtrk, int N)
Definition: SharedTests.h:300
32 bit float
Definition: qtrk_c_api.h:37
int width
Width of regions of interest to be handled. Typically equals height (square ROI). ...
Definition: qtrk_c_api.h:106
static void EnableGainCorrection(QueuedTracker *qtrk)
Definition: SharedTests.h:189
Struct used to define the top-left corner position of an ROI within a frame. ROI is [ x ...
Definition: qtrk_c_api.h:178
int height
Height of regions of interest to be handled. Typically equals width (square ROI). ...
Definition: qtrk_c_api.h:107
vector3< T > sqrt(const vector3< T > &a)
Definition: std_incl.h:112
virtual void FinalizeLUT()=0
Finalize the lookup tables in memory.
virtual void SetLocalizationMode(LocMode_t locType)=0
Select which algorithm is to be used.
static TImageData alloc(int w, int h)
Definition: utils.h:110
double GetPreciseTime()
Definition: utils.cpp:669
virtual void SetRadialZLUT(float *data, int count, int planes)=0
Set the radial lookup tables to be used for z tracking.
Enable z localization.
Definition: qtrk_c_api.h:21
int pitch() const
Definition: utils.h:100
void free()
Definition: utils.h:111
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149
virtual void ClearResults()=0
Clear results.
Structure for region of interest metadata.
Definition: qtrk_c_api.h:49
unsigned char uchar
Definition: std_incl.h:130
T * data
Definition: utils.h:80

§ TestAsync()

void TestAsync ( )

Definition at line 276 of file test.cu.

/// Times a host-to-device copy, a SimpleKernel launch and a device-to-host
/// copy of N floats, then waits on an event recorded after them.
///
/// Each stage is wrapped in a scoped MeasureTime object labelled with the
/// stage name.
void TestAsync()
{
	int N =100000;
	int nt = 32;

	pinned_array<float> a(N);
//	cudaMallocHost(&a, sizeof(float)*N, 0);

	device_vec<float> A(N);

	cudaStream_t s0;
	cudaEvent_t done;

	// NOTE(review): s0 is created and destroyed but nothing in this function
	// is submitted to it; the kernel launch and the event record below do not
	// name a stream.
	cudaStreamCreate(&s0);
	cudaEventCreate(&done,0);

	for (int x=0;x<N;x++)
		a[x] = cos(x*0.01f);

	for (int x=0;x<1;x++) {
		{ MeasureTime mt("a->A"); A.copyToDevice(a.data(), N, true); }
		{ MeasureTime mt("func(A)");
			// Ceil-div so the last partial block is launched too.
			SimpleKernel<<<dim3( (N+nt-1)/nt ), dim3(nt)>>>(N, A.data);
		}
		{ MeasureTime mt("A->a"); A.copyToHost(a.data(), true); }
	}
	cudaEventRecord(done);

	{
		// The timer must be a named object: an unnamed temporary is destroyed
		// at the end of its own statement and would not cover the wait loop.
		MeasureTime mt("sync...");

		// Spin only while the event is still pending; any other status is an
		// error and would otherwise keep this loop running forever.
		cudaError_t status;
		while ((status = cudaEventQuery(done)) == cudaErrorNotReady)
			;
		if (status != cudaSuccess)
			dbgprintf("cudaEventQuery failed: %s\n", cudaGetErrorString(status));
	}

	cudaStreamDestroy(s0);
	cudaEventDestroy(done);
}
__global__ void SimpleKernel(int N, float *a)
Definition: test.cu:268

§ TestBenchmarkLUT()

void TestBenchmarkLUT ( )

Definition at line 726 of file test.cu.

/// Loads "refbeadlut.jpg" into a BenchmarkLUT, writes the LUT it generates to
/// "refbeadlut-lutsmp.jpg" and a 120x120 generated sample image to
/// "refbeadlut-bmsmp.jpg".
void TestBenchmarkLUT()
{
	BenchmarkLUT benchLut("refbeadlut.jpg");

	ImageData sample = ImageData::alloc(120, 120);

	// Generated LUT: allocate, fill, save, release.
	{
		ImageData lutImage = ImageData::alloc(benchLut.lut_w, benchLut.lut_h);
		benchLut.GenerateLUT(&lutImage);
		WriteJPEGFile("refbeadlut-lutsmp.jpg", lutImage);
		lutImage.free();
	}

	// Sample at the image centre and the middle LUT row (integer halves).
	const vector3f samplePos(sample.w/2, sample.h/2, benchLut.lut_h/2);
	benchLut.GenerateSample(&sample, samplePos, 0, sample.w/2-5);
	WriteJPEGFile("refbeadlut-bmsmp.jpg", sample);
	sample.free();
}
void WriteJPEGFile(uchar *data, int w, int h, const char *filename, int quality)
Definition: fastjpg.cpp:89
static TImageData alloc(int w, int h)
Definition: utils.h:110
void free()
Definition: utils.h:111
int h
Definition: utils.h:81
vector3< float > vector3f
Definition: std_incl.h:114
int w
Definition: utils.h:81

§ TestGauss2D()

void TestGauss2D ( bool  calib)

Definition at line 648 of file test.cu.

/// Runs Gauss2DTest on the CPU and on the CUDA tracker and prints the first
/// (at most 20) positions from each side together with their difference.
///
/// Only indices present in both result vectors are printed.
///
/// @param calib  Forwarded to Gauss2DTest for both trackers.
void TestGauss2D(bool calib)
{
	int N=20, R=1000;
#ifdef _DEBUG
	R=1;
#endif
	std::vector<vector3f> rcpu = Gauss2DTest<QueuedCPUTracker>(N, R, calib);
	std::vector<vector3f> rgpu = Gauss2DTest<QueuedCUDATracker>(N, R, calib);

	// Guard against a tracker returning fewer than N results.
	const int available = (int)std::min(rcpu.size(), rgpu.size());
	const int shown = std::min(std::min(20, N), available);
	for (int i=0;i<shown;i++) {
		dbgprintf("[%d] CPU: X:%.5f, Y:%.5f\t;\tGPU: X:%.5f, Y:%.5f. \tDiff: X:%.5f, Y:%.5f\n",
			i, rcpu[i].x, rcpu[i].y, rgpu[i].x, rgpu[i].y, rcpu[i].x-rgpu[i].x, rcpu[i].y-rgpu[i].y);
	}
}
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149

§ TestRadialLUTGradientMethod()

void TestRadialLUTGradientMethod ( )

Definition at line 663 of file test.cu.

// This function currently has an empty body and does nothing when called.
664 {
665 
666 }

§ TestSharedMem()

void TestSharedMem ( )

Definition at line 96 of file test.cu.

// Runs testWithGlobal (per-thread buffer in global memory) and testWithShared
// (per-thread buffer in dynamic shared memory) over n elements, times each
// launch including its synchronization, then prints both result arrays and
// the measured elements-per-second of each variant.
// If either kernel fails to launch or to run, the CUDA error string is logged
// and the function returns without printing results or timings.
void TestSharedMem()
{
	int n = 100, s = 200;
	dim3 nthreads(32), nblocks((n + nthreads.x - 1) / nthreads.x);	// ceil(n / threads-per-block)
	device_vec<float> buf(n * s);
	device_vec<float> result_s(n), result_g(n);

	double t0 = GetPreciseTime();
	testWithGlobal<<<nblocks, nthreads>>>(n, s, result_g.data, buf.data);
	// A kernel launch returns no status: cudaGetLastError() reports a rejected
	// launch, cudaDeviceSynchronize() reports errors raised during execution.
	cudaError_t err = cudaGetLastError();
	if (err == cudaSuccess)
		err = cudaDeviceSynchronize();
	if (err != cudaSuccess) {
		dbgprintf("testWithGlobal failed: %s\n", cudaGetErrorString(err));
		return;
	}
	double t1 = GetPreciseTime();

	// Dynamic shared memory request: s floats for every thread of the block.
	testWithShared<<<nblocks, nthreads, s * sizeof(float) * nthreads.x>>>(n, s, result_s.data);
	err = cudaGetLastError();
	if (err == cudaSuccess)
		err = cudaDeviceSynchronize();
	if (err != cudaSuccess) {
		dbgprintf("testWithShared failed: %s\n", cudaGetErrorString(err));
		return;
	}
	double t2 = GetPreciseTime();

	// Copy both result arrays back to the host for printing.
	std::vector<float> rs = result_s, rg = result_g;
	for (int x = 0; x < n; x++) {
		dbgprintf("result_s[%d]=%f. result_g[%d]=%f\n", x, rs[x], x, rg[x]);
	}

	dbgprintf("Speed of shared comp: %f, speed of global comp: %f\n", n / (t2 - t1), n / (t1 - t0));
}
double GetPreciseTime()
Definition: utils.cpp:669
__global__ void testWithShared(int n, int s, float *result)
Definition: test.cu:89
void dbgprintf(const char *fmt,...)
Definition: utils.cpp:149

§ testWithGlobal()

__global__ void testWithGlobal ( int  n,
int  s,
float *  result,
float *  buf 
)

Definition at line 82 of file test.cu.

// Kernel: every thread whose global index is below n calls compute() on the
// region of buf starting at element index*s and stores the value in
// result[index]. Threads with an index >= n do nothing.
__global__ void testWithGlobal(int n, int s, float *result, float *buf)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;
	if (tid >= n)
		return;

	float *slice = buf + tid * s;
	result[tid] = compute(tid, slice, s);
}
__device__ float compute(int idx, float *buf, int s)
Definition: test.cu:64

§ testWithShared()

__global__ void testWithShared ( int  n,
int  s,
float *  result 
)

Definition at line 89 of file test.cu.

// Kernel: every thread whose global index is below n calls compute() on its
// own region of cudaSharedMem, starting at element threadIdx.x*s, and stores
// the value in result[index]. Threads with an index >= n do nothing.
// The launch must supply enough dynamic shared memory for blockDim.x regions;
// TestSharedMem passes s*sizeof(float)*blockDim.x bytes.
__global__ void testWithShared(int n, int s, float *result)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;
	if (tid >= n)
		return;

	float *scratch = cudaSharedMem + threadIdx.x * s;
	result[tid] = compute(tid, scratch, s);
}
__device__ float compute(int idx, float *buf, int s)
Definition: test.cu:64
__shared__ float cudaSharedMem[]
Definition: test.cu:62

Variable Documentation

§ cmp_cpu_qi_fft_out

std::vector< std::complex<float> > cmp_cpu_qi_fft_out

Definition at line 671 of file test.cu.

§ cmp_cpu_qi_prof

std::vector< float > cmp_cpu_qi_prof

Definition at line 668 of file test.cu.

§ cmp_gpu_qi_fft_out

std::vector< std::complex<float> > cmp_gpu_qi_fft_out

Definition at line 672 of file test.cu.

§ cmp_gpu_qi_prof

std::vector< float > cmp_gpu_qi_prof

Definition at line 669 of file test.cu.

§ cudaSharedMem

__shared__ float cudaSharedMem[]

Definition at line 62 of file test.cu.