% -*- mode: Noweb; noweb-code-mode: c-mode -*- @ \index{imaging} \section{The files} \subsection{Header} <>= #ifndef __IMAGING_H__ #define __IMAGING_H__ #ifndef __UTILITIES_H__ #include "utilities.h" #endif #ifndef __SOURCE_H__ #include "source.h" #endif #define NRANK 2 struct imaging { <> void setup(int __N_PX_PUPIL, int __N_SIDE_LENSLET, int DFT_osf, float IMAGE_osf, float CAMERA_osf); void setup(int __N_PX_PUPIL, int __N_SIDE_LENSLET, int DFT_osf, float IMAGE_osf, float CAMERA_osf, int __N_SOURCE); void setup(int __N_PX_PUPIL, int __N_SIDE_LENSLET, int DFT_osf, int N_PX_IMAGE_, float CAMERA_osf, int __N_SOURCE); void setup(int __N_PX_PUPIL, int __N_SIDE_LENSLET, int DFT_osf, int N_PX_IMAGE_, int BIN_IMAGE, int __N_SOURCE); void setupSegmentPistonSensor(int __N_PX_PUPIL, int __N_SIDE_LENSLET, int _N_DFT_, int N_PX_IMAGE_, int _BIN_IMAGE_, int __N_SOURCE); void cleanup(void); void cleanupSegmentPistonSensor(void); void set_pointing_direction(float *zen, float *azim); void reset(void); void reset_rng(int SEED); void propagate(source *src); void propagate_cpx(source *src); void propagateNoOverlap(source *src); void propagateNoOverlapBare(source *src); void propagateNoOverlapSPS(source *src, float d, float wavenumber); void propagateTT7(source *src); void propagateTT7(source *src, int *d__piston_mask); void propagateThroughFieldStop(source *src, float field_stop_diam); void propagateThroughPyramid(source *src, float alpha); void propagateThroughModulatedPyramid(source *src, float modulation, int modulation_sampling, float alpha); void readout(float exposureTime, float readOutNoiseRms); void noiseless_readout(float exposureTime); void readout(float exposureTime, float readOutNoiseRms, float nBackgroundPhoton, float noiseFactor); float strehl_ratio(imaging *ref); void info(void); void frame2file(const char *filename); void show_frame(char *filename); void show_frame(char *filename, imaging *ref); }; #endif // __IMAGING_H__ @ \subsection{Source} <>= #include "imaging.h" <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> @ \section{Parameters} \label{sec:parameters} \index{imaging!imaging} The parameters are: \begin{itemize} \item the linear size of the pupil in pixel [[N_PX_PUPIL]], \item the linear size of the discrete Fourier transform $[[N_DFT]]=2*[[N_PX_PUPIL]]$, \item the linear size of the lenslet array [[N_SIDE_LENSLET]], \item the linear size of the imagelet [[N_PX_IMAGE]], \item the linear size of the frame [[N_PX_CAMERA]], \item the bin factor of the imagelet [[BIN_IMAGE]] \item the number of sources [[N_SOURCE]], \item the pointing direction vector [[zenith]] and [[azimuth]], if [[zenith]] is negative, the device is align with the guide star, if it is positive or null then the guide star is displaced in the image plane according to their relative position, \item the pixel scale [[pixel_scale]], \item a flag setting the pointing of the imaging device either absolute or relative to the direction of the light source [[absolute_pointing]], \item the optics and detector photon to electron gain [[photoelectron_gain]], \item the random seed generator [[LOCAL_RAND_SEED]], \item the number of photon per second per frame [[N_PHOTON_PER_SECOND_PER_FRAME]]. \end{itemize} <>= int N_PX_PUPIL, N_DFT, N_SIDE_LENSLET, N_LENSLET, N_SOURCE, N_PX_IMAGE, N_PX_CAMERA, N_FRAME, BIN_IMAGE, LOCAL_RAND_SEED; cufftHandle plan; float N_PHOTON_PER_SECOND_PER_FRAME, N_PHOTON_PER_FRAME; float2 *d__wave_PUPIL; float *d__frame, zenith, azimuth, theta_x, theta_y, *d__zenith, *d__azimuth, *d__theta_x, *d__theta_y, pixel_scale, photoelectron_gain; char absolute_pointing; curandState *devStates; @ \section{Functions} \label{sec:functions} In this section, we are dealing with the process of computing images from wavefronts in the pupil. \subsection{Setup \& Cleanup} \label{sec:setup--cleanup} \index{imaging!imaging!setup} A [[setup]] routine is defined to initialize any required variables: <>= void imaging::setup(int __N_PX_PUPIL, int __N_SIDE_LENSLET, int DFT_osf, float IMAGE_osf, float CAMERA_osf) { N_SOURCE = 1; <> <> } <>= void imaging::setup(int __N_PX_PUPIL, int __N_SIDE_LENSLET, int DFT_osf, float IMAGE_osf, float CAMERA_osf, int __N_SOURCE) { N_SOURCE = __N_SOURCE; <> <> } <>= BIN_IMAGE = 1; N_PX_PUPIL = __N_PX_PUPIL; N_DFT = DFT_osf*N_PX_PUPIL;//*round_up_to_nhp2(N_PX_PUPIL); N_PX_IMAGE = IMAGE_osf*N_PX_PUPIL; N_PX_CAMERA = CAMERA_osf*N_PX_IMAGE; N_SIDE_LENSLET = __N_SIDE_LENSLET; N_LENSLET = N_SIDE_LENSLET*N_SIDE_LENSLET; N_FRAME = 0; @ <>= void imaging::setup(int __N_PX_PUPIL, int __N_SIDE_LENSLET, int DFT_osf, int N_PX_IMAGE_, float CAMERA_osf, int __N_SOURCE) { N_SOURCE = __N_SOURCE; <> <> } <>= BIN_IMAGE = 1; N_PX_PUPIL = __N_PX_PUPIL; N_DFT = DFT_osf*N_PX_PUPIL;//round_up_to_nhp2(N_PX_PUPIL); N_PX_IMAGE = N_PX_IMAGE_; N_PX_CAMERA = CAMERA_osf*N_PX_IMAGE; N_SIDE_LENSLET = __N_SIDE_LENSLET; N_LENSLET = N_SIDE_LENSLET*N_SIDE_LENSLET; N_FRAME = 0; @ <>= void imaging::setup(int __N_PX_PUPIL, int __N_SIDE_LENSLET, int DFT_osf, int N_PX_IMAGE_, int _BIN_IMAGE_, int __N_SOURCE) { N_SOURCE = __N_SOURCE; BIN_IMAGE = _BIN_IMAGE_; N_PX_PUPIL = __N_PX_PUPIL+1; N_DFT = DFT_osf*N_PX_PUPIL;//round_up_to_nhp2(N_PX_PUPIL); N_PX_IMAGE = N_PX_IMAGE_; N_PX_CAMERA = N_PX_IMAGE/BIN_IMAGE; N_SIDE_LENSLET = __N_SIDE_LENSLET; N_LENSLET = N_SIDE_LENSLET*N_SIDE_LENSLET; N_FRAME = 0; <> } @ <>= LOCAL_RAND_SEED = RAND_SEED; <> pixel_scale = 0.0; absolute_pointing = 0; photoelectron_gain = 1.0; INFO("@(CEO)>imaging: Device memory allocation\n"); int N_DFT_bytes = N_DFT * N_DFT * N_LENSLET*N_SOURCE; HANDLE_ERROR( cudaMalloc( (void**)&d__wave_PUPIL , sizeof(float2) * N_DFT_bytes) ); HANDLE_ERROR( cudaMalloc( (void**)&d__frame , sizeof(float) *N_PX_CAMERA*N_PX_CAMERA*N_LENSLET*N_SOURCE ) ); reset(); <> INFO("@(CEO)> Setting up random generator\n"); int n_data; n_data = N_SOURCE*N_LENSLET*N_PX_CAMERA*N_PX_CAMERA; HANDLE_ERROR( cudaMalloc( (void**)&devStates, n_data*sizeof(curandState)) ); dim3 blockDim(256); dim3 gridDim(n_data/256+1); setupRandomSequence_imaging LLL gridDim,blockDim RRR (devStates, n_data, LOCAL_RAND_SEED); info(); @ \index{imaging!imaging!setupSegmentPistonSensor} with <>= int n_byte = sizeof(float) * N_SOURCE; HANDLE_ERROR( cudaMalloc( (void**)&d__zenith , n_byte) ); HANDLE_ERROR( cudaMalloc( (void**)&d__azimuth , n_byte) ); HANDLE_ERROR( cudaMalloc( (void**)&d__theta_x , n_byte) ); HANDLE_ERROR( cudaMalloc( (void**)&d__theta_y , n_byte) ); HANDLE_ERROR( cudaMemset(d__zenith, 0, n_byte) ); HANDLE_ERROR( cudaMemset(d__azimuth, 0, n_byte) ); HANDLE_ERROR( cudaMemset(d__theta_x, 0, n_byte) ); HANDLE_ERROR( cudaMemset(d__theta_y, 0, n_byte) ); @ <>= void imaging::setupSegmentPistonSensor(int __N_PX_PUPIL, int __N_SIDE_LENSLET, int _N_DFT_, int N_PX_IMAGE_, int _BIN_IMAGE_, int __N_SOURCE) { N_SOURCE = __N_SOURCE; BIN_IMAGE = _BIN_IMAGE_; N_PX_PUPIL = __N_PX_PUPIL+1; N_DFT = _N_DFT_; N_PX_IMAGE = N_PX_IMAGE_; N_PX_CAMERA = N_PX_IMAGE/BIN_IMAGE; N_SIDE_LENSLET = __N_SIDE_LENSLET; N_LENSLET = N_SIDE_LENSLET*N_SIDE_LENSLET; N_FRAME = 0; <> pixel_scale = 0.0; absolute_pointing = 0; photoelectron_gain = 1.0; int N_DFT_bytes = N_DFT * N_DFT * N_LENSLET*N_SOURCE; cudaMalloc( (void**)&d__wave_PUPIL , sizeof(float2) * N_DFT_bytes); int n_DFT[NRANK] = {N_DFT, N_DFT}; int iodist = N_DFT*N_DFT; int BATCH = N_LENSLET*N_SOURCE; /* Create a 2D FFT plan. */ HANDLE_ERROR_CUFFT( cufftPlanMany(&plan, NRANK, n_DFT, NULL, 1, iodist, NULL, 1, iodist, CUFFT_C2C,BATCH), "Unable to create plan"); /* HANDLE_ERROR_CUFFT( cufftSetCompatibilityMode(plan, CUFFT_COMPATIBILITY_NATIVE), */ /* "Unable to set compatibility mode to native"); */ } @ \index{imaging!imaging!set\_pointing\_direction} The pointing direction is set with: <>= void imaging::set_pointing_direction(float *zen, float *azim) { int n_byte = sizeof(float) * N_SOURCE; HANDLE_ERROR( cudaMemcpy( d__zenith, zen, n_byte, cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( d__azimuth, azim, n_byte, cudaMemcpyHostToDevice ) ); pointing_kern LLL 1, N_SOURCE RRR (d__theta_x, d__theta_y, d__zenith, d__azimuth); /* zenith = zen; azimuth = azim; theta_x = tanf(zenith)*cosf(azimuth); theta_y = tanf(zenith)*sinf(azimuth); */ } @ with the kernel <>= __global__ void pointing_kern(float *theta_x, float *theta_y, float *zenith, float *azimuth) { int k; k = threadIdx.x; theta_x[k] = tanf(zenith[k])*cosf(azimuth[k]); theta_y[k] = tanf(zenith[k])*sinf(azimuth[k]); } @ Dynamically allocated variables are freed with the cleanup routine: \index{imaging!imaging!cleanup} <>= void imaging::cleanup(void) { INFO("@(CEO)>imaging: Device memory free\n"); cufftDestroy(plan); cudaFree( d__wave_PUPIL ); cudaFree( d__frame ); cudaFree( devStates ); cudaFree(d__zenith); cudaFree(d__azimuth); cudaFree(d__theta_x); cudaFree(d__theta_y); } void imaging::cleanupSegmentPistonSensor(void) { cufftDestroy(plan); cudaFree( d__wave_PUPIL ); } @ \subsection{Propagation} \label{sec:propagation} <>= void imaging::propagate_cpx(source *src) { int M = N_PX_IMAGE/N_PX_CAMERA; dim3 gridDim; dim3 blockDim(16,16); gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); complexAmplitude LLL gridDim , blockDim RRR(d__wave_PUPIL, N_DFT, src->wavefront.amplitude, src->wavefront.phase, src->wavenumber(), N_PX_PUPIL, N_PX_IMAGE, M, N_SIDE_LENSLET, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing,1); HANDLE_ERROR_CUFFT( cufftExecC2C(plan, d__wave_PUPIL, d__wave_PUPIL, CUFFT_FORWARD), "Unable to execute plan forward FT"); } @ As we are considering only the case of imaging sources at infinity, the wavefront propagation is performed with the Fraunhofer approximation meaning that the image in the focal plane is simply the Fourier transform of the complex amplitude in the pupil plane. The propagation is computing with a call to either the [[propagate]] routine: \index{imaging!imaging!propagate} <>= void imaging::propagate(source *src) { int M = N_PX_IMAGE/N_PX_CAMERA; /* if ( (pixel_scale>0) && (zenith<0) ) */ /* set_pointing_direction(0.0,0.0); */ /* if (pixel_scale>0 ) { pixel_scale /= BIN_IMAGE; printf(" pixel scale: %.2E\n",pixel_scale); float dx, dy, dx_int, dy_int; int iSource=0; <> printf("ANGULAR OFFSET:\n"); printf(" . x: %3.0f + %4.3f\n",dx_int,dx); printf(" . y: %3.0f + %4.3f\n",dy_int,dy); } */ //absolute_pointing = 1; // fprintf(stdout," pixel scale: %.2E\n",pixel_scale); // fprintf(stdout,"Absolute pointing: %d\n",absolute_pointing); dim3 gridDim; dim3 blockDim(16,16); gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); complexAmplitude LLL gridDim , blockDim RRR(d__wave_PUPIL, N_DFT, src->wavefront.amplitude, src->wavefront.phase, src->wavenumber(), N_PX_PUPIL, N_PX_IMAGE, M, N_SIDE_LENSLET, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing,1); <> } @ or, in the case of no overlapping lenslet, the [[propagateNoOverlap]] routine: \index{imaging!imaging!propagateNoOverlap} <>= void imaging::propagateNoOverlap(source *src) { int M = N_PX_IMAGE/N_PX_CAMERA; dim3 gridDim; dim3 blockDim(16,16); gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); complexAmplitudeNoOverlap LLL gridDim , blockDim RRR(d__wave_PUPIL, N_DFT, src->wavefront.amplitude, src->wavefront.phase, src->wavenumber(), N_PX_PUPIL, N_PX_IMAGE, M, N_SIDE_LENSLET, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing,1); <> <> int N, Nb; N = N_DFT * N_DFT * N_LENSLET*N_SOURCE; Nb = (int) ceilf( sqrtf(N)/16.0 ); gridDim = dim3(Nb,Nb); wave2intensity LLL gridDim , blockDim RRR( d__wave_PUPIL, N, intensity_norm); gridDim = dim3(1+N_PX_CAMERA*N_SIDE_LENSLET/16,1+N_PX_CAMERA*N_SIDE_LENSLET/16, N_SOURCE); binning LLL gridDim , blockDim RRR(d__frame, d__wave_PUPIL, N_DFT, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing, N_PX_IMAGE, N_PX_CAMERA, N_SIDE_LENSLET); ++N_FRAME; } @ or, in the case of no overlapping lenslet and no intensity normalization, the [[propagateNoOverlap]] routine: \index{imaging!imaging!propagateNoOverlapBare} <>= void imaging::propagateNoOverlapBare(source *src) { int M = N_PX_IMAGE/N_PX_CAMERA; dim3 gridDim; dim3 blockDim(16,16); gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); complexAmplitudeNoOverlap LLL gridDim , blockDim RRR(d__wave_PUPIL, N_DFT, src->wavefront.amplitude, src->wavefront.phase, src->wavenumber(), N_PX_PUPIL, N_PX_IMAGE, M, N_SIDE_LENSLET, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing,1); <> int N, Nb; N = N_DFT * N_DFT * N_LENSLET*N_SOURCE; Nb = (int) ceilf( sqrtf(N)/16.0 ); gridDim = dim3(Nb,Nb); wave2intensity LLL gridDim , blockDim RRR( d__wave_PUPIL, N, 1.0); gridDim = dim3(1+N_PX_CAMERA*N_SIDE_LENSLET/16,1+N_PX_CAMERA*N_SIDE_LENSLET/16, N_SOURCE); binning LLL gridDim , blockDim RRR(d__frame, d__wave_PUPIL, N_DFT, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing, N_PX_IMAGE, N_PX_CAMERA, N_SIDE_LENSLET); ++N_FRAME; } @ a special case is the dispersed segment piston sensor: \index{imaging!imaging!propagateNoOverlapSPS} <>= void imaging::propagateNoOverlapSPS(source *src, float d, float wavenumber) { int M = N_PX_IMAGE/N_PX_CAMERA; dim3 gridDim; dim3 blockDim(16,16); gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); complexAmplitudeNoOverlapSPS LLL gridDim , blockDim RRR(d__wave_PUPIL, N_DFT, src->wavefront.amplitude, src->wavefront.phase, wavenumber, N_PX_PUPIL, N_PX_IMAGE, M, N_SIDE_LENSLET, src->dev_ptr, d,1); <> <> int N, Nb; N = N_DFT * N_DFT * N_LENSLET*N_SOURCE; Nb = (int) ceilf( sqrtf(N)/16.0 ); gridDim = dim3(Nb,Nb); wave2intensity LLL gridDim , blockDim RRR( d__wave_PUPIL, N, intensity_norm); gridDim = dim3(1+N_PX_CAMERA*N_SIDE_LENSLET/16,1+N_PX_CAMERA*N_SIDE_LENSLET/16, N_SOURCE); // fprintf(stdout,"FWHM=%f\n",src->fwhm); if (src->fwhm>0) { INFO("CONVOLUTION!\n"); gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); <> } binning LLL gridDim , blockDim RRR(d__frame, d__wave_PUPIL, N_DFT, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing, N_PX_IMAGE, N_PX_CAMERA, N_SIDE_LENSLET); ++N_FRAME; } @ another one is the TT7 \index{imaging!imaging!propagateTT7} <>= void imaging::propagateTT7(source *src) { <> complexAmplitudeTT7 LLL gridDim , blockDim RRR(d__wave_PUPIL, N_DFT, src->wavefront.amplitude, src->wavefront.phase, src->wavenumber(), src->rays.d__piston_mask, N_PX_PUPIL, N_PX_IMAGE, M, N_SIDE_LENSLET, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing,1); <> } void imaging::propagateTT7(source *src, int *d__piston_mask) { <> complexAmplitudeTT7 LLL gridDim , blockDim RRR(d__wave_PUPIL, N_DFT, src->wavefront.amplitude, src->wavefront.phase, src->wavenumber(), d__piston_mask, N_PX_PUPIL, N_PX_IMAGE, M, N_SIDE_LENSLET, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing,1); <> } @ with <>= int M = N_PX_IMAGE/N_PX_CAMERA; /* if ( (pixel_scale>0) && (zenith<0) ) */ /* set_pointing_direction(0.0,0.0); */ /* if (pixel_scale>0 ) { printf(" pixel scale: %.2E\n",pixel_scale); float dx, dy, dx_int, dy_int; <> printf("ANGULAR OFFSET:\n"); printf(" . x: %3.0f + %4.3f\n",dx_int,dx); printf(" . y: %3.0f + %4.3f\n",dy_int,dy); } */ //absolute_pointing = 1; //fprintf(stdout,"Absolute pointing: %d\n",absolute_pointing); dim3 gridDim; dim3 blockDim(16,16); gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); @ and with <>= HANDLE_ERROR_CUFFT( cufftExecC2C(plan, d__wave_PUPIL, d__wave_PUPIL, CUFFT_FORWARD), "Unable to execute plan forward FT"); N_PHOTON_PER_FRAME = N_PHOTON_PER_SECOND_PER_FRAME = src->n_photon()*src->wavefront.M->area; float intensity_norm; intensity_norm = src->wavefront.M->area/src->wavefront.M->nnz; fprintf(stdout,"Source # of photon: %g\n",src->n_photon()); fprintf(stdout,"Wavefront mask area and nnz: (%f,%f) & %f\n",src->rays.V.area,src->wavefront.M->area,src->wavefront.M->nnz); fprintf(stdout,"# of photon per second: %g\n",N_PHOTON_PER_SECOND_PER_FRAME); int N, Nb; N = N_DFT * N_DFT * N_LENSLET*N_SOURCE; Nb = (int) ceilf( sqrtf(N)/16.0 ); gridDim = dim3(Nb,Nb); wave2intensity LLL gridDim , blockDim RRR( d__wave_PUPIL, N, intensity_norm); if (src->fwhm>0) { gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); HANDLE_ERROR_CUFFT( cufftExecC2C(plan, d__wave_PUPIL, d__wave_PUPIL, CUFFT_INVERSE), "Unable to execute plan forward FT"); apply_irradiance LLL gridDim , blockDim RRR (d__wave_PUPIL, N_DFT, N_SIDE_LENSLET, src->fwhm); HANDLE_ERROR_CUFFT( cufftExecC2C(plan, d__wave_PUPIL, d__wave_PUPIL, CUFFT_FORWARD), "Unable to execute plan forward FT"); } gridDim = dim3(1+N_PX_CAMERA*N_SIDE_LENSLET/16,1+N_PX_CAMERA*N_SIDE_LENSLET/16, N_SOURCE); binningTT7 LLL gridDim , blockDim RRR(d__frame, d__wave_PUPIL, N_DFT, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing, N_PX_IMAGE, N_PX_CAMERA, N_SIDE_LENSLET); ++N_FRAME; @ which sets the complex amplitude [[d__wave_PUPIL]] with the source wavefront amplitude and phase including the necessary zero padding for the DFT and the phasor to recenter the image. Then the DFT is applied in--place to the complex amplitude [[d__wave_PUPIL]]. <>= <> @ [[d__wave_PUPIL]] is overwritten with the intensity: <>= <> int N, Nb; N = N_DFT * N_DFT * N_LENSLET*N_SOURCE; Nb = (int) ceilf( sqrtf(N)/16.0 ); gridDim = dim3(Nb,Nb); wave2intensity LLL gridDim , blockDim RRR( d__wave_PUPIL, N, intensity_norm); if (src->fwhm>0) { gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); <> } @ \subsubsection{Convolution} \label{sec:convolution} In the case of a resolved source, the images are convoluted by the source irradiance function. The convolution is performed in the Fourier domain, multiplying the images by the Fourier transform of the irradiance function. The irradiance functions $I_s(x)$ is modeled as a Gaussian function: $$I_s(x,y)={1\over \sigma\sqrt{2\pi}}\exp\left( -{x^2+y^2\over 2\sigma^2} \right),$$ with $$[[fwhm]]=2\sqrt{2\log(2)}\sigma,$$ [[fwhm]] is the full width at half maximum of $I(x,y)$ in detector pixel unit (before binning). The Fourier transform of $I(x,y)$ is $$\mathcal F\left[ I(x,y) \right](u,v) = \exp\left( -2\pi^2\sigma^2 (u^2 + v^2) \right).$$ The convolution takes 3 steps: \begin{enumerate} \item inverse DFT of the images <>= HANDLE_ERROR_CUFFT( cufftExecC2C(plan, d__wave_PUPIL, d__wave_PUPIL, CUFFT_INVERSE), "Unable to execute plan forward FT"); @ \item multiplication by $\mathcal F\left[ I(x,y) \right](u,v)$ <>= apply_irradiance LLL gridDim , blockDim RRR (d__wave_PUPIL, N_DFT, N_SIDE_LENSLET, src->fwhm); @ \item forward DFTT <>= HANDLE_ERROR_CUFFT( cufftExecC2C(plan, d__wave_PUPIL, d__wave_PUPIL, CUFFT_FORWARD), "Unable to execute plan forward FT"); @ \end{enumerate} with the kernel: <>= __global__ void apply_irradiance(float2 *wave_PUPIL, const int N_DFT, const int N_SIDE_LENSLET, const float fwhm) { int i, j, iLenslet, jLenslet, ij_DFT, iSource, w, half, N2; float xi, yj, sigma2, fft_I; iSource = blockIdx.z; if ( threads2lenslet(threadIdx, blockIdx, blockDim, &i, &j, N_DFT, &iLenslet, &jLenslet, N_SIDE_LENSLET) ) { ij_DFT = lenslet2row(i,j,N_DFT,iLenslet,jLenslet,N_SIDE_LENSLET,iSource); w = N_DFT & 1; half = (N_DFT-2+w)/2; if (i>half) xi = (float) (i - N_DFT); else xi = (float) i; if (j>half) yj = (float) (j - N_DFT); else yj = (float) j; xi /= N_DFT; yj /= N_DFT; sigma2 = 8*logf(2); sigma2 = fwhm*fwhm/sigma2; fft_I = 2*PI*PI*sigma2; fft_I *= xi*xi + yj*yj; fft_I = expf( -fft_I); N2 = N_DFT*N_DFT; wave_PUPIL[ij_DFT].x *= fft_I/N2; wave_PUPIL[ij_DFT].y *= fft_I/N2; } } @ The image are binned on the detector pixels: <>= gridDim = dim3(1+N_PX_CAMERA*N_SIDE_LENSLET/16,1+N_PX_CAMERA*N_SIDE_LENSLET/16, N_SOURCE); binning LLL gridDim , blockDim RRR(d__frame, d__wave_PUPIL, N_DFT, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing, N_PX_IMAGE, N_PX_CAMERA, N_SIDE_LENSLET); ++N_FRAME; @ The Fourier transform is done with the CUFFT library: <>= HANDLE_ERROR_CUFFT( cufftExecC2C(plan, d__wave_PUPIL, d__wave_PUPIL, CUFFT_FORWARD), "Unable to execute plan forward FT"); @ that computes the discrete Fourier tranform (DFT) from the complex amplitude [[d__wave_PUPIL]] in the pupil plane. CUFFT requires to define a plan first: <>= INFO("@(CEO)>imaging: Creating a 2D FFT plan\n"); int n_DFT[NRANK] = {N_DFT, N_DFT}; int iodist = N_DFT*N_DFT; int BATCH = N_LENSLET*N_SOURCE; /* Create a 2D FFT plan. */ HANDLE_ERROR_CUFFT( cufftPlanMany(&plan, NRANK, n_DFT, NULL, 1, iodist, NULL, 1, iodist, CUFFT_C2C,BATCH), "Unable to create plan"); /* HANDLE_ERROR_CUFFT( cufftSetCompatibilityMode(plan, CUFFT_COMPATIBILITY_NATIVE), */ /* "Unable to set compatibility mode to native"); */ @ The intensity is computed with <>= __global__ void wave2intensity(float2 *wave_PUPIL, const int N, float intensity_norm) { int i, j, ij; float x ,y; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; ij = j * gridDim.x * blockDim.x + i; if ( ij < N ) { x = wave_PUPIL[ij].x; y = wave_PUPIL[ij].y; wave_PUPIL[ij].x = x*x + y*y; wave_PUPIL[ij].y = 0.0; wave_PUPIL[ij].x *= intensity_norm; } } @ %def wave2intensity @ The intensity is normalized with respect to the brightness of the source: <>= N_PHOTON_PER_FRAME = N_PHOTON_PER_SECOND_PER_FRAME = src->n_photon()*src->wavefront.M->area; float intensity_norm; intensity_norm = src->wavefront.M->area/src->wavefront.M->nnz; /* fprintf(stdout,"Source # of photon: %g\n",src->n_photon()); fprintf(stdout,"Wavefront mask area and nnz: %f & %f\n",src->wavefront.M->area,src->wavefront.M->nnz); fprintf(stdout,"# of photon per second: %g\n",N_PHOTON_PER_SECOND_PER_FRAME); */ @ The camera frame is continuously accumulating images and can be reset with: \index{imaging!imaging!reset} <>= void imaging::reset(void) { HANDLE_ERROR( cudaMemset(d__frame, 0, sizeof(float)*N_PX_CAMERA*N_PX_CAMERA*N_LENSLET*N_SOURCE) ); N_FRAME = 0; } @ The rank of the DFT [[NRANK]] is always 2 and the size of each DFT is $[[N_DFT]]\times[[N_DFT]]$. CUFFT can execute several Fourier transform in one batch. All the inputs wave must be concatenated into a single array and you must specify the number of elements between 2 consecutive waves that is here $[[N_DFT]]^2$. [[BATCH]] gives the number of DFT to execute. The memory allocate for the plan is freed with <>= cufftDestroy(plan); @ \subsubsection{Propagation through field stop} \label{sec:prop-thro-field} In order to reduce aliasing, a field stop is sometimes introduced in a focal plane. The following describes the routine [[propagateThroughFieldStop]] that takes a wavefront from a pupil plane and propagates it to another pupil plane through a focal plane where a field stop is set. The angular resolution in the focal plane is given by $\lambda/kD$ with $k=[[N_DFT]]/[[N_PX_PUPIL]]$. The angular diameter of the field stop is given as $\beta\lambda/D$ and in pixel it is $k\beta$. The pixel coordinate $[i,j]$ in the focal plane are given by $$i,j=\left[ {w-[[N_DFT]]\over 2},\dots,{[[N_DFT]]-2+w\over 2} \right],$$ with $w=0$ for [[N_DFT]] even or $w=1$ for [[N_DFT]] odd. \index{imaging!imaging!propagateThroughFieldStop} <>= void imaging::propagateThroughFieldStop(source *src, float field_stop_diam) { int M = N_PX_IMAGE/N_PX_CAMERA; dim3 gridDim; dim3 blockDim(16,16); gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); complexAmplitude LLL gridDim , blockDim RRR(d__wave_PUPIL, N_DFT, src->wavefront.amplitude, src->wavefront.phase, src->wavenumber(), N_PX_PUPIL, N_PX_IMAGE, M, N_SIDE_LENSLET, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing,0); <> field_stop_diam *= N_DFT/N_PX_PUPIL; gridDim = dim3(1+N_DFT/16,1+N_DFT/16); apply_field_stop LLL gridDim , blockDim RRR( d__wave_PUPIL, N_DFT, N_PX_PUPIL, field_stop_diam); <> } @ <>= __global__ void apply_field_stop(float2 *d__wave_PUPIL, int N_DFT, int N_PX_PUPIL, float field_stop_diam) { int i, j, k, w, half; float xi, yj, rij, l, c, s, phasor, a, b; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; if ( (ihalf) xi = (float) (i - N_DFT); else xi = (float) i; if (j>half) yj = (float) (j - N_DFT); else yj = (float) j; rij = hypot(xi,yj); l = 0.5*field_stop_diam; k = i*N_DFT + j; if (rij>l) { d__wave_PUPIL[k].x = d__wave_PUPIL[k].y = 0.0; } else { phasor = 2.*(i+j)*(N_PX_PUPIL-1)/N_DFT; sincospif(phasor,&s,&c); a = d__wave_PUPIL[k].x; b = d__wave_PUPIL[k].y; d__wave_PUPIL[k].x = a*c - b*s; d__wave_PUPIL[k].y = b*c + a*s; } } } @ \subsubsection{Propagation through pyramid} \label{sec:prop-thro-pym} The pyramid wavefront sensor is a focal plane wavefront sensor. An image is formed on the tip of a glass pyramid and the camera is set behind the pyramid in the next pupil plane. The pyramid transmittance function is \begin{equation} \label{eq:4} T_{pyr}(x,y) = \exp\left[ -{\pi\over2} \left( \left|x\right| + \left|y\right| \right) \right] \end{equation} for $x,y=i,j-[[N_DFT]]/2$ with $i,i=0,\dots,[[N_DFT]]-1$. For a complex amplitude $\Phi$, the camera image $I$ is given by \begin{equation} \label{eq:5} I = \left| \mathcal F \left[ \mathcal F[\Phi] T_{pyr} \right] \right|^2. \end{equation} \index{imaging!imaging!propagateThroughPyramid} <>= void imaging::propagateThroughPyramid(source *src, float alpha) { int M = N_PX_IMAGE/N_PX_CAMERA; dim3 gridDim; dim3 blockDim(16,16); gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); complexAmplitude LLL gridDim , blockDim RRR(d__wave_PUPIL, N_DFT, src->wavefront.amplitude, src->wavefront.phase, src->wavenumber(), N_PX_PUPIL, N_PX_IMAGE, M, N_SIDE_LENSLET, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing,0); <> gridDim = dim3(1+N_DFT/16,1+N_DFT/16, N_SOURCE); apply_pyramid LLL gridDim , blockDim RRR( d__wave_PUPIL, N_DFT, N_PX_PUPIL, alpha); <> } @ with <>= __global__ void apply_pyramid(float2 *d__wave_PUPIL, int N_DFT, int N_PX_PUPIL, float alpha) { int i, j, k, w, half, iSource; float xi, yj, c, s, phasor, a, b; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; if ( (ihalf) i -= N_DFT; if (j>half) j -= N_DFT; xi = (float) i; yj = (float) j; phasor = -2*(i+j)*(N_PX_PUPIL*1.5 + 1 - w*0.5)/N_DFT; phasor -= alpha*(fabs(xi)+fabs(yj)); sincospif(phasor,&s,&c); a = d__wave_PUPIL[k].x; b = d__wave_PUPIL[k].y; d__wave_PUPIL[k].x = (a*c - b*s)/N_DFT; d__wave_PUPIL[k].y = (b*c + a*s)/N_DFT; } } @ In the case of modulation: \index{imaging!imaging!propagateThroughModulatedPyramid} <>= void imaging::propagateThroughModulatedPyramid(source *src, float modulation, int modulation_sampling, float alpha) { int M, nTheta, kTheta, N, Nb; float theta, intensity_norm; nTheta = modulation_sampling; //printf("nTheta=%d\n",nTheta); M = N_PX_IMAGE/N_PX_CAMERA; dim3 gridDim; dim3 blockDim(16,16); for (kTheta = 0;kThetawavefront.amplitude, src->wavefront.phase, src->wavenumber(), N_PX_PUPIL, N_PX_IMAGE, M, N_SIDE_LENSLET, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing,0, modulation,theta); <> gridDim = dim3(1+N_DFT/16,1+N_DFT/16,N_SOURCE); apply_pyramid LLL gridDim , blockDim RRR( d__wave_PUPIL, N_DFT, N_PX_PUPIL, alpha); <> N_PHOTON_PER_FRAME = N_PHOTON_PER_SECOND_PER_FRAME = src->n_photon()*src->wavefront.M->area; intensity_norm = src->wavefront.M->area/src->wavefront.M->nnz/nTheta; N = N_DFT * N_DFT * N_LENSLET*N_SOURCE; Nb = (int) ceilf( sqrtf(N)/16.0 ); gridDim = dim3(Nb,Nb); wave2intensity LLL gridDim , blockDim RRR( d__wave_PUPIL, N, intensity_norm); if (src->fwhm>0) { gridDim = dim3(1+N_DFT*N_SIDE_LENSLET/16,1+N_DFT*N_SIDE_LENSLET/16,N_SOURCE); <> } gridDim = dim3(1+N_PX_CAMERA*N_SIDE_LENSLET/16,1+N_PX_CAMERA*N_SIDE_LENSLET/16, N_SOURCE); binning LLL gridDim , blockDim RRR(d__frame, d__wave_PUPIL, N_DFT, src->dev_ptr, pixel_scale, d__theta_x, d__theta_y, absolute_pointing, N_PX_IMAGE, N_PX_CAMERA, N_SIDE_LENSLET); } ++N_FRAME; } @ \subsubsection{Imaging numerical model} \label{sec:imag-numer-model} The imaging routine is modeling the propagation of [[N_SOURCE]] sources through a square lenslet array of length $[[N_SIDE_LENSLET]]\geq 1$ to the lenslet focal plane. For a given lenslet, the wave $\Psi=[[wave_PUPIL]]$ is sampled with $N_{pupil}\times N_{pupil}$ pixels. The complex amplitude in the image plane $\Psi^\prime=[[wave_IMAGE]]$ is given by \begin{equation} \label{eq:1} \Psi^\prime_{kl} = \sum_{i=0}^{N_{pupil}-1}\;\sum_{j=0}^{N_{pupil}-1} \Psi_{ij} \exp\left[-2\iota\pi{ik+jl \over N_{pupil} } \right] \end{equation} with $k\in[0,\dots,N_{pupil}-1]$ and $l\in[0,\dots,N_{pupil}-1]$. To avoid numerical aliasing, $\Psi$ is usually padded with zeros, this is usually called a zero--padded DFT (ZP--DFT). The new size of $\Psi$ is $N_{DFT}\times N_{DFT}$ with $N_{DFT} = 2 N_{pupil}$. $\Psi^\prime$ is rewritten \begin{equation} \label{eq:3} \Psi^\prime_{kl} = \sum_{i=0}^{N_{DFT}-1}\;\sum_{j=0}^{N_{DFT}-1} \Psi_{ij} \exp\left[-2\iota\pi{ik+jl \over N_{DFT} } \right] \end{equation} with $k\in[0,\dots,N_{DFT}-1]$ and $l\in[0,\dots,N_{DFT}-1]$. the zero frequency point in the image is at the pixel [0,0] whereas it should be located on the system optical axis. To shift the image, the following substitution is performed $$k \leftarrow k - { 1 - N_{DFT} + w \over 2 },$$ $$l \leftarrow l - { 1 - N_{DFT} + w \over 2 }$$ and $$\Psi_{ij} \leftarrow \Psi_{ij}\exp\left[\iota\pi{(i+j)(1 - N_{DFT} + w) \over N_{DFT} }\right].$$ % When the pupil is mapped with a lenslet array, the complex amplitude of each lenslet must be stacked in a row. % A pixel $(i_p,j_p)$ in the lenslet $(i_L,j_L)$ of a square lenslet array of side length [[N_SIDE_LENSLET]] is at coordinates $(i,j)$ in the lenslet array and at coordinates $(i^\prime,j^\prime)$ in the row of lenslets with % \begin{eqnarray} % \label{eq:1} % i &=& i_LN_{pupil} + i_p \\ % j &=& j_LN_{pupil} + j_p \\ % \end{eqnarray} % and % \begin{eqnarray} % \label{eq:2} % i^\prime &=& k_LN_{pupil} + i_p \\ % j^\prime &=& j_p % \end{eqnarray} % with $k_L= i_L[[N_SIDE_LENSLET]] + j_L$. When the imaging device and the source are not aligned, the source is displaced in the image plane. The source location is given by its direction vector $\vec\theta_\ast$. The device is pointing in the direction given by the vector $\vec\theta_d$. The angular displacement in the image plane is: $$\vec\delta=\delta_M\vec\theta_\ast-\vec\theta_d.$$ The pointing of the imaging device can be either absolute $\delta_M=1$ i.e. with respect to the zenith direction or relative to the source direction i.e. $\delta_M=0$ the default. If $p$ is the angular pixel scale on the camera, then the displacement in pixel is $\vec d=\vec\delta/p$. %The pixel displacement is divided into the integer pixel displacement on the camera, $\text{round}(\vec d)$, and the displacement in the imagelets, $M\left(\vec d - \text{round}(\vec d)\right)$ with $M=[[N_PX_IMAGE]]/[[N_PX_CAMERA]]$. The pixel displacement is divided into the integer pixel displacement on the camera, $\text{round}(\vec d)$, and the displacement in the imagelets, $\left(\vec d - \text{round}(\vec d)\right)$. The displacement in pixel is computed with: <>= if (pixel_scale>0) { dx = absolute_pointing*src[iSource].theta_x - theta_x[iSource]; dx /= pixel_scale; dx_int = rintf( dx ); dx -= dx_int; //dx *= M; dy = absolute_pointing*src[iSource].theta_y - theta_y[iSource]; dy /= pixel_scale; dy_int = rintf( dy ); dy -= dy_int; //dy *= M; } @ <>= __global__ void complexAmplitude(float2 *wave_PUPIL, int N_DFT, float *wavefront_amplitude, float *wavefront_phase, float wavenumber, int N_PX_PUPIL, int N_PX_IMAGE, int M, int N_SIDE_LENSLET, source *src, float pixel_scale, float *theta_x, float *theta_y, char absolute_pointing, char DFT_SHIFT) { int i, j, iLenslet, jLenslet, ij_PUPIL, ij_DFT, iSource; float c ,s, phasor, dx, dy, dx_int, dy_int; dx = dy = 0.0; <> <> <> } __global__ void complexAmplitudeTT7(float2 *wave_PUPIL, int N_DFT, float *wavefront_amplitude, float *wavefront_phase, float wavenumber, int *piston_mask, int N_PX_PUPIL, int N_PX_IMAGE, int M, int N_SIDE_LENSLET, source *src, float pixel_scale, float *theta_x, float *theta_y, char absolute_pointing, char DFT_SHIFT) { int i, j, iLenslet, jLenslet, ij_PUPIL, ij_DFT, iSource; float c ,s, phasor, dx, dy, dx_int, dy_int; dx = dy = 0.0; <> ij_PUPIL = lenslet2array_overlap(i,j,N_PX_PUPIL,iLenslet,jLenslet,N_SIDE_LENSLET,0); <> } @ %def complexAmplitude <>= __global__ void complexAmplitudeWithModulation(float2 *wave_PUPIL, int N_DFT, float *wavefront_amplitude, float *wavefront_phase, float wavenumber, int N_PX_PUPIL, int N_PX_IMAGE, int M, int N_SIDE_LENSLET, source *src, float pixel_scale, float *theta_x, float *theta_y, char absolute_pointing, char DFT_SHIFT, float modulation_r, float modulation_o) { int i, j, iLenslet, jLenslet, ij_PUPIL, ij_DFT, iSource; float c ,s, phasor, dx, dy, dx_int, dy_int; dx = dy = 0.0; <> <> <> } @ %def complexAmplitudeWithModulation The source index is given by the third coordinates of the thread block <>= // source index iSource = blockIdx.z; @ The pixel coordinates ([[i]],[[j]]) in the lenslet ([[iLenslet]],[[jLenslet]]) is derived from the thread blocks: <>= // wave pixel coordinate (N_DFTLxN_SIDE_LENSLET)^2 if ( threads2lenslet(threadIdx, blockIdx, blockDim, &i, &j, N_DFT, &iLenslet, &jLenslet, N_SIDE_LENSLET) ) { @ The linear index of the vertically concatenated lenslet array is computed <>= ij_DFT = lenslet2row(i,j,N_DFT,iLenslet,jLenslet,N_SIDE_LENSLET,iSource); if ((i>= ij_PUPIL = lenslet2array_overlap(i,j,N_PX_PUPIL,iLenslet,jLenslet,N_SIDE_LENSLET,iSource); @ The phase shift is applied on the wavefront <>= <> <> <> @ and for the TT7 sensor <>= <> <> <> @ <>= phasor = (DFT_SHIFT==0) ? 0.0 : PI*(i+j)*(1-N_DFT + (N_PX_IMAGE & 1))/N_DFT; <> phasor -= 2*PI*(i*dx+j*dy)/N_DFT; <>= phasor += wavefront_phase[ij_PUPIL]*wavenumber; sincosf(phasor,&c,&s); @ In the case of a modulated PSF with amplitude [[modulation]] <>= <> sincosf(modulation_o,&s,&c); phasor -= 8*PI*modulation_r*(i*c+j*s)/N_DFT; <> <> @ The wavefront in the pupil plane is written in the complex amplitude array ready to be processed by CUFFT <>= wave_PUPIL[ij_DFT].x = wave_PUPIL[ij_DFT].y = wavefront_amplitude[ij_PUPIL]; wave_PUPIL[ij_DFT].x *= c; wave_PUPIL[ij_DFT].y *= s; return; } if ((i>= wave_PUPIL[ij_DFT].x = wave_PUPIL[ij_DFT].y = wavefront_amplitude[ij_PUPIL]* (piston_mask[ij_PUPIL]==(iSource+1)); wave_PUPIL[ij_DFT].x *= c; wave_PUPIL[ij_DFT].y *= s; return; } if ((i>= __global__ void complexAmplitudeNoOverlap(float2 *wave_PUPIL, int N_DFT, float *wavefront_amplitude, float *wavefront_phase, float wavenumber, int N_PX_PUPIL, int N_PX_IMAGE, int M, int N_SIDE_LENSLET, source *src, float pixel_scale, float *theta_x, float *theta_y, char absolute_pointing, char DFT_SHIFT) { int i, j, iLenslet, jLenslet, ij_PUPIL, ij_DFT, iSource; float c ,s, phasor, dx, dy, dx_int, dy_int; dx = dy = 0.0; <> <> <> } @ %def complexAmplitudeNoOverlap where <>= ij_PUPIL = lenslet2array(i,j,N_PX_PUPIL,iLenslet,jLenslet,N_SIDE_LENSLET,iSource); @ In the case of the segment piston sensor, the following modified routine is used instead <>= __global__ void complexAmplitudeNoOverlapSPS(float2 *wave_PUPIL, int N_DFT, float *wavefront_amplitude, float *wavefront_phase, float wavenumber, int N_PX_PUPIL, int N_PX_IMAGE, int M, int N_SIDE_LENSLET, source *src, float d, char DFT_SHIFT) { int i, j, iLenslet, jLenslet, kLenslet, ij_PUPIL, ij_DFT, iSource; float c ,s, phasor, theta, ctheta, stheta; <> <> kLenslet = jLenslet*N_SIDE_LENSLET + iLenslet; kLenslet = kLenslet%12; if (kLenslet<6) theta = (3-kLenslet)*PI/3.0; else theta = (2-(kLenslet-6))*PI/3.0; sincosf(theta,&ctheta,&stheta); phasor = (DFT_SHIFT==0) ? 0.0 : PI*(i+j)*(1-N_DFT + (N_PX_IMAGE & 1))/N_DFT; phasor -= 2*PI*d*(i*ctheta+j*stheta)/N_DFT; phasor += wavefront_phase[ij_PUPIL]*wavenumber; sincosf(phasor,&c,&s); wave_PUPIL[ij_DFT].x = wave_PUPIL[ij_DFT].y = wavefront_amplitude[ij_PUPIL]; wave_PUPIL[ij_DFT].x *= c; wave_PUPIL[ij_DFT].y *= s; return; } if ((i,yshift=-2mm] (-8,-8) -- node[below] {[[N_PX_PUPIL]]} (8,-8); \end{scope} \begin{scope}[xshift=-10mm] \fill[blue!20] (-35,-35) rectangle (35,35); \fill[green!50] (-16,-16) rectangle (16,16); \draw[step=1,gray,thin] (-35,-35) grid (35,35); \draw[step=5,red,thin] (-35,-35) grid (35,35); \node[anchor=south] at (0,35) {Nyquist sampled image plane}; \draw[<->,yshift=-2mm] (-16,-35) -- node[below] {[[N_DFT]]} (16,-35); \draw[<->,yshift=-6mm] (-35,-35) -- node[below] {[[N_PX_IMAGE]]} (35,-35); \end{scope} \begin{scope}[xshift=65mm] \fill[red!50] (-35,-35) rectangle (35,35); \draw[step=5,red,thin] (-35,-35) grid (35,35); \node[anchor=south] at (0,35) {Binned image plane}; \draw[<->,yshift=-2mm] (-35,-35) -- node[below] {[[N_PX_CAM]]} (35,-35); \end{scope} \end{tikzpicture} \caption{Pupil and image arrays} \label{fig:4} \end{figure} @ The imagelets are binned into framelets on the detector frame: <>= __global__ void binning(float *frame, const float2 *wave, int N_DFT, source *src, float pixel_scale, float *theta_x, float *theta_y, char absolute_pointing, int N_PX_IMAGE, int N_PX_CAMERA, int N_SIDE_LENSLET) { int u, v, iLenslet, jLenslet, ij_DFT, M, offset, iPxL, jPxL, i, j, ij_CAM, ii, jj,iSource; float dx, dy, dx_int, dy_int; dx_int = dy_int = 0.0; // source index iSource = blockIdx.z; // BINNING if ( threads2lenslet(threadIdx, blockIdx, blockDim, &u, &v, N_PX_CAMERA, &iLenslet, &jLenslet, N_SIDE_LENSLET) ) { M = N_PX_IMAGE/N_PX_CAMERA; offset = (N_PX_IMAGE-N_DFT)/2; __shared__ float bin[16][16]; ii = threadIdx.x; jj = threadIdx.y; bin[ii][jj] = 0.0; <> for (i=0;i=0 && iPxL=0 && jPxL>= __global__ void binningTT7(float *frame, const float2 *wave, int N_DFT, source *src, float pixel_scale, float *theta_x, float *theta_y, char absolute_pointing, int N_PX_IMAGE, int N_PX_CAMERA, int N_SIDE_LENSLET) { int u, v, iLenslet, jLenslet, ij_DFT, M, offset, iPxL, jPxL, i, j, ij_CAM, ii, jj,iSource; float dx, dy, dx_int, dy_int; dx_int = dy_int = 0.0; // source index iSource = blockIdx.z; // BINNING if ( threads2lenslet(threadIdx, blockIdx, blockDim, &u, &v, N_PX_CAMERA, &iLenslet, &jLenslet, N_SIDE_LENSLET) ) { M = N_PX_IMAGE/N_PX_CAMERA; offset = (N_PX_IMAGE-N_DFT)/2; __shared__ float bin[16][16]; ii = threadIdx.x; jj = threadIdx.y; bin[ii][jj] = 0.0; <> for (i=0;i=0 && iPxL=0 && jPxL>= void imaging::readout(float exposureTime, float readOutNoiseRms) { <> noisify LLL gridDim, blockDim RRR (devStates, d__frame, N, readOutNoiseRms, unit_exposureTime, 0.0, 1.0, photoelectron_gain); } @ with <>= int N, Nb; float unit_exposureTime; unit_exposureTime = (N_FRAME>0) ? exposureTime/N_FRAME : exposureTime; N_PHOTON_PER_FRAME *= unit_exposureTime; N = N_PX_CAMERA * N_PX_CAMERA * N_LENSLET*N_SOURCE; Nb = (int) ceilf( sqrtf(N)/16.0 ); dim3 blockDim(16,16); dim3 gridDim(Nb,Nb); @ The background noise is added to the image with <>= void imaging::readout(float exposureTime, float readOutNoiseRms, float nBackgroundPhoton, float noiseFactor) { <> float nbgp, nf2; int n; nbgp = (N_FRAME>0) ? nBackgroundPhoton*N_FRAME : nBackgroundPhoton; /* n = N_SIDE_LENSLET*N_PX_CAMERA; n *= n; nbgp /= n; */ nf2 = noiseFactor*noiseFactor; noisify LLL gridDim, blockDim RRR (devStates, d__frame, N, readOutNoiseRms, unit_exposureTime, nbgp, nf2, photoelectron_gain); } @ with the kernels defined in the following <>= <> __global__ void noisify(curandState *state, float *frame, const int n_frame, const float readOutNoiseRms, float exposureTime, const float nBackgroundPhoton, const float noiseFactor2, const float photoelectron_gain) { int i, j, id; float lambda; curandState localState; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; id = j * gridDim.x * blockDim.x + i; //int id = threadIdx.x + blockIdx.x*blockDim.x; if ( id < n_frame ) { localState = state[id]; frame[id] += nBackgroundPhoton; frame[id] *= photoelectron_gain; frame[id] *= noiseFactor2; frame[id] *= exposureTime; lambda = frame[id]; // frame[id] = (float) curand_poisson(&localState, (double) lambda); /* if (lambda>0) { frame[id] = (lambda<30) ? curand_poisson_knuth( &localState, lambda) : curand_poisson_atkinson( &localState, lambda); } */ frame[id] = (lambda<64) ? curand_poisson_knuth( &localState, lambda) : (lambda<4000) ? (float) curand_poisson_gammainc( &localState, lambda) : curand_normal(&localState)*sqrt(lambda) + lambda; frame[id] += (1-noiseFactor2)*lambda/noiseFactor2; frame[id] = (frame[id]<0) ? 0 : frame[id]; frame[id] += (readOutNoiseRms>0) ? curand_normal(&localState)*readOutNoiseRms : 0; //frame[id] -= nBackgroundPhoton*exposureTime*photoelectron_gain; //frame[id] = (frame[id]>0) ? floorf(frame[id]) : ceilf(frame[id]); state[id] = localState; } } __global__ void flux_integral(float *frame, const int n_frame, float exposureTime, const float photoelectron_gain) { int i, j, id; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; id = j * gridDim.x * blockDim.x + i; //int id = threadIdx.x + blockIdx.x*blockDim.x; if ( id < n_frame ) { frame[id] *= photoelectron_gain; frame[id] *= exposureTime; } } @ <>= void imaging::noiseless_readout(float exposureTime) { <> flux_integral LLL gridDim, blockDim RRR (d__frame, N, unit_exposureTime, photoelectron_gain); } @ In CUDA, there are as many random generator states as the number of threads. These states are initialized providing the seed number [[LOCAL_RAND_SEED]]; [[LOCAL_RAND_SEED]] is set to the global variable [[RAND_SEED]]. <>= __global__ void setupRandomSequence_imaging(curandState *state, const int n_data, const int LOCAL_RAND_SEED) { int id = threadIdx.x + blockIdx.x*blockDim.x; if (id>= void imaging::reset_rng(int SEED) { int n_data; n_data = N_SOURCE*N_LENSLET*N_PX_CAMERA*N_PX_CAMERA; LOCAL_RAND_SEED = SEED; dim3 blockDim(256); dim3 gridDim(n_data/256+1); setupRandomSequence_imaging LLL gridDim,blockDim RRR (devStates, n_data, LOCAL_RAND_SEED); } @ \subsubsection{Poisson distribution} \label{sec:poisson-distribution} Two random number generators for the Poisson distribution have been implemented. Both are using uniformly distributed random numbers. The first method used the famous algorithm of D. Knuth: <>= __device__ float curand_poisson_knuth(curandState *state, float lambda) { unsigned int k = 0; float p=exp(lambda); do{ k++; p *= curand_uniform(state); }while (p > 1.0); return (float) k-1; } @ The execution speed of D. Knuth algorithm is proportional to $\lambda$. For large $\lambda$, a preferable algorithm is the one derived by A. C. Atkinson: <>= __device__ float curand_poisson_atkinson(curandState *state, float lambda) { /* The algorithm is ???method PA??? from ???The Computer Generation of Poisson Random Variables??? by A. C. Atkinson, Journal of the Royal Statistical Society Series C (Applied Statistics) Vol. 28, No. 1. (1979), pages 29-35. Method PA is on page 32. */ float c, beta, alpha, k, u, x, n, v, y, lhs, rhs; c = 0.767 - 3.36/lambda; beta = PI/sqrt(3.0*lambda); alpha = beta*lambda; k = logf(c) - lambda - logf(beta); do{ u = curand_uniform(state); x = (alpha - logf((1.0 - u)/u))/beta; n = floorf(x + 0.5); if (n >= 0) { v = curand_uniform(state); y = alpha - beta*x; lhs = 1.0 + expf(y); lhs *= lhs; lhs = y + logf(v/lhs); rhs = k + n*logf(lambda) - lgammaf(n+1); } }while(lhs > rhs); return n; } @ \subsection{Image analysis} \label{sec:image-analysis} The following routines are used to evaluate image properties: Strehl ratio, entrapped energy and full width at half maximum. \subsubsection{Strehl ratio} \label{sec:strehl-ratio} The Strehl ratio is given by the ratio between the PSF at frequency 0 and a reference PSF a frequency 0. \index{imaging!imaging!strehl\_ratio} <>= float imaging::strehl_ratio(imaging *ref) { int idx; idx = (N_PX_CAMERA - 1)/2; float psf0, ref_psf0, sr; HANDLE_ERROR( cudaMemcpy( &psf0, d__frame+idx*(1+N_PX_CAMERA),sizeof(float), cudaMemcpyDeviceToHost ) ); HANDLE_ERROR( cudaMemcpy( &ref_psf0, ref->d__frame+idx*(1+N_PX_CAMERA),sizeof(float), cudaMemcpyDeviceToHost ) ); sr = (psf0*ref->N_FRAME)/(ref_psf0*N_FRAME); return sr; } @ \subsection{Input/Output} \label{sec:inputoutput} The main parameters are displayed with: \index{imaging!imaging!info} <>= void imaging::info(void) { #ifndef SILENT fprintf(stdout,"\n\x1B[1;42m@(CEO)>imaging:\x1B[;42m\n"); fprintf(stdout," . imaging device : %4d\n",N_LENSLET); fprintf(stdout," . pupil sampling : %4d\n",N_PX_PUPIL); fprintf(stdout," . DFT sampling : %4d\n",N_DFT); fprintf(stdout," . imagelet sampling : %4d\n",N_PX_IMAGE); fprintf(stdout," . frame sampling : %4d\n",N_PX_CAMERA); fprintf(stdout," . source # : %4d\n",N_SOURCE); fprintf(stdout,"----------------------------------------------------\x1B[0m\n"); #endif } @ The frame is saved to a file with: <>= void imaging::frame2file(const char *filename){ int n; n = N_PX_CAMERA*N_SIDE_LENSLET; n *= n; printf("n=%d\n",n); dev2file(filename,d__frame,n*N_SOURCE,n,N_SOURCE); } @ The frame is plotted with the plot.ly <>= void imaging::show_frame(char *filename) { int nbyte; float *data; plotly_properties prop; nbyte = sizeof(float) *N_PX_CAMERA*N_PX_CAMERA*N_LENSLET*N_SOURCE; HANDLE_ERROR( cudaHostAlloc( (void**)&data, nbyte, cudaHostAllocDefault ) ); HANDLE_ERROR( cudaMemcpy( data, d__frame,nbyte, cudaMemcpyDeviceToHost ) ); prop.set("xtitle","X"); prop.set("ytitle","Y"); prop.set("ztitle","[au]"); prop.set("title",""); prop.set("filename",filename); prop.aspect_ratio = N_SOURCE; prop.set("zdata",data, N_PX_CAMERA*N_SIDE_LENSLET, N_PX_CAMERA*N_SIDE_LENSLET*N_SOURCE); prop.set("colorscale","YIGnBu"); imagesc(&prop); HANDLE_ERROR( cudaFreeHost( data ) ); } <>= void imaging::show_frame(char *filename, imaging *ref) { int nbyte; char title[32]; float *data; plotly_properties prop; nbyte = sizeof(float)*N_PX_CAMERA*N_PX_CAMERA; HANDLE_ERROR( cudaHostAlloc( (void**)&data, nbyte, cudaHostAllocDefault ) ); HANDLE_ERROR( cudaMemcpy( data, d__frame,nbyte, cudaMemcpyDeviceToHost ) ); prop.set("xtitle","X"); prop.set("ytitle","Y"); prop.set("ztitle","[au]"); sprintf(title,"Strehl ratio=%.2f%%",strehl_ratio(ref)*100); prop.set("title",title); prop.set("filename",filename); prop.set("zdata",data,N_PX_CAMERA,N_PX_CAMERA); prop.set("colorscale","YIGnBu"); imagesc(&prop); HANDLE_ERROR( cudaFreeHost( data ) ); } @ \section{Tests} \label{sec:tests} \subsection{Test 1} \label{sec:test-1} For this test, the relative position of the guide star and the weavefront sensor is demonstrated. @ The main function is: <>= <
> int main(int argc,char *argv[]) { <> gs.wavefront.show_amplitude("imaging/pupil"); imgr.pixel_scale = 2*gs.wavelength()/2.0; imgr.set_pointing_direction(imgr.pixel_scale*5,PI/4); // imgr.set_pointing_direction(0.0,0.0); imgr.propagate(&gs); imgr.show_frame("imaging/frame"); <> } @ The telescope pupil is defined with the [[mask]] structure, the pupil is sample with [[N_PX]] pixels accross the diameter: <>= int N_PX, N_PX2; N_PX2 = N_PX = 60; N_PX2 *= N_PX; mask telescope; telescope.setup_circular(N_PX); <>= telescope.cleanup(); @ Lets define a guide star: <>= source gs; gs.setup(0.0,0.0,INFINITY,N_PX2); gs.photometric_band = "I"; gs.wavefront.masked(&telescope); <>= gs.cleanup(); @ The imager is defined with the [[imaging]] structure: <>= imaging imgr; imgr.setup(N_PX,1,2,50,0.5,1); <>= imgr.cleanup(); @ % -------------------- \subsection{C} \label{sec:c} Test suite: <
>= #ifndef __CEO_H__ #include "h" #endif <>= <
> int main(int argc,char *argv[]) { int nLenslet, nLenslet2, nPxLenslet; nLenslet = nLenslet2 = 1; nLenslet2 *= nLenslet; nPxLenslet = 61; <> dev2file("amplitude.bin",src.wavefront.amplitude,src.wavefront.N_PX); /* float wf_rms, d; */ /* d = 0.04; */ /* atm.get_phase_screen(&src,d,NPX,d,NPX,0); */ /* wf_rms = 1E9*S.std(src.wavefront.phase, NPX2); */ /* printf("\n NGS WF RMS: %7.2fnm\n",wf_rms); */ /* src.phase2file("phaseScreen.bin"); */ src.photometric_band = "V"; printf(" . SRC wavelength %.2E\n",src.wavelength()); lenslet_array.propagate(&src); lenslet_array.frame2file("frame.bin"); printf(" __ TERMINATING ... __\n"); <> } @ Source: <>= int NPX, NPX2; NPX = nLenslet*nPxLenslet; NPX2 = NPX*NPX; source src; src.setup(ARCSEC(0) , 0, INFINITY, NPX2, "SRC"); <>= src.cleanup(); @ Atmosphere: <>= atmosphere atm; atm.setup(20e-2,30,10e3,10,0); <>= atm.cleanup(); @ Statistics: <>= stats S; S.setup(); <>= S.cleanup(); @ Lenslet array: <>= imaging lenslet_array; lenslet_array.setup(nPxLenslet,nLenslet, 2,1,1); <>= lenslet_array.cleanup(); @ \subsection{Python} \label{sec:python-tests} <>= #!/usr/bin/env python import sys sys.path.append("/home/rconan/CEO/python") from pylab import * from import * <
> @ The test suite consists of: \begin{description} \item[the main test] <
>= n = 256 nn = array([n,n],dtype=int32) zen = array( [0], dtype=float32) azi = array( [0], dtype=float32) src = Source("V",zen,azi,float("inf"),nn) tel = GMT(n, 25.5) src.masked(tel) imgr = Imaging(n, 1, 4, 4*n, 1, 1) imgr.propagate(src) frame0 = zeros( (n*4,n*4), order="c", dtype=float32) imgr.getframe(frame0) figure(1) ax = imshow(frame0**0.25,interpolation='None') colorbar(ax) draw() atm = GmtAtmosphere(15e-2,30) D = 25.0 p = D/n atm.get_phase_screen(src, 1, p, n, p, n, 0.0) src.masked(tel) imgr.reset() imgr.propagate(src) #frame0 = zeros( (n*4,n*4), order="c", dtype=float32) imgr.getframe(frame0) figure(2) ax = imshow(frame0,interpolation='None') colorbar(ax) show() @ \item[the field stop test] <>= n = 32 nn = array([n,n],dtype=int32) zen = zeros( 1, dtype=float32) azi = zeros( 1, dtype=float32) src = Source("V",zen,azi,float("inf"),nn) tel = Telescope(n) src.masked(tel) osf = 4 imgr = Imaging(n, 1, osf, osf*n, 1, 1) imgr.propagateThroughFieldStop(src,8.0) frame0 = zeros( (n*osf,n*osf), order="c", dtype=float32) imgr.getframe(frame0) figure(1) ax = imshow(frame0,interpolation='None') colorbar(ax) show() @ \item[the pyramid WFS test] <>= n = 32 nn = array([n,n],dtype=int32) zen = zeros( 1, dtype=float32) azi = zeros( 1, dtype=float32) src = Source("V",zen,azi,float("inf"),nn) tel = Telescope(n) src.masked(tel) osf = 4 imgr = Imaging(n, 1, osf, osf*n, 1, 1) imgr.propagateThroughPyramid(src) frame0 = zeros( (n*osf,n*osf), order="c", dtype=float32) imgr.getframe(frame0) figure(1) ax = imshow(frame0,interpolation='None') colorbar(ax) show() @ \end{description} \subsection{Matlab MEX} \label{sec:matlab-mex} The test suite is a Matlab script. It calls the mex function (THIS IS BROKEN): <>= #include #include #include "cublas_v2.h" #include "mex.h" #include "gpu/mxGPUArray.h" #ifndef __CEO_H__ #include "h" #endif #ifndef __SOURCE_H__ #include "source.h" #endif #ifndef __ATMOSPHERE_H__ #include "atmosphere.h" #endif #ifndef __IMAGING_H__ #include "imaging.h" #endif #ifndef __CENTROIDING_H__ #include "centroiding.h" #endif __global__ void apply_phase_offset(float* phase_io, const float* phase_offset, int n_phase) { int idx; idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx 5){ mexErrMsgIdAndTxt( "MATLAB:mxislogical:maxlhs", "Too many output arguments."); } inputIndex = 0; /* Create GPUArray from mxArray input and get underlying pointer. */ /* mxGPUArray const *x; */ /* x = mxGPUCreateFromMxArray(prhs[inputIndex++]); */ /* if (mxGPUGetClassID(x) != mxSINGLE_CLASS) { */ /* mexErrMsgIdAndTxt(errId, errMsg); */ /* } */ /* d__x = (float const *)(mxGPUGetDataReadOnly(x)); */ /* // --------------------------------------- */ /* mxGPUArray const *y; */ /* y = mxGPUCreateFromMxArray(prhs[inputIndex++]); */ /* if (mxGPUGetClassID(y) != mxSINGLE_CLASS) { */ /* mexErrMsgIdAndTxt(errId, errMsg); */ /* } */ /* d__y = (float const *)(mxGPUGetDataReadOnly(y)); */ // --------------------------------------- delta = mxGetPr(prhs[inputIndex++]); nSample = mxGetPr(prhs[inputIndex++]); reset = mxGetPr(prhs[inputIndex++]); L0 = mxGetPr(prhs[inputIndex++]); time = mxGetPr(prhs[inputIndex++]); // --------------------------------------- if (nrhs==6) { mxGPUArray const *phase_offset; phase_offset = mxGPUCreateFromMxArray(prhs[inputIndex++]); if (mxGPUGetClassID(phase_offset) != mxSINGLE_CLASS) { mexErrMsgIdAndTxt(errId, errMsg); } d__phase_offset = (float const *)(mxGPUGetDataReadOnly(phase_offset)); } /* Create GPUArray to hold the result and get underlying pointer. */ mxGPUArray *phase_screen; dims[0] = *nSample; dims[1] = *nSample; phase_screen = mxGPUCreateGPUArray(2,dims, mxSINGLE_CLASS,mxREAL,MX_GPU_INITIALIZE_VALUES); d__phase_screen = (float *)(mxGPUGetData(phase_screen)); mxGPUArray *frame; dims[0] = _N_PX_CAMERA_*N_SIDE_LENSLET; dims[1] = _N_PX_CAMERA_*N_SIDE_LENSLET; frame = mxGPUCreateGPUArray(2,dims, mxSINGLE_CLASS,mxREAL,MX_GPU_INITIALIZE_VALUES); mxGPUArray *cx; dims[0] = _N_LENSLET_*_N_SOURCE_; dims[1] = 1; cx = mxGPUCreateGPUArray(2,dims, mxSINGLE_CLASS,mxREAL,MX_GPU_INITIALIZE_VALUES); mxGPUArray *cy; cy = mxGPUCreateGPUArray(2,dims, mxSINGLE_CLASS,mxREAL,MX_GPU_INITIALIZE_VALUES); mxGPUArray *flux; flux = mxGPUCreateGPUArray(2,dims, mxSINGLE_CLASS,mxREAL,MX_GPU_INITIALIZE_VALUES); if (INIT==0) { // Source src.setup(ARCSEC(0) , 0, INFINITY, (*nSample)*(*nSample)); // Single layer turbulence profile float altitude[] = {0}, xi0[] = {1}, wind_speed[] = {10}, wind_direction[] = {0}; /* // GMT 7 layers turbulence profile float altitude[] = {25, 275, 425, 1250, 4000, 8000, 13000}, xi0[] = {0.1257, 0.0874, 0.0666, 0.3498, 0.2273, 0.0681, 0.0751}, wind_speed[] = {5.6540, 5.7964, 5.8942, 6.6370, 13.2925, 34.8250, 29.4187}, wind_direction[] = {0.0136, 0.1441, 0.2177, 0.5672, 1.2584, 1.6266, 1.7462}; */ // Atmosphere atm.setup(0.078125,(float) (*L0),altitude,xi0,wind_speed,wind_direction); // SH WFS lenslet_array.setup(); HANDLE_ERROR( cudaFree( lenslet_array.d__frame ) ); // Centroid cog.setup(); cog.cleanup(); mexAtExit(cleanup); INIT = 1; } if (*reset>0) { atm.reset(); } if (nlhs>1) { // atm.get_phase_screen(d__x,d__y,_N_PIXEL_,d__src,(float)(*time)); atm.get_phase_screen(*delta,*nSample,*delta,*nSample,&src,(float)(*time)); if (nrhs==6) { mexPrintf("\nApplying phase offset!\n"); dim3 gridDim(ceilf(_SOURCE_N_PX_/256),1); dim3 blockDim(256,1); apply_phase_offset LLL gridDim,blockDim RRR (src.wavefront.phase, d__phase_offset, _SOURCE_N_PX_); } HANDLE_ERROR( cudaMemcpy( d__phase_screen, src.wavefront.phase, sizeof(float)*_SOURCE_N_PX_, cudaMemcpyDeviceToDevice ) ); lenslet_array.d__frame = (float *)(mxGPUGetData(frame)); lenslet_array.propagate(&src); cog.d__cx = (float *)(mxGPUGetData(cx)); cog.d__cy = (float *)(mxGPUGetData(cy)); cog.d__mass = (float *)(mxGPUGetData(flux)); cog.get_data(lenslet_array.d__frame); } else { mexPrintf("Computing phase screen...\n"); //atm.get_phase_screen(d__phase_screen,*delta,*nSample,*delta,*nSample,&src,(float)(*time)); atm.get_phase_screen(&src,*delta,*nSample,*delta,*nSample,0); HANDLE_ERROR( cudaMemcpy( src.wavefront.phase,d__phase_screen, sizeof(float)*_SOURCE_N_PX_, cudaMemcpyDeviceToDevice ) ); } /* Wrap the result up as a MATLAB gpuArray for return. */ plhs[0] = mxGPUCreateMxArrayOnGPU(phase_screen); plhs[1] = mxGPUCreateMxArrayOnGPU(frame); plhs[2] = mxGPUCreateMxArrayOnGPU(cx); plhs[3] = mxGPUCreateMxArrayOnGPU(cy); plhs[4] = mxGPUCreateMxArrayOnGPU(flux); // atm.cleanup(); /* mxGPUDestroyGPUArray(x); */ /* mxGPUDestroyGPUArray(y); */ mxGPUDestroyGPUArray(phase_screen); mxGPUDestroyGPUArray(frame); mxGPUDestroyGPUArray(cx); mxGPUDestroyGPUArray(cy); mxGPUDestroyGPUArray(flux); }