% -*- mode: Noweb; noweb-code-mode: c-mode -*- @ \def\bk{\bar \kappa_i} \def\bkx{\bar \kappa_{x,i}} \def\bky{\bar \kappa_{y,i}} \def\dk{\Delta \kappa_i} \def\dphi{\Delta \varphi} \def\nk{N_\kappa} \def\na{N_\varphi} \index{atmosphere} \section{The files} \subsection{Header} <>= #ifndef __ATMOSPHERE_H__ #define __ATMOSPHERE_H__ #ifndef __UTILITIES_H__ #include "utilities.h" #endif #ifndef __SOURCE_H__ #include "source.h" #endif #ifndef __CENTROIDING_H__ #include "centroiding.h" #endif #include #include #include #include #include #include #define handle_error(msg) \ do { perror(msg); exit(EXIT_FAILURE); } while (0) struct layer { <> int setup(float _altitude, float _xi0, float _wind_speed_, float _wind_direction, float W, int N_W, float field_size, int OSF, float duration); }; struct profile { <> void setup(float *altitude, float *xi0, float *wind_speed, float *wind_direction, int N_LAYER); void cleanup(); }; struct atmosphere { <> void setup(float r0_, float L0, int N_LAYER, float *altitude, float *xi0, float *wind_speed, float *wind_direction); void setup(float r0_, float L0, int N_LAYER, float *altitude, float *xi0, float *wind_speed, float *wind_direction, float _L_, int _NXY_PUPIL_, float field_size, float duration); void setup(float r0_, float L0, int _NLAYER, float *altitude, float *xi0, float *wind_speed, float *wind_direction, float _L_, int _NXY_PUPIL_, float field_size, float duration, const char *fullpath_to_phasescreens, int _N_DURATION); void gmt_setup(); void gmt_setup(float r0_, float L0); void gmt_setup(float r0_, float L0, float _L_, int _NXY_PUPIL_, float field_size, float duration); void gmt_setup(float r0_, float L0, float _L_, int _NXY_PUPIL_, float field_size, float duration, const char *fullpath_to_phasescreens, int _N_DURATION); void gmt_setup(float r0_, float L0, int _RAND_SEED_); void gmt_setup(float r0_, float L0, float _L_, int _NXY_PUPIL_, float field_size, float duration, int _RAND_SEED_); void gmt_setup(float r0_, float L0, float _L_, int _NXY_PUPIL_, float field_size, float duration, const char *fullpath_to_phasescreens, int _N_DURATION, int _RAND_SEED_); void gmt_set_id(int _ID_); void gmt_set_eph(float _EPH_); void cleanup(void); void info(void); void reset(void); void save_layer_phasescreens(const char* fullpath_to_phasescreens, int _N_DURATION); void get_phase_screen(float *phase_screen, float const *x, float const *y, int N_xy, source *src, float time); void get_phase_screen(float const *x, float const *y, int N_xy, source *src, float time); void get_phase_screen(float const delta_x, int N_x, float const delta_y, int N_y, source *src, float time); void get_phase_screen(float *phase_screen, float const delta_x, int N_x, float const delta_y, int N_y, source *src, float time); void get_phase_screen(source *src, float const delta_x, int N_x, float const delta_y, int N_y, float time); void get_phase_screen(source *src, float const delta_x, int N_x, float const delta_y, int N_y, float time, float exponent); void get_phase_screen(source *src, int N_SRC, float const delta_x, int N_x, float const delta_y, int N_y, float time); void get_phase_screen(source *src, float time); void get_phase_screen_gradient(float *sx, float *sy, float *x, float *y, int Nxy, source *src, float time); void get_phase_screen_gradient(float *sx, float *sy, int NL, float const d, source *src, float time); void get_phase_screen_gradient(float *sx, float *sy, int NL, char *valid_lenslet, float const d, source *src, float time); void get_phase_screen_gradient(centroiding *cog, int NL, float const d, source *src, float time); void get_phase_screen_gradient_rolling_shutter(centroiding *cog, int NL, float const d, source *src, float time, float delay); void get_phase_screen_gradient(centroiding *cog, int NL, float const d, source *src, int N_SRC, float time); void get_phase_screen_gradient(centroiding *cog, int NL, char *valid_lenslet, float const d, source *src, int N_SRC, float time); void get_phase_screen_circ_centroids(centroiding *cog, const float R, source *src, int N_SRC, float time); void get_phase_screen_circ_uplink_centroids(centroiding *cog, const float R, source *src, int N_SRC, float time, char focused); void rayTracing(const float* x_PUPIL,const float* y_PUPIL, float* phase_screen_PUPIL, const int NXY_PUPIL, const source* src, const float tau); void rayTracing(source *src, const float delta_x, const int N_X, const float delta_y, const int N_Y, const float tau); }; #endif // __ATMOSPHERE_H__ @ \subsection{Source} <>= #include "atmosphere.h" <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> @ \section{Parameters} \label{sec:prms} \index{atmosphere!atmosphere} \index{atmosphere!profile} \index{atmosphere!layer} The atmosphere parameters are \begin{itemize} \item the Fried parameter $r_0$ are a given photometric band <>= char *photometric_band; float wavelength, r0, wavenumber; @ \item the number of atmospheric layers <>= int N_LAYER; @ \item the angular extend of the phase screen <>= float field_size; @ \item the outer scale <>= float L0; @ \item the oversampling factor [[OSF]] of the phase screen in the layers <>= int layers_OSF; @ \item the time duration [[duration]] and the time origin [[tau0]] of the layer phase screens <>= float layers_duration, layers_tau0; @ \item the pupil width in meter [[W]] and pixel [[N_W]] <>= float W; int N_W; @ \item the file stream where the phase screen of the atmosphere layers are stored <>= float *phase_screen_LAYER; @ \item the number of phase screen chunks of duration [[layers_duration]] in [[layers_fid]] <>= int N_DURATION; @ \item the random seed generator <>= int LOCAL_RAND_SEED; @ \item the identity number of the GMT atmosphere model <>= int ID; @ \item the height of the atmosphere entrance pupil <>= float EPH; @ \end{itemize} The atmospheric layer parameters are: \begin{itemize} \item the altitude <>= float altitude; @ \item the fractional $r_0$ : $$\xi_0(h)= \left( r_0(h)\over r_0 \right)^{-5/3}$$ <>= float xi0; @ \item the wind vector <>= float wind_speed, wind_direction, vx, vy; @ \item the width and length of the phase screen in meter <>= float WIDTH_LAYER, LENGTH_LAYER; @ \item the width and length of the phase screen in pixel <>= int N_WIDTH_LAYER, N_LENGTH_LAYER; @ \item layers phase screen <>= float *d__phase_screen_LAYER; @ \item the size in memory of all the phase screnn in the layers <>= unsigned long N_PHASE_LAYER; @ \item phase screen layers file ID <>= //FILE *layers_fid; size_t mmap_size; @ \end{itemize} \section{Functions} \label{sec:functions} The main parameters of the atmosphere are displayed with the [[info]] routine: \index{atmosphere!atmosphere!info} <>= void atmosphere::info(void) { INFO("\n\x1B[1;42m@(CEO)>atmosphere: Von Karman atmospheric turbulence model\x1B[;42m\n"); INFO(" . wavelength = %5.2fnm\n . r0 = %5.2fcm\n . L0 = %5.2fm\n", wavelength*1e9,r0*1e2,turbulence.L0); INFO("----------------------------------------------------\n"); INFO(" Layer Altitude[m] fr0 wind([m/s] [deg])\n"); for (int kLayer=0;kLayer>= float l0, L, f, delta, N_k, N_a, kmin; float *altitude, *xi0, *wind_speed, *wind_direction; @ <>= float *zeta1, *eta1, *zeta2, *eta2; curandState *devStates; profile turbulence, *d__turbulence; layer *layers, *d__layers; @ This variables are defined in the setup routines: \index{atmosphere!atmosphere!setup} <>= void atmosphere::setup(float r0_, float L0, int _NLAYER, float *altitude, float *xi0, float *wind_speed, float *wind_direction) { wavelength = 500e-9; wavenumber = wavelength/2/PI; r0 = r0_; N_LAYER = _NLAYER; turbulence.L0 = L0; turbulence.setup(altitude, xi0, wind_speed, wind_direction, N_LAYER); field_size = 0.0; N_PHASE_LAYER = 0; W = 0.0; N_W = 0; layers_OSF = 2; layers_duration = 0.0; layers_tau0 = 0.0; <> phase_screen_LAYER = NULL; d__phase_screen_LAYER = NULL; info(); } @ \paragraph{Ray tracing} The atmospheric phase screens of each turbulence layer can be pre--computed and the phase screen in the aperture is derived by ray tracing (using bi--linear interpolation) through the layers. The additional input parameters are the width [[W]] and the resolution [[N_W]] of the phase screens in the direction perpendicular to the winds, the field--of--view [[field_size]] and the phase screen time length [[duration]]. \index{atmosphere!atmosphere!setup} <>= void atmosphere::setup(float r0_, float L0, int _NLAYER, float *altitude, float *xi0, float *wind_speed, float *wind_direction, float _W_, int _N_W_, float _field_size_, float duration) { wavelength = 500e-9; wavenumber = wavelength/2/PI; r0 = r0_; N_LAYER = _NLAYER; turbulence.L0 = L0; turbulence.setup(altitude, xi0, wind_speed, wind_direction, N_LAYER); field_size = _field_size_; W = _W_; N_W = _N_W_; layers_OSF = 2; layers_duration = duration; layers_tau0 = 0.0; <> HANDLE_ERROR( cudaMalloc( (void**)&d__phase_screen_LAYER, N_PHASE_LAYER*sizeof(float) ) ); <> } @ The pre--computed phase screens can also be saved in a file [[fullpath_to_phasescreens]] for latter use. The phase screens are split in [[N_DURATION]] chunks of the same [[duration]]. The phase screens are loaded one chunk at a time in the device to no overload the device memory. The total time length of the phase screens are then $[[N_DURATION]]\times[[duration]]$. If the file already exists, it is instead loaded into the structure. <>= void atmosphere::setup(float r0_, float L0, int _NLAYER, float *altitude, float *xi0, float *wind_speed, float *wind_direction, float _W_, int _N_W_, float _field_size_, float duration, const char *fullpath_to_phasescreens, int _N_DURATION) { wavelength = 500e-9; wavenumber = wavelength/2/PI; r0 = r0_; N_LAYER = _NLAYER; turbulence.L0 = L0; turbulence.setup(altitude, xi0, wind_speed, wind_direction, N_LAYER); field_size = _field_size_; W = _W_; N_W = _N_W_; layers_OSF = 2; layers_duration = duration; layers_tau0 = 0.0; <> <> } @ with <>= HANDLE_ERROR( cudaMalloc( (void**)&d__phase_screen_LAYER, N_PHASE_LAYER*sizeof(float) ) ); N_DURATION = _N_DURATION; struct stat sb; if (stat(fullpath_to_phasescreens,&sb)==0) { <> } else { save_layer_phasescreens(fullpath_to_phasescreens, N_DURATION); } @ %def save_layer_phasescreens For the GMT simulations, the GMT standard atmosphere model is called with: \index{atmosphere!atmosphere!gmt\_setup} <>= void atmosphere::gmt_setup() { wavelength = 500e-9; wavenumber = wavelength/2/PI; r0 = 15e-2; turbulence.L0 = 60; ID = 0; <> field_size = 0.0; N_PHASE_LAYER = 0; W = 0.0; N_W = 0; layers_OSF = 2; layers_duration = 0.0; layers_tau0 = 0.0; <> phase_screen_LAYER = NULL; d__phase_screen_LAYER = NULL; } @ The height of the atmosphere entrance pupil needs to be set before setting--up the atmosphere: <>= void atmosphere::gmt_set_eph(float _EPH_) { EPH = _EPH_; } @ The GMT atmosphere ID needs to be set before setting--up the atmosphere: <>= void atmosphere::gmt_set_id(int _ID_) { ID = _ID_; } @ with <>= N_LAYER = 7; float altitude[7], xi0[7], wind_speed[7], wind_direction[7]; int n_bytes = sizeof(float)*7; switch(ID) { case 2 : { INFO("@(CEO)>atmosphere: GMT case 2: STRONG GROUND LAYER (Goodwin LCO Jan08, case 7)\n"); float altitude_2[] = {25, 275, 425, 1250, 4000, 8000, 13000}, xi0_2[] = {0.2039, 0.1116, 0.1706, 0.2281, 0.1694, 0.0615, 0.054}, wind_speed_2[] = {9.4092, 9.6384, 9.7905, 2.7028, 8.2883, 30.1824, 13.0364}, wind_direction_2[] = {0.0136, 0.1441, 0.2177, 0.5672, 1.2584, 1.6266, 1.7462}; memcpy(altitude, altitude_2, n_bytes); memcpy(xi0, xi0_2, n_bytes); memcpy(wind_speed, wind_speed_2, n_bytes); memcpy(wind_direction, wind_direction_2, n_bytes); break; } case 3 : { INFO("@(CEO)>atmosphere: GMT case 3: GOOD CONDITIONS (Goodwin LCO Jan08, case 1)\n"); float altitude_3[] = {25, 275, 425, 1250, 4000, 8000, 13000}, xi0_3[] = {0.1410, 0.0381, 0.0542, 0.3403, 0.2528, 0.0917, 0.0819}, wind_speed_3[] = {2.1953, 2.2575, 2.3029, 2.7028, 8.2883, 30.1824, 13.0364}, wind_direction_3[] = {0.0136, 0.1441, 0.2177, 0.5672, 1.2584, 1.6266, 1.7462}; memcpy(altitude, altitude_3, n_bytes); memcpy(xi0, xi0_3, n_bytes); memcpy(wind_speed, wind_speed_3, n_bytes); memcpy(wind_direction, wind_direction_3, n_bytes); break; } case 4 : { INFO("@(CEO)>atmosphere: GMT case 4: POOR CONDITIONS (Goodwin LCO Jan08, case 9)\n"); float altitude_4[] = {25, 275, 425, 1250, 4000, 8000, 13000}, xi0_4[] = {0.1335, 0.0730, 0.1117, 0.3311, 0.2170, 0.0613, 0.0724}, wind_speed_4[] = {9.4092, 9.6384, 9.7905, 10.8527, 18.2550, 39.1520, 43.7936}, wind_direction_4[] = {0.0136, 0.1441, 0.2177, 0.5672, 1.2584, 1.6266, 1.7462}; memcpy(altitude, altitude_4, n_bytes); memcpy(xi0, xi0_4, n_bytes); memcpy(wind_speed, wind_speed_4, n_bytes); memcpy(wind_direction, wind_direction_4, n_bytes); break; } case 5 : { INFO("@(CEO)>atmosphere: GMT case 5: THIN CLOUDS (Goodwin LCO Jan08, case 5)\n"); float altitude_5[] = {25, 275, 425, 1250, 4000, 8000, 13000}, xi0_5[] = {0.1335, 0.0730, 0.1117, 0.3311, 0.2170, 0.0613, 0.0724}, wind_speed_5[] = {9.4092, 9.6384, 9.7905, 10.8527, 18.2550, 39.1520, 43.7936}, wind_direction_5[] = {0.0136, 0.1441, 0.2177, 0.5672, 1.2584, 1.6266, 1.7462}; memcpy(altitude, altitude_5, n_bytes); memcpy(xi0, xi0_5, n_bytes); memcpy(wind_speed, wind_speed_5, n_bytes); memcpy(wind_direction, wind_direction_5, n_bytes); break; } default : { INFO("@(CEO)>atmosphere: GMT case 1: MEDIAN (Goodwin LCO Jan08, case 5)\n"); float altitude_1[] = {25, 275, 425, 1250, 4000, 8000, 13000}, xi0_1[] = {0.1257, 0.0874, 0.0666, 0.3498, 0.2273, 0.0681, 0.0751}, wind_speed_1[] = {5.6540, 5.7964, 5.8942, 6.6370, 13.2925, 34.8250, 29.4187}, wind_direction_1[] = {0.0136, 0.1441, 0.2177, 0.5672, 1.2584, 1.6266, 1.7462}; memcpy(altitude, altitude_1, n_bytes); memcpy(xi0, xi0_1, n_bytes); memcpy(wind_speed, wind_speed_1, n_bytes); memcpy(wind_direction, wind_direction_1, n_bytes); } } for (int k=0;k>= void atmosphere::gmt_setup(float r0_, float L0) { wavelength = 500e-9; wavenumber = wavelength/2/PI; r0 = r0_; turbulence.L0 = L0; <> field_size = 0.0; N_PHASE_LAYER = 0; W = 0.0; N_W = 0; layers_OSF = 2; layers_duration = 0.0; layers_tau0 = 0.0; <> phase_screen_LAYER = NULL; d__phase_screen_LAYER = NULL; } @ <>= void atmosphere::gmt_setup(float r0_, float L0, int _RAND_SEED_) { wavelength = 500e-9; wavenumber = wavelength/2/PI; r0 = r0_; turbulence.L0 = L0; <> field_size = 0.0; N_PHASE_LAYER = 0; W = 0.0; N_W = 0; layers_OSF = 2; layers_duration = 0.0; layers_tau0 = 0.0; <> LOCAL_RAND_SEED = _RAND_SEED_; <> phase_screen_LAYER = NULL; d__phase_screen_LAYER = NULL; } @ <>= void atmosphere::gmt_setup(float r0_, float L0, float _W_, int _N_W_, float _field_size_, float duration) { <> <> HANDLE_ERROR( cudaMalloc( (void**)&d__phase_screen_LAYER, N_PHASE_LAYER*sizeof(float) ) ); <> } @ with <>= wavelength = 500e-9; wavenumber = wavelength/2/PI; r0 = r0_; turbulence.L0 = L0; <> field_size = _field_size_; W = _W_; N_W = _N_W_; N_PHASE_LAYER = 0; layers_OSF = 2; layers_duration = duration; layers_tau0 = 0.0; @ <>= void atmosphere::gmt_setup(float r0_, float L0, float _W_, int _N_W_, float _field_size_, float duration, int _RAND_SEED_) { <> <> LOCAL_RAND_SEED = _RAND_SEED_; <> HANDLE_ERROR( cudaMalloc( (void**)&d__phase_screen_LAYER, N_PHASE_LAYER*sizeof(float) ) ); <> } @ The phase screen can be <>= void atmosphere::gmt_setup(float r0_, float L0, float _W_, int _N_W_, float _field_size_, float duration, const char *fullpath_to_phasescreens, int _N_DURATION) { <> <> <> } @ <>= void atmosphere::gmt_setup(float r0_, float L0, float _W_, int _N_W_, float _field_size_, float duration, const char *fullpath_to_phasescreens, int _N_DURATION, int _RAND_SEED_) { <> <> LOCAL_RAND_SEED = _RAND_SEED_; <> <> } @ <>= <> LOCAL_RAND_SEED = RAND_SEED; <> @ <>= info(); N_DURATION = 0; <> HANDLE_ERROR( cudaMalloc( (void**)&d__turbulence, sizeof(profile) ) ); HANDLE_ERROR( cudaMemcpy( d__turbulence, &turbulence, sizeof(profile), cudaMemcpyHostToDevice ) ); @ <>= int nel = N_LAYER*turbulence.N_k*turbulence.N_a; stopwatch tid; tid.tic(); INFO("\n@(CEO)>atmosphere: initializing %d variates with SEED=%d ...",nel,LOCAL_RAND_SEED); HANDLE_ERROR( cudaMalloc( (void**)&devStates, 4*nel*sizeof(curandState)) ); HANDLE_ERROR( cudaMalloc( (void**)&zeta1, nel*sizeof(float)) ); HANDLE_ERROR( cudaMalloc( (void**)&eta1, nel*sizeof(float)) ); HANDLE_ERROR( cudaMalloc( (void**)&zeta2, nel*sizeof(float)) ); HANDLE_ERROR( cudaMalloc( (void**)&eta2, nel*sizeof(float)) ); dim3 blockDim(64,1); dim3 gridDim( 1+4*nel/64 , 1 ); setupRandomSequence LLL gridDim,blockDim RRR (devStates, 4*nel, LOCAL_RAND_SEED); HANDLE_ERROR( cudaDeviceSynchronize() ); generateVariates LLL gridDim,blockDim RRR (zeta1, eta1, zeta2, eta2, devStates, nel); HANDLE_ERROR( cudaDeviceSynchronize() ); INFO("done!\n"); tid.toc(); @ \index{atmosphere!profile!setup} <>= void profile::setup(float *altitude_, float *xi0_, float *wind_speed_, float *wind_direction_, int N_LAYER) { altitude = (float*)malloc(sizeof(float)*N_LAYER); xi0 = (float*)malloc(sizeof(float)*N_LAYER); wind_speed = (float*)malloc(sizeof(float)*N_LAYER); wind_direction = (float*)malloc(sizeof(float)*N_LAYER); for (int k=0;kprofile:\n"); INFO(" . N_k = %f\n",N_k); INFO(" . N_a = %f\n",N_a); INFO(" . frequency range = %8.2f\n",f); INFO(" . frequency resolution = %6.4f\n",delta); INFO(" . minimum frequency = %6.4f\n",kmin); } @ \index{atmosphere!profile!cleanup} <>= void profile::cleanup(void) { free(altitude); free(xi0); free(wind_speed); free(wind_direction); } @ \index{atmosphere!atmosphere!save\_layer\_phasescreens} <>= void atmosphere::save_layer_phasescreens(const char* _fullpath_to_phasescreens, int _N_DURATION) { int k_DURATION; size_t N_WRITE, N_DATA; stopwatch tid; time_t now; struct tm beg; char buff[128]; FILE *fid; fid = fopen(_fullpath_to_phasescreens,"wb"); fwrite(&_N_DURATION,sizeof(int),1,fid); N_DATA = N_PHASE_LAYER*_N_DURATION; phase_screen_LAYER = (float*)malloc(sizeof(float)*N_DATA); for (k_DURATION=0;k_DURATION<_N_DURATION;k_DURATION++) { time(&now); beg = *localtime(&now); INFO("\n * LAYER PHASE SCREENS COMPUTING: %d/%d *\n",k_DURATION+1,_N_DURATION); layers_tau0 = k_DURATION*layers_duration; <> HANDLE_ERROR( cudaMemcpy( phase_screen_LAYER + k_DURATION*N_PHASE_LAYER, d__phase_screen_LAYER, sizeof(float)*N_PHASE_LAYER, cudaMemcpyDeviceToHost ) ); N_WRITE = fwrite(phase_screen_LAYER + k_DURATION*N_PHASE_LAYER, sizeof(float),N_PHASE_LAYER,fid); INFO(" ==>> Written %lu elements out of %lu elements to file %s\n", N_WRITE,N_PHASE_LAYER,_fullpath_to_phasescreens); if (N_WRITE>= FILE *layers_fid; size_t N_READ; layers_fid = fopen(fullpath_to_phasescreens,"rb"); INFO("\n@(CEO)>atmosphere: Loading the phase screens in the layers from file %s...\n",fullpath_to_phasescreens); N_READ = fread(&N_DURATION, sizeof(int), 1,layers_fid); if (N_READ==0) ERROR("Cannot read file!"); tid.tic(); phase_screen_LAYER = (float*)malloc(sizeof(float)*N_PHASE_LAYER*N_DURATION); N_READ = fread(phase_screen_LAYER,sizeof(float),N_PHASE_LAYER*N_DURATION,layers_fid); if (N_READ==0) ERROR("Cannot read file!"); HANDLE_ERROR( cudaMemcpy( d__phase_screen_LAYER, phase_screen_LAYER, sizeof(float)*N_PHASE_LAYER, cudaMemcpyHostToDevice ) ); tid.toc(); fclose(layers_fid); @ MEMORY MAP: <>= FILE *layers_fid; size_t N_READ; layers_fid = fopen(fullpath_to_phasescreens,"rb"); N_READ = fread(&N_DURATION, sizeof(int), 1,layers_fid); if (N_READ==0) ERROR("Cannot read file!"); fclose(layers_fid); struct stat sb; int fd; off_t offset, pa_offset; INFO("\n@(CEO)>atmosphere: Memory-mapping the phase screens in the layers from file %s...\n",fullpath_to_phasescreens); tid.tic(); fd = open(fullpath_to_phasescreens, O_RDONLY); if (fd == -1) handle_error("open"); if (fstat(fd, &sb) == -1) /* To obtain file size */ handle_error("fstat"); offset = sizeof(int); pa_offset = offset & ~(sysconf(_SC_PAGE_SIZE) - 1); mmap_size = sb.st_size - pa_offset; //phase_screen_LAYER = (float*)malloc(sizeof(float)*N_PHASE_LAYER*N_DURATION); //N_READ = fread(phase_screen_LAYER,sizeof(float),N_PHASE_LAYER*N_DURATION,layers_fid); //if (N_READ==0) // ERROR("Cannot read file!"); phase_screen_LAYER = (float *) mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, pa_offset); if (phase_screen_LAYER == MAP_FAILED) handle_error("mmap"); HANDLE_ERROR( cudaMemcpy( d__phase_screen_LAYER, phase_screen_LAYER, sizeof(float)*N_PHASE_LAYER, cudaMemcpyHostToDevice ) ); tid.toc(); @ The phase screens of the layers are defined such as the x--axis of the screens are aligned with the wind direction $\theta$. The sketch below shows a phase screen in blue and inside, the location of the telescope pupil in green at W at time $t=0$.. For a layer at altitude $h$ and a field--of--view $\mathrm{fov}$, the pupil projected onto the layer is given by the pink square. \begin{center} \input{phaseScreenDef.tex} \end{center} An atmospheric layer parameters are set with: \index{atmosphere!layer!setup} <>= int layer::setup(float _altitude, float _xi0, float _wind_speed, float _wind_direction, float W, int N_W, float field_size, int OSF, float duration) { int N = 0; float c, d, tan_wind; altitude = _altitude; xi0 = _xi0; wind_speed = _wind_speed; wind_direction = _wind_direction; vx = wind_speed*cosf(wind_direction); vy = wind_speed*sinf(wind_direction); if (N_W>0) { d = W/(N_W-1); d /= OSF; tan_wind = tanf(wind_direction); c = ( fabsf(tan_wind) + 1.0)/sqrtf(tan_wind*tan_wind + 1.0); WIDTH_LAYER = (W + 2*altitude*tanf(0.5*field_size))*c; LENGTH_LAYER = WIDTH_LAYER + wind_speed*duration; N_WIDTH_LAYER = (int) ceilf( WIDTH_LAYER/d ) + 1; N_LENGTH_LAYER = (int) ceilf( LENGTH_LAYER/d ) + 1; WIDTH_LAYER = d*(N_WIDTH_LAYER-1); LENGTH_LAYER = d*(N_LENGTH_LAYER-1); N = N_WIDTH_LAYER*N_LENGTH_LAYER; INFO("(%5.2fX%5.2f) meters & (%dX%d) pixels ==> %.2fMB \n", LENGTH_LAYER,WIDTH_LAYER, N_LENGTH_LAYER,N_WIDTH_LAYER,N*sizeof(float)/powf(2,20)); } return N; } @ The [[layer]] structures are filled with their parameters and copied to the device <>= layers = (layer*) malloc( sizeof(layer)*N_LAYER ); HANDLE_ERROR( cudaMalloc( (void**)&d__layers, sizeof(layer)*N_LAYER ) ); N_PHASE_LAYER = 0; INFO("\n@(CEO)>atmosphere: __ Layer Phase Screen __\n"); for (int i_LAYER=0; i_LAYER0) INFO(" Total: %.2fMB\n",N_PHASE_LAYER*sizeof(float)/powf(2,20)); @ The phase screen of each layer is computed with: <>= dim3 blockDimLayers(16,16); dim3 gridDimLayers; source layers_src; layers_src.setup("V",0.0,0.0,INFINITY); int N = 0, i_LAYER, N_L, __N_W__; float delta_W = W/(N_W-1); delta_W /= layers_OSF; tid.tic(); INFO(" . Computing layer phase screen: "); for (i_LAYER=0;i_LAYER>= __global__ void setupRandomSequence(curandState *state, int n_state, int seed) { int id; id = blockIdx.x * blockDim.x + threadIdx.x; if (id>= __global__ void generateVariates(float *zeta1, float *eta1, float *zeta2, float *eta2, curandState *state, int n_state) { int id; curandState localState; id = blockIdx.x * blockDim.x + threadIdx.x; if (id>= void atmosphere::reset(void) { int nel = N_LAYER*turbulence.N_k*turbulence.N_a; dim3 blockDim(64,1); dim3 gridDim( 1+nel/64 , 1 ); generateVariates LLL gridDim,blockDim RRR (zeta1, eta1, zeta2, eta2, devStates, nel); } @ The dynamically allocated variables are freed in the cleanup routine: \index{atmosphere!atmosphere!cleanup} <>= void atmosphere::cleanup(void) { INFO("@(CEO)>atmosphere: freeing memory!\n"); turbulence.cleanup(); HANDLE_ERROR( cudaFree( devStates ) ); HANDLE_ERROR( cudaFree( zeta1 )); HANDLE_ERROR( cudaFree( eta1 ) ); HANDLE_ERROR( cudaFree( zeta2 ) ); HANDLE_ERROR( cudaFree( eta2 ) ); if (d__phase_screen_LAYER!=NULL) { HANDLE_ERROR( cudaFree( d__phase_screen_LAYER ) ); free( layers ); HANDLE_ERROR( cudaFree( d__layers ) ); } if (phase_screen_LAYER!=NULL) munmap(phase_screen_LAYER, mmap_size); // free( phase_screen_LAYER ); } @ The phase screen are defined in the telescope pupil at the ground \index{atmosphere!atmosphere!get\_phase\_screen} <>= void atmosphere::get_phase_screen(float *phase_screen, float const *x, float const *y, int N_xy, source *src, float time) { dim3 blockDim(16,16); dim3 gridDim( ceilf(sqrt(N_xy)/16) , ceilf(sqrt(N_xy)/16)); plps LLL gridDim , blockDim RRR (phase_screen, x, y, N_xy, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } @ \index{atmosphere!atmosphere!get\_phase\_screen} <>= void atmosphere::get_phase_screen(float const *x, float const *y, int N_xy, source *src, float time) { dim3 blockDim(16,16); dim3 gridDim( ceilf(sqrt(N_xy)/16) , ceilf(sqrt(N_xy)/16)); plps LLL gridDim , blockDim RRR (src->wavefront.phase, x, y, N_xy, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } @ and computed with the kernel <>= __global__ void plps(float *phase_screen, float const *x, float const *y, int N_xy, profile *turb, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time) { <> <> sum = 0; if (kl> for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> for (l=0;l> } sum += sqrt_spectrum_kernel*sum_l; } } phase_screen[kl] = 1.4*r0*sum; } } @ The kernel starts with some declarations: <>= int i, j, l, ij, ijl, kl, i_source; float freq_mag0, delta_freq_mag0, f_red0, f_red, freq_L0_square, x_kl, y_kl, x_kl0, y_kl0, gl, freq_mag, delta_freq_mag, sqrt_spectrum_kernel, freq_ang, cos_freq_ang, sin_freq_ang; <>= <> float sum, sum_l; @ Each thread is computing one value of the phase screen at the coordinate [[x_kl = x[kl]]] and [[y_kl = y[kl]]] <>= i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; i_source = blockIdx.z; kl = j * gridDim.x * blockDim.x + i + i_source*N_xy; x_kl0 = x[kl]; y_kl0 = y[kl]; @ Next a few new variables are defined, $$[[f_red0]]=f^{1/N_k},$$, $$[[freq_mag0]]=\kappa_{min}{[[f_red0]]+1)\over 2[[f_red0]]},$$ $$[[delta_freq_mag0]]=\kappa_{min}{[[f_red0]]-1)\over [[f_red0]]},$$ $$[[freq_L0_square]]=\left(2\pi\over\mathcal L_0\right)^2.$$ <>= f_red0 = powf(turb->f,1/turb->N_k); freq_mag0 = 0.5*turb->kmin*( f_red0 + 1 )/f_red0; delta_freq_mag0 = turb->kmin*( f_red0 - 1 )/f_red0; f_red = 1; freq_L0_square = 2*PI/turb->L0; freq_L0_square *= freq_L0_square; @ The outer loop is the sum over the frequency magnitude. It computes $\bk$, $\dk$ and $\Gamma\left( \bk\right)$: <>= f_red *= f_red0; freq_mag = freq_mag0*f_red; delta_freq_mag = delta_freq_mag0*f_red; sqrt_spectrum_kernel = powf( freq_mag*freq_mag + freq_L0_square, -11.0/12.0)* sqrt(freq_mag*delta_freq_mag*turb->delta); @ The inner loop is the sum over the frequency angle. It computes $\varphi_j$, $\sin(\varphi_j)$ and $\cos(\varphi_j)$. <>= freq_ang = (j+0.5)*turb->delta; sincosf(freq_ang, &sin_freq_ang, &cos_freq_ang); ij = i*turb->N_a + j; <>= <> sum_l = 0; @ The innest loop is the sum over the layer. <>= <> <> <> <> @ The phase screen in the layers are moved along the x--axis, so the sum need to be modified accordingly: <>= <> <> <> <> @ with <>= v = hypotf(layers[l].vx,layers[l].vy); x_kl += layers[l].altitude*src[i_source].theta_x - v*time; y_kl += layers[l].altitude*src[i_source].theta_y; @ Some new variable definitions: <>= ijl = ij + l*turb->N_a*turb->N_k; gl = 1 - layers[l].altitude/src[i_source].height; x_kl = x_kl0; y_kl = y_kl0; @ The cone effect is applied to the layer coordinates: <>= x_kl *= gl; y_kl *= gl; @ The source angular and time offset are accounted for in the following: <>= x_kl += layers[l].altitude*src[i_source].theta_x - layers[l].vx*time; y_kl += layers[l].altitude*src[i_source].theta_y - layers[l].vy*time; @ The quadrature of the power spectrum density of the turbulent phase: <>= sum_l += sqrt(layers[l].xi0)* ( zeta1[ijl]*cosf( eta1[ijl] + freq_mag*( x_kl*cos_freq_ang + y_kl*sin_freq_ang ) ) + zeta2[ijl]*cosf( eta2[ijl] - freq_mag*( x_kl*sin_freq_ang - y_kl*cos_freq_ang ) )); @ A collimated beam can be forced by re--writing the sum over the layer without the cone effect: <>= <> <> <> @ The next routine compute the phase screen on a square grid of size [[N_x]]$\times$[[N_y]] \index{atmosphere!atmosphere!get\_phase\_screen} <>= void atmosphere::get_phase_screen(float const delta_x, int N_x, float const delta_y, int N_y, source *src, float time) { dim3 blockDim(16,16); dim3 gridDim( 1+N_x/16 , 1+N_y/16); square_plps LLL gridDim , blockDim RRR (src->wavefront.phase, delta_x, N_x, delta_y, N_y, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } <>= void atmosphere::get_phase_screen(float *phase_screen, float const delta_x, int N_x, float const delta_y, int N_y, source *src, float time) { dim3 blockDim(16,16); dim3 gridDim( 1+N_x/16 , 1+N_y/16); square_plps LLL gridDim , blockDim RRR (phase_screen, delta_x, N_x, delta_y, N_y, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } <>= void atmosphere::get_phase_screen(source *src, float const delta_x, int N_x, float const delta_y, int N_y, float time) { dim3 blockDim(16,16); dim3 gridDim( 1+N_x/16 , 1+N_y/16, src->N_SRC); if (src->wavefront.M==NULL) { square_plps LLL gridDim , blockDim RRR (src->wavefront.phase, delta_x, N_x, delta_y, N_y, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } else { square_plps_masked LLL gridDim , blockDim RRR (src->wavefront.phase, src->wavefront.M->m, delta_x, N_x, delta_y, N_y, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } } <>= void atmosphere::get_phase_screen(source *src, float time) { complex_amplitude *wavefront; wavefront = &(src->wavefront); dim3 blockDim(16,16); dim3 gridDim( 1+wavefront->M->size_px[0]/16 , 1+wavefront->M->size_px[1]/16, src->N_SRC); if (wavefront->M==NULL) square_plps LLL gridDim , blockDim RRR (wavefront->phase, wavefront->M->delta[0], wavefront->M->size_px[0], wavefront->M->delta[1], wavefront->M->size_px[1], d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); else square_plps_masked LLL gridDim , blockDim RRR (wavefront->phase, wavefront->M->m, wavefront->M->delta[0], wavefront->M->size_px[0], wavefront->M->delta[1], wavefront->M->size_px[1], d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } <>= void atmosphere::get_phase_screen(source *src, int N_SRC, float const delta_x, int N_x, float const delta_y, int N_y, float time) { dim3 blockDim(16,16); dim3 gridDim( 1+N_x/16 , 1+N_y/16, N_SRC); square_plps LLL gridDim , blockDim RRR (src->wavefront.phase, delta_x, N_x, delta_y, N_y, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } @ and computed with the kernel <>= __global__ void square_plps(float *phase_screen, float const delta_x, int N_x, float const delta_y, int N_y, profile *turb, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time) { <> <> sum = 0; if ( (i> for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> for (l=0;l> } sum += sqrt_spectrum_kernel*sum_l; } } phase_screen[kl] += 1.4*r0*sum; } } @ <>= __global__ void square_plps_layers(float *phase_screen, float const delta_x, int N_x, float const delta_y, int N_y, profile *turb, float r0, layer *layers, int n_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time) { float v; <> <> sum = 0; if ( (i> for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> // for (l=0;l> } sum += sqrt_spectrum_kernel*sum_l; } } phase_screen[kl] = 1.4*r0*sum; } } @ or if the wavefront of the source is masked: <>= __global__ void square_plps_masked(float *phase_screen, char *mask, float const delta_x, int N_x, float const delta_y, int N_y, profile *turb, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time) { <> <> sum = 0; if ( ( (i> for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> for (l=0;l> } sum += sqrt_spectrum_kernel*sum_l; } } phase_screen[kl] += 1.4*r0*sum; } } @ The following method is computing phase screens replacing the $11/6$ exponent of the phase power spectrum with a used defined exponent. <>= void atmosphere::get_phase_screen(source *src, float const delta_x, int N_x, float const delta_y, int N_y, float time, float exponent) { dim3 blockDim(16,16); dim3 gridDim( 1+N_x/16 , 1+N_y/16, src->N_SRC); if (src->wavefront.M==NULL) { square_gplps LLL gridDim , blockDim RRR (src->wavefront.phase, delta_x, N_x, delta_y, N_y, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time, exponent); } else { square_gplps_masked LLL gridDim , blockDim RRR (src->wavefront.phase, src->wavefront.M->m, delta_x, N_x, delta_y, N_y, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time, exponent); } } @ with the kernels <>= __global__ void square_gplps(float *phase_screen, float const delta_x, int N_x, float const delta_y, int N_y, profile *turb, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time, float exponent) { <> <> sum = 0; if ( (i> for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> for (l=0;l> } sum += sqrt_spectrum_kernel*sum_l; } } phase_screen[kl] = 1.4*r0*sum; } } @ and <>= __global__ void square_gplps_masked(float *phase_screen, char *mask, float const delta_x, int N_x, float const delta_y, int N_y, profile *turb, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time, float exponent) { <> <> sum = 0; if ( ( (i> for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> for (l=0;l> } sum += sqrt_spectrum_kernel*sum_l; } } phase_screen[kl] = 1.4*r0*sum; } } @ where <>= f_red *= f_red0; freq_mag = freq_mag0*f_red; delta_freq_mag = delta_freq_mag0*f_red; sqrt_spectrum_kernel = powf( freq_mag*freq_mag + freq_L0_square, -0.5*exponent)* sqrt(freq_mag*delta_freq_mag*turb->delta); @ Each thread is computing one value of the phase screen at the coordinate [[x_kl = i*delta_x]] and [[y_kl = j*delta_y]] <>= i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; i_source = blockIdx.z; kl = i * N_y + j + i_source*N_x*N_y; x_kl0 = i*delta_x - (N_x-1)*delta_x*0.5; y_kl0 = j*delta_y - (N_y-1)*delta_y*0.5; @ <>= i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; i_source = blockIdx.z; kl = i * N_y + j + i_source*N_x*N_y; x_kl0 = delta_x*(i - (N_y-1)*0.5 - N_x + N_y); y_kl0 = delta_y*(j - (N_y-1)*0.5); @ \subsection{Phase screen gradient} \label{sec:phase-scre-grad} For a given wavefront $\varphi(x,y)$ and the corresponding image through the aperture $A(x,y)$, the centroids are given by \begin{equation} \label{eq:14} s_{x,y} = {\lambda\over2\pi} {1\over S} \iint {\mathrm d\vec r} A(x,y) {\partial \varphi(x,y)\over\partial_{x,y}} \end{equation} with $$S=\iint {\mathrm d\vec r} A(x,y)$$. Integration by parts leads to the equivalent expression: \begin{equation} \label{eq:15} s_{x,y} = -{\lambda\over2\pi} {1\over S} \iint {\mathrm d\vec r} {\partial A(x,y) \over\partial_{x,y}} \varphi(x,y) \end{equation} The centroids corresponding to a wavefront on a square pupil of size $d$ is written: \begin{equation} \label{eq:16} s_{x,y} = {\lambda\over2\pi} {1\over d^2} \iint {\mathrm d\vec r} \Pi\left( x-x_L\over d \right) \Pi\left( y-y_L\over d \right) {\partial \varphi(x,y)\over\partial_{x,y}}. \end{equation} where $x_L$ and $y_L$ are the pupil center coordinates. It is also given by the difference of the wavefront averaged on both sides of the pupil: \begin{eqnarray} \label{eq:17} s_x &=& {\lambda\over2\pi} {1\over d} \int_{-d/2}^{d/2} {\mathrm d}y \varphi(x-x_L,y-y_L) \left[\delta\left(x+{d\over2}\right)-\delta\left(x-{d\over2}\right)\right] \\ s_y &=& {\lambda\over2\pi} {1\over d} \int_{-d/2}^{d/2} {\mathrm d}x \varphi(x-x_L,y-y_L) \left[\delta\left(y+{d\over2}\right)-\delta\left(y-{d\over2}\right)\right] \\ \end{eqnarray} Inserting Eq.~(\ref{eq:13}) into Eq.~(\ref{eq:17}) and performing the integration leads to: \begin{eqnarray} \label{eq:18} s_x &=& -{\lambda\over 2\pi} 1.4 r_0^{-{5\over6}} \sum_{i=1}^{N_\kappa}\sum_{j=1}^{N_\varphi} \Gamma\left( \bk\right) \sum_{l=1}^{[[N_LAYER]]} \xi_{0l}^{1\over2} \mathrm{sinc}\left(\gamma_l^\ast\bkx d\over 2\right) \mathrm{sinc}\left(\gamma_l^\ast\bky d\over 2\right) \\ && \times \left\{ \zeta_{1l}(i,j) \sin\left[ \eta_{1l}(i,j) + \bkx x_{L,l} + \bky y_{L,l} \right] \bkx \right. \nonumber\\ && \quad - \left. \zeta_{2l}(i,j) \sin\left[ \eta_{2l}(i,j) - \bky x_{L,l} + \bkx y_{L,l} \right] \bky \right\} \nonumber\\ s_y &=& -{\lambda\over 2\pi} 1.4 r_0^{-{5\over6}} \sum_{i=1}^{N_\kappa}\sum_{j=1}^{N_\varphi} \Gamma\left( \bk\right) \sum_{l=1}^{[[N_LAYER]]} \xi_{0l}^{1\over2} \mathrm{sinc}\left(\gamma_l^\ast\bkx d\over 2\right) \mathrm{sinc}\left(\gamma_l^\ast\bky d\over 2\right) \\ && \times \left\{ \zeta_{1l}(i,j) \sin\left[ \eta_{1l}(i,j) + \bkx x_{L,l} + \bky y_{L,l} \right] \bky \right. \nonumber\\ && \quad + \left. \zeta_{2l}(i,j) \sin\left[ \eta_{2l}(i,j) - \bky x_{L,l} + \bkx y_{L,l} \right] \bkx \right\} \nonumber \end{eqnarray} with \begin{eqnarray} \label{eq:19} x_{L,l} &=& \gamma_l^\ast (x_L-v_xt) + h_l\theta_{\ast x} \\ y_{L,l} &=& \gamma_l^\ast (y_L-v_yt) + h_l\theta_{\ast y} \\ \gamma_l^\ast &=& 1 - {h_l\over z_\ast}\\ \bkx &=& \bk \cos \phi_j \\ \bky &=& \bk \sin \phi_j \end{eqnarray} The phase screen gradient are computed at a set of location given by the $x$ and $y$ coordinate arrays $[N_{xy}]$ \index{atmosphere!atmosphere!get\_phase\_screen\_gradient} <>= void atmosphere::get_phase_screen_gradient(float *sx, float *sy, float *x, float *y, int Nxy, source *src, float time) { dim3 blockDim(256); dim3 gridDim( 1+Nxy/256); plps_xy_gradient LLL gridDim , blockDim RRR (sx, sy, x,y,Nxy, d__turbulence, wavelength, r0, d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } @ with the kernel <>= __global__ void plps_xy_gradient(float *sx, float *sy, float *x, float *y, int Nxy, profile *turb, float wavelength, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time) { <> float sum_x, sum_l_x, sum_y, sum_l_y, red0, red1, red2; i = blockIdx.x * blockDim.x + threadIdx.x; kl=i; sum_x = sum_y = 0; if (i> for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> cos_freq_ang *= freq_mag; sin_freq_ang *= freq_mag; sum_l_x = sum_l_y = 0; for (l=0;lN_a*turb->N_k; gl = 1 - layers[l].altitude/src[i_source].height; x_kl = gl*x_kl0; y_kl = gl*y_kl0; x_kl += layers[l].altitude*src[i_source].theta_x - layers[l].vx*time; y_kl += layers[l].altitude*src[i_source].theta_y - layers[l].vy*time; red0 = sqrt(layers[l].xi0); red1 = zeta1[ijl]*sin( eta1[ijl] + cos_freq_ang*x_kl + sin_freq_ang*y_kl ); red2 = zeta2[ijl]*sin( eta2[ijl] - sin_freq_ang*x_kl + cos_freq_ang*y_kl ); sum_l_x += red0*( red1*cos_freq_ang - red2*sin_freq_ang); sum_l_y += red0*( red1*sin_freq_ang + red2*cos_freq_ang); } sum_x += sqrt_spectrum_kernel*sum_l_x; sum_y += sqrt_spectrum_kernel*sum_l_y; } } red0 = 1.4*powf(r0,-5.0/6.0)*wavelength/(2*PI); sx[kl] = red0*sum_x; sy[kl] = red0*sum_y; } } @ The phase screen gradient are computed on a $N_L\times N_L$ array with a resolution $d$ \index{atmosphere!atmosphere!get\_phase\_screen\_gradient} <>= void atmosphere::get_phase_screen_gradient(float *sx, float *sy, int NL, float const d, source *src, float time) { dim3 blockDim(16,16); dim3 gridDim( 1+NL/16 , 1+NL/16); plps_gradient LLL gridDim , blockDim RRR (sx, sy, NL, d, d__turbulence, wavelength, r0, d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } <>= void atmosphere::get_phase_screen_gradient(centroiding *cog, int NL, float const d, source *src, float time) { dim3 blockDim(16,16); dim3 gridDim( 1+NL/16 , 1+NL/16, src->N_SRC); if (cog->MASK_SET) plps_gradient_mask LLL gridDim , blockDim RRR (cog->d__cx, cog->d__cy, NL, cog->lenslet_mask, d, d__turbulence, wavelength, r0, d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); else plps_gradient LLL gridDim , blockDim RRR (cog->d__cx, cog->d__cy, NL, d, d__turbulence, wavelength, r0, d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } void atmosphere::get_phase_screen_gradient_rolling_shutter(centroiding *cog, int NL, float const d, source *src, float time, float delay) { dim3 blockDim(16,16); dim3 gridDim( 1+NL/16 , 1+NL/16, src->N_SRC); if (cog->MASK_SET) plps_gradient_mask_rolling_shutter LLL gridDim , blockDim RRR (cog->d__cx, cog->d__cy, NL, cog->lenslet_mask, d, d__turbulence, wavelength, r0, d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time, delay); else plps_gradient_rolling_shutter LLL gridDim , blockDim RRR (cog->d__cx, cog->d__cy, NL, d, d__turbulence, wavelength, r0, d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time,delay); } <>= void atmosphere::get_phase_screen_gradient(centroiding *cog, int NL, float const d, source *src, int N_SRC, float time) { dim3 blockDim(16,16); dim3 gridDim( 1+NL/16 , 1+NL/16, N_SRC); if (cog->MASK_SET) plps_gradient_mask LLL gridDim , blockDim RRR (cog->d__cx, cog->d__cy, NL, cog->lenslet_mask, d, d__turbulence, wavelength, r0, d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); else plps_gradient LLL gridDim , blockDim RRR (cog->d__cx, cog->d__cy, NL, d, d__turbulence, wavelength, r0, d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } <>= void atmosphere::get_phase_screen_gradient(centroiding *cog, int NL, char *valid_lenslet, float const d, source *src, int N_SRC, float time) { dim3 blockDim(16,16); dim3 gridDim( 1+NL/16 , 1+NL/16, N_SRC); plps_gradient_mask LLL gridDim , blockDim RRR (cog->d__cx, cog->d__cy, NL, valid_lenslet, d, d__turbulence, wavelength, r0, d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } @ with the kernel <>= __global__ void plps_gradient(float *sx, float *sy, int NL, float const d, profile *turb, float wavelength, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time) { <> float sum_x, sum_l_x, sum_y, sum_l_y, red0, red1, red2; <> sum_x = sum_y = 0; if ( (i> for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> cos_freq_ang *= freq_mag; sin_freq_ang *= freq_mag; sum_l_x = sum_l_y = 0; for (l=0;l> } sum_x += sqrt_spectrum_kernel*sum_l_x; sum_y += sqrt_spectrum_kernel*sum_l_y; } } red0 = 1.4*powf(r0,-5.0/6.0)*wavelength/(2*PI); sx[kl] = red0*sum_x; sy[kl] = red0*sum_y; } } __global__ void plps_gradient_rolling_shutter(float *sx, float *sy, int NL, float const d, profile *turb, float wavelength, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float _time_, float delay) { <> float sum_x, sum_l_x, sum_y, sum_l_y, red0, red1, red2; float time, tau; tau = delay/NL/NL; <> sum_x = sum_y = 0; if ( (i> time = _time_ + (i*NL+j)*tau; for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> cos_freq_ang *= freq_mag; sin_freq_ang *= freq_mag; sum_l_x = sum_l_y = 0; for (l=0;l> } sum_x += sqrt_spectrum_kernel*sum_l_x; sum_y += sqrt_spectrum_kernel*sum_l_y; } } red0 = 1.4*powf(r0,-5.0/6.0)*wavelength/(2*PI); sx[kl] = red0*sum_x; sy[kl] = red0*sum_y; } } @ A mask identifying the valid lenslet can also be passed to the routine: \index{atmosphere!atmosphere!get\_phase\_screen\_gradient} <>= void atmosphere::get_phase_screen_gradient(float *sx, float *sy, int NL, char *valid_lenslet, float const d, source *src, float time) { dim3 blockDim(16,16); dim3 gridDim( 1+NL/16 , 1+NL/16); plps_gradient_mask LLL gridDim , blockDim RRR (sx, sy, NL, valid_lenslet, d, d__turbulence, wavelength, r0, d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } @ with the kernel <>= __global__ void plps_gradient_mask(float *sx, float *sy, int NL, char *valid_lenslet, float const d, profile *turb, float wavelength, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time) { <> float sum_x, sum_l_x, sum_y, sum_l_y, red0, red1, red2; <> sum_x = sum_y = 0; if ( (i> if (valid_lenslet[i*NL+j]>0) { for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> cos_freq_ang *= freq_mag; sin_freq_ang *= freq_mag; sum_l_x = sum_l_y = 0; for (l=0;l> } sum_x += sqrt_spectrum_kernel*sum_l_x; sum_y += sqrt_spectrum_kernel*sum_l_y; } } red0 = 1.4*powf(r0,-5.0/6.0)*wavelength/(2*PI); sx[kl] = red0*sum_x; sy[kl] = red0*sum_y; } else { sx[kl] = 0.0; sy[kl] = 0.0; } } } __global__ void plps_gradient_mask_rolling_shutter(float *sx, float *sy, int NL, char *valid_lenslet, float const d, profile *turb, float wavelength, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float _time_, float delay) { <> float sum_x, sum_l_x, sum_y, sum_l_y, red0, red1, red2; float time, tau; tau = delay/NL/NL; <> sum_x = sum_y = 0; if ( (i> time = _time_ + (i*NL+j)*tau; if (valid_lenslet[i*NL+j]>0) { for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> cos_freq_ang *= freq_mag; sin_freq_ang *= freq_mag; sum_l_x = sum_l_y = 0; for (l=0;l> } sum_x += sqrt_spectrum_kernel*sum_l_x; sum_y += sqrt_spectrum_kernel*sum_l_y; } } red0 = 1.4*powf(r0,-5.0/6.0)*wavelength/(2*PI); sx[kl] = red0*sum_x; sy[kl] = red0*sum_y; } else { sx[kl] = 0.0; sy[kl] = 0.0; } } } @ Each thread is computing one value of the phase screen gradient at the coordinate [[x_kl = (i+1/2)*d]] and [[y_kl = (j+1/2)*delta_y]] <>= i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; i_source = blockIdx.z; kl = i * NL + j + 2*i_source*NL*NL; x_kl0 = (i+0.5)*d - NL*d*0.5; y_kl0 = (j+0.5)*d - NL*d*0.5; @ The innest loop is the sum over the layer. <>= ijl = ij + l*turb->N_a*turb->N_k; gl = 1 - layers[l].altitude/src[i_source].height; x_kl = gl*x_kl0; y_kl = gl*y_kl0; x_kl += layers[l].altitude*src[i_source].theta_x - layers[l].vx*time; y_kl += layers[l].altitude*src[i_source].theta_y - layers[l].vy*time; red0 = sqrt(layers[l].xi0)* sinc_atm(0.5*gl*cos_freq_ang*d)*sinc_atm(0.5*gl*sin_freq_ang*d); red1 = zeta1[ijl]*sin( eta1[ijl] + cos_freq_ang*x_kl + sin_freq_ang*y_kl ); red2 = zeta2[ijl]*sin( eta2[ijl] - sin_freq_ang*x_kl + cos_freq_ang*y_kl ); sum_l_x += red0*( red1*cos_freq_ang - red2*sin_freq_ang); sum_l_y += red0*( red1*sin_freq_ang + red2*cos_freq_ang); <>= __device__ float sinc_atm(float x) { return (x==0) ? 1.0 : sin(x) / (x) ; } @ \subsection{Jitter} \label{sec:jitter} For a circular pupil of radius $R$, the centroids are given by: \begin{eqnarray} \label{eq:20} s_x &=& -{\lambda\over2\pi} {1 \over \pi R} \int_0^{2\pi} {\mathrm d}\theta \cos(\theta) \varphi(R,\theta)\\ s_y &=& -{\lambda\over2\pi} {1 \over \pi R} \int_0^{2\pi} {\mathrm d}\theta \sin(\theta) \varphi(R,\theta) \end{eqnarray} @ \index{atmosphere!atmosphere!get\_phase\_screen\_circ\_centroids} <>= void atmosphere::get_phase_screen_circ_centroids(centroiding *cog, const float R, source *src, int N_SRC, float time) { int N_o = 360; dim3 blockDim(16,16); dim3 gridDim( ceilf(sqrt(N_o)/16) , ceilf(sqrt(N_o)/16)); circ_centroids_kernel LLL gridDim , blockDim RRR (cog->d__cx, cog->d__cy, N_o, R, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } <>= __global__ void circ_centroids_kernel(float *cx, float *cy, int N_xy, const float R, profile *turb, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time) { <> i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; i_source = blockIdx.z; kl = j * gridDim.x * blockDim.x + i + i_source*N_xy; sum = 0; if (kl> float o, so, co, c, _cx_, _cy_, phase_screen; c = -2.0/(R*N_xy); o = 2*PI*kl/N_xy; sincosf(o,&so,&co); x_kl0 = R*co; y_kl0 = R*so; cx[0] = cy[0] = 0.0; for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> for (l=0;l> } sum += sqrt_spectrum_kernel*sum_l; } } phase_screen = 1.4*r0*sum; _cx_ = c*co*phase_screen; _cy_ = c*so*phase_screen; atomicAdd(&cx[0], _cx_); atomicAdd(&cy[0], _cy_); } } @ The uplink jitter is given by: \begin{eqnarray} \label{eq:21} s_x &=& \sum_k^{[[N_LAYER]]} \left( 1 - {z_k\over L} \right) s_x(k) \\ s_y &=& \sum_k^{[[N_LAYER]]} \left( 1 - {z_k\over L} \right) s_y(k) \\ \end{eqnarray} where $L$ is the source height and $z_k$ is the atmosphere layer altitude. \index{atmosphere!atmosphere!get\_phase\_screen\_circ\_uplink\_centroids} @ <>= void atmosphere::get_phase_screen_circ_uplink_centroids(centroiding *cog, const float R, source *src, int N_SRC, float time, char focused) { int N_o = 360; dim3 blockDim(16,16); dim3 gridDim( ceilf(sqrt(N_o)/16) , ceilf(sqrt(N_o)/16)); if (focused==1) circ_focused_uplink_centroids_kernel LLL gridDim , blockDim RRR (cog->d__cx, cog->d__cy, N_o, R, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); else circ_uplink_centroids_kernel LLL gridDim , blockDim RRR (cog->d__cx, cog->d__cy, N_o, R, d__turbulence, wavenumber*powf(r0,-5.0/6.0), d__layers, N_LAYER, zeta1, eta1, zeta2, eta2, src->dev_ptr, time); } @ For a collimated uplink beam, the device kernel is: <>= __global__ void circ_uplink_centroids_kernel(float *cx, float *cy, int N_xy, const float R, profile *turb, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time) { <> i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; i_source = blockIdx.z; kl = j * gridDim.x * blockDim.x + i + i_source*N_xy; sum = 0; if (kl> float o, so, co, c, _cx_, _cy_, phase_screen; c = -2.0/(R*N_xy); o = 2*PI*kl/N_xy; sincosf(o,&so,&co); x_kl0 = R*co; y_kl0 = R*so; cx[0] = cy[0] = 0.0; for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> for (l=0;l> sum_l *= gl; } sum += sqrt_spectrum_kernel*sum_l; } } phase_screen = 1.4*r0*sum; _cx_ = c*co*phase_screen; _cy_ = c*so*phase_screen; atomicAdd(&cx[0], _cx_); atomicAdd(&cy[0], _cy_); } } @ For a focused uplink beam, the device kernel is: <>= __global__ void circ_focused_uplink_centroids_kernel(float *cx, float *cy, int N_xy, const float R, profile *turb, float r0, layer *layers, int N_LAYER, float *zeta1, float *eta1, float *zeta2, float *eta2, source *src, float time) { <> i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; i_source = blockIdx.z; kl = j * gridDim.x * blockDim.x + i + i_source*N_xy; sum = 0; if (kl> float o, so, co, c, _cx_, _cy_, phase_screen; c = -2.0/(R*N_xy); o = 2*PI*kl/N_xy; sincosf(o,&so,&co); x_kl0 = R*co; y_kl0 = R*so; cx[0] = cy[0] = 0.0; for (i=0;iN_k;i++) { <> for (j=0;jN_a;j++) { <> for (l=0;l> sum_l *= gl; } sum += sqrt_spectrum_kernel*sum_l; } } phase_screen = 1.4*r0*sum; _cx_ = c*co*phase_screen; _cy_ = c*so*phase_screen; atomicAdd(&cx[0], _cx_); atomicAdd(&cy[0], _cy_); } } @ \subsection{Ray tracing} \label{sec:ray-tracing} This note describe a CUDA implementation of ray tracing through several phase screens from a set of difference sources at a finite or infinite distance. The phase screens, computed independently, are rectangular strip with the longest side aligned along the wind direction. % he short side length [[W]] is the size of the field--of--view at the layer altitude $h$, i.e $$[[W]]=D + 2h\tan(\omega/2),$$ where $D$ is the telescope diameter and $\omega$ is the field--of--view. % The long side length [[L]] depends on the wind speed $v$ and the experience duration $T$, i.e. $$L = W + vT.$$ The atmosphere has [[N_LAYER]] turbulence layers and there are [[N_SOURCE]] different sources. The ray tracing consists in accumulating on a set of coordinates in the telescope pupil plane the values of phase screens distributed above the telescope at several altitudes. The wavefront is accumulated on rays going from the source through the phase screens and to the (x,y) location in the pupil plane. The geometric propagation does the following: (i) the pupil plane coordinates are transformed into new coordinates at each layer (Sec.~\ref{sec:coord-transf}) and (ii) the wavefront of each layer is interpolated onto the new coordinates (Sec.~\ref{sec:bi-line-interp}). \subsubsection{Coordinate transformation} \label{sec:coord-transf} %The coordinates in the telescope pupil are defined on a square Cartesian grid of [[N_PUPIL]]$\times$[[N_PUPIL]] pixels. The coordinates in the pupil plane are given in the vectors [[x_PUPIL]] and [[y_PUPIL]] of length [[NXY_PUPIL]] each. The pupil coordinates need to be transformed at each layer to take into account the height and direction of the source and the wind vector for the layer considered. At a given layer $[[k_LAYER]]\in[0,\dots,[[N_LAYER-1]]]$ for the source $[[k_SOURCE]]\in[0,\dots,[[N_SOURCE-1]]]$, the coordinates [[x_PUPIL]] and [[y_PUPIL]] undergo the following transformations \begin{enumerate} \item scaling by the factor [[g]] given by <>= g = 1 - layers[k_LAYER].altitude/src[k_SOURCE].height; @ where [[iz_SOURCE]] is the inverse of the source height, leading to <>= x = g*xP; y = g*yP; @ \item translation based on the intersection between the ray going from the center of the pupil to the source and the atmosphere layer plane <>= x += src[k_SOURCE].theta_x*layers[k_LAYER].altitude; y += src[k_SOURCE].theta_y*layers[k_LAYER].altitude; @ where [[theta_x_SOURCE]] and [[theta_y_SOURCE]] are the x and y components of the direction vector of the source [[k_SOURCE]]. They are written $$[[theta_x_SOURCE]]=\tan(\zeta)\cos(\alpha)$$ and $$[[theta_y_SOURCE]]=\tan(\zeta)\sin(\alpha)$$ with $\zeta$ and $\alpha$ the zenith and azimuth coordinates of the source. \item rotation according to the wind direction [[theta_wind]]; the phase screens being aligned with the wind vector, the pupil plane coordinates are rotated to be correctly aligned with the wind vector in the phase screens frame of reference <>= sincosf(layers[k_LAYER].wind_direction, &s, &c); xi = x*c + y*s; yi = y*c - x*s; @ \item translation according to the wind speed [[mag_wind]] and time step [[tau]]; the phase screens are carried by the wind which is blowing along the x--axis in the phase screen frame of reference, the pupil plane coordinates in this frame are then translated along the phase screens x--axis <>= xi -= tau*layers[k_LAYER].wind_speed; @ \end{enumerate} @ \subsubsection{Bi--linear interpolation} \label{sec:bi-line-interp} Once the pupil coordinates have been transformed into the reference frame of each layer, the phase screens are interpolated onto the new coordinates. The phase screens are long rectangular strips of width [[WIDTH_LAYER]] and length [[LENGTH_LAYER]]. The phase screens have been computed on Cartesian grids of pixel size [[N_WIDTH_LAYER]]$\times$[[N_LENGTH_LAYER]] with $$x\in[-[[WIDTH_LAYER]]/2,[[LENGTH_LAYER]]-[[WIDTH_LAYER/2]]]$$ and $$y\in[-[[WIDTH_LAYER]]/2,[[WIDTH_LAYER]]/2].$$ The phase screen of one layer is stored in a vector of length $[[N_WIDTH_LAYER]]\times[[N_WIDTH_LAYER]]$ and all the phase screens from all the layers are concatenated into a single vector [[d__phase_screen_LAYER]]. The first phase value of the layer [[k_LAYER]] is read with <>= N_L = layers[k_LAYER].N_LENGTH_LAYER; N_W = layers[k_LAYER].N_WIDTH_LAYER; z = d__phase_screen_LAYER + pitch; pitch += N_L*N_W; @ [[pitch]] must first be initialized to 0. The interpolation starts with normalizing the coordinates to the pixel size of the phase screen i.e. <>= s = (N_L - 1 ) *(xi - layers[k_LAYER].WIDTH_LAYER*0.5 + layers[k_LAYER].LENGTH_LAYER)/layers[k_LAYER].LENGTH_LAYER; t = (N_W - 1 ) *(yi/layers[k_LAYER].WIDTH_LAYER + 0.5); @ and taking the floor integer part to locate the bottom--left coordinate of the pixels surrounding [[xi]] and [[yi]] <>= if (s<0) { s=0.0; } fs = floorf(s); if (t<0) { t=0.0; } ft = floorf(t); ndx = __float2int_rd( ft + fs*N_W ); @ Next, one checks if the edge of the phase screens have been reached <>= if (fs==(N_L-1)) { s += 1 - fs; ndx -= N_W; } else { s -= fs; } if (ft==(N_W-1)) { t += 1 - ft; ndx -= 1; } else { t -= ft; } @ and finally the interpolation is computed as <>= onemt = 1 - t; zi = ( z[ndx]*onemt + z[ndx+1]*t )*(1-s) + ( z[ndx+N_W]*onemt + z[ndx+N_W+1]*t )*s; @ \subsubsection{The CUDA kernel} \label{sec:cuda-kernel} The codes in the former sections have implicitly declared several variable that we are declaring formally now <>= int N_L, N_W, k_LAYER, k_SOURCE, ij, ndx, ix, iy, pitch; float x, y, xi, yi, zi, s, c, t, fs, ft, g, onemt, xP, yP; float *z; @ The GPU thhread block and grid sizes are set such as the x and y threads process the telescope pupil coordinates and the z thread span the different sources. <>= ix = blockIdx.x * blockDim.x + threadIdx.x; iy = blockIdx.y * blockDim.y + threadIdx.y; pitch = gridDim.x * blockDim.x; ij = iy * pitch + ix; xP = x_PUPIL[ij]; yP = y_PUPIL[ij]; k_SOURCE = blockIdx.z; pitch = 0; @ <>= ix = blockIdx.x * blockDim.x + threadIdx.x; iy = blockIdx.y * blockDim.y + threadIdx.y; ij = iy + ix * N_Y; xP = delta_x*( ix - (N_X-1)*0.5 ); yP = delta_y*( iy - (N_Y-1)*0.5 ); k_SOURCE = blockIdx.z; pitch = 0; int NXY_PUPIL = N_X*N_Y; @ The kernel itself is written has follows for coordinates [[x_PUPIL]] and [[y_PUPIL]]: <>= __global__ void rayTracingKern( const float* x_PUPIL,const float* y_PUPIL, float* phase_screen_PUPIL,const int NXY_PUPIL, const source* src, const int N_SOURCE, const float r0, const layer* layers, float* d__phase_screen_LAYER, const int N_LAYER, const float tau) { <> <> if ((ij> <> phase_screen_PUPIL[ij+k_SOURCE*NXY_PUPIL] += r0*zi; } } } @ or for a regular grid with spacing [[delta_x]] and [[delta_y]] and size [[N_X]] and [[N_Y]]. <>= __global__ void rayTracingKernReg( const float delta_x, const int N_X, const float delta_y, const int N_Y, float* phase_screen_PUPIL, const source* src, const int N_SOURCE, const float r0, const layer* layers, float* d__phase_screen_LAYER, const int N_LAYER, const float tau) { <> <> if ( (ix> <> phase_screen_PUPIL[ij+k_SOURCE*NXY_PUPIL] += r0*zi; } } } __global__ void rayTracingKernReg_masked( const float delta_x, const int N_X, const float delta_y, const int N_Y, float* phase_screen_PUPIL, char *mask, const source* src, const int N_SOURCE, const float r0, const layer* layers, float* d__phase_screen_LAYER, const int N_LAYER, const float tau) { <> <> if ( (ix> <> phase_screen_PUPIL[ij+k_SOURCE*NXY_PUPIL] += r0*zi; } } } @ The kernel is called with \index{atmosphere!atmosphere!rayTracing} <>= void atmosphere::rayTracing( const float* x_PUPIL,const float* y_PUPIL, float* phase_screen_PUPIL, const int NXY_PUPIL, const source *src, const float tau) { int k_DURATION; float new_tau = tau - layers_tau0; stopwatch tid; //INFO(" . tau=%f\n . layers_tau=%f\n . layers_duration=%f\n",tau,layers_tau0,layers_duration); if ( (fabs(tau-layers_tau0)>layers_duration) || (new_tau<0) ) { k_DURATION = (int) (tau/layers_duration); layers_tau0 = k_DURATION*layers_duration; new_tau = tau - k_DURATION*layers_duration; if (N_DURATION>0) { INFO("\n@(CEO)>atmosphere: Loading phase screens (%.0fs) to device...\n",layers_tau0); tid.tic(); HANDLE_ERROR( cudaMemcpy( d__phase_screen_LAYER, phase_screen_LAYER + k_DURATION*N_PHASE_LAYER, sizeof(float)*N_PHASE_LAYER, cudaMemcpyHostToDevice ) ); tid.toc(); } else { <> } } dim3 blockDim(16,16); dim3 gridDim( ceilf(sqrtf(NXY_PUPIL)/16) , ceilf(sqrtf(NXY_PUPIL)/16), src->N_SRC); rayTracingKern LLL gridDim, blockDim RRR( x_PUPIL, y_PUPIL, phase_screen_PUPIL, NXY_PUPIL, src->dev_ptr, src->N_SRC, powf(r0,-5.0/6.0), d__layers, d__phase_screen_LAYER, N_LAYER, tau-layers_tau0); } @ with <>= if (tau>(layers_tau0+layers_duration)) { layers_tau0 = roundf(tau/layers_duration)*layers_duration; stopwatch tid; <> } @ where the phase screen are computed for the coordinates vector [[x_PUPIL]] and [[y_PUPIL]]. Instead the phase screens can be computed on regular grid with spacing [[delta_x]] and [[delta_y]] and size [[N_X]] and [[N_Y]]. <>= void atmosphere::rayTracing(source *src, const float delta_x, const int N_X, const float delta_y, const int N_Y, const float tau) { int k_DURATION; float new_tau = tau - layers_tau0; stopwatch tid; //INFO(" . tau=%f\n . layers_tau=%f\n . layers_duration=%f\n",tau,layers_tau0,layers_duration); if ( (fabs(tau-layers_tau0)>layers_duration) || (new_tau<0) ) { k_DURATION = (int) (tau/layers_duration); layers_tau0 = k_DURATION*layers_duration; new_tau = tau - k_DURATION*layers_duration; if (N_DURATION>0) { INFO("\n@(CEO)>atmosphere: Loading phase screens (%.0fs) to device...\n",layers_tau0); tid.tic(); HANDLE_ERROR( cudaMemcpy( d__phase_screen_LAYER, phase_screen_LAYER + k_DURATION*N_PHASE_LAYER, sizeof(float)*N_PHASE_LAYER, cudaMemcpyHostToDevice ) ); tid.toc(); } else { <> } } dim3 blockDim(16,16); dim3 gridDim( 1+N_X/16 , 1+N_Y/16, src->N_SRC); if (src->wavefront.M==NULL) { rayTracingKernReg LLL gridDim, blockDim RRR(delta_x, N_X, delta_y, N_Y, src->wavefront.phase, src->dev_ptr, src->N_SRC, powf(r0,-5.0/6.0), d__layers, d__phase_screen_LAYER, N_LAYER, new_tau); } else { rayTracingKernReg_masked LLL gridDim, blockDim RRR(delta_x, N_X, delta_y, N_Y, src->wavefront.phase, src->wavefront.M->m, src->dev_ptr, src->N_SRC, powf(r0,-5.0/6.0), d__layers, d__phase_screen_LAYER, N_LAYER, new_tau); } } @ \section{Tests} \label{sec:tests} \subsection{C tests} \label{sec:c-tests} <>= <> @ \subsubsection{Centroids from a circular aperture} \label{sec:centr-from-circ} This test compares the centroids derived from an image through a circular aperture for both the geometric model and the diffractive model. <>= #ifndef __CEO_H__ #include "h" #endif int main(int argc,char *argv[]) { <> /* float *data; */ float p = 2*R/N_PX, *cx, *cy, *cgx, *cgy, *tau, elapsed_time, buf; int k_SAMPLE, N_SAMPLE = 200; stopwatch tid; tau = (float*)malloc(sizeof(float)*N_SAMPLE); cx = (float*)malloc(sizeof(float)*N_SAMPLE); cy = (float*)malloc(sizeof(float)*N_SAMPLE); cgx = (float*)malloc(sizeof(float)*N_SAMPLE); cgy = (float*)malloc(sizeof(float)*N_SAMPLE); /* HANDLE_ERROR( cudaHostAlloc( (void**)&data, sizeof(float), */ /* cudaHostAllocDefault ) ); */ elapsed_time = 0.0; printf("N_SAMPLE: %d\n",N_SAMPLE); for (k_SAMPLE=0;k_SAMPLE> } @ The parameters are \begin{itemize} \item the telescope radius [[R]]: <>= float R = 12.5; @ \item the pupil pixel sampling [[N_PX]]: <>= int N_PX, N_PX2; N_PX = N_PX2 = 256; N_PX2 *= N_PX; @ \item the guide star: <>= source gs; gs.setup("K",0.0,0.0,INFINITY,N_PX2); <>= gs.cleanup(); @ \item the atmosphere <>= atmosphere atm; atm.gmt_setup(20e-2,30); <>= atm.cleanup(); @ \item the centroiding container: <>= centroiding cog; cog.setup(1,1); <>= cog.cleanup(); @ \item the Shack--Hartmann wavefront sensor: <>= shackHartmann wfs; wfs.setup(1,N_PX,2*R,4,4*N_PX,1,1); <>= wfs.cleanup(); @ \item the telescope pupil: <>= mask telescope; telescope.setup_circular(N_PX); gs.wavefront.masked(&telescope); //gs.wavefront.show_amplitude("atmosphere/amplitude"); wfs.calibrate(&gs,1.0); <>= telescope.cleanup(); @ \end{itemize} \subsubsection{Benchmark I} \label{sec:benchmark-i} <>= #include #include #include "ceo.h" using namespace std; int main(int argc,char *argv[]) { int N_GS, nPx, k, k_GS; float elapsedTime, r0, L0,fov_arcmin,d,tau; float *zen,*azi; rtd D; ofstream f; D = 25.5; r0 = 15e-2; L0 = 60; fov_arcmin = 10; nPx = 469; float altitude[] = {25, 275, 425, 1250, 4000, 8000, 13000}, xi0[] = {0.1257, 0.0874, 0.0666, 0.3498, 0.2273, 0.0681, 0.0751}, wind_speed[] = {5.6540, 5.7964, 5.8942, 6.6370, 13.2925, 34.8250, 29.4187}, wind_direction[] = {0.0136, 0.1441, 0.2177, 0.5672, 1.2584, 1.6266, 1.7462}; stopwatch tid; atmosphere atm; source src; N_GS = 1; zen = (float *) malloc( sizeof(float)*N_GS ); azi = (float *) malloc( sizeof(float)*N_GS ); for (k_GS=0;k_GS>= #include #include #include "ceo.h" using namespace std; int main(int argc,char *argv[]) { int N_GS, N_GS_MAX, nPx, k, k_GS; float elapsedTime, r0, L0,fov_arcmin,d,tau; float *zen,*azi; rtd D; ofstream f; D = 25.5; r0 = 15e-2; L0 = 60; fov_arcmin = 10; nPx = 469; float altitude[] = {25, 275, 425, 1250, 4000, 8000, 13000}, xi0[] = {0.1257, 0.0874, 0.0666, 0.3498, 0.2273, 0.0681, 0.0751}, wind_speed[] = {5.6540, 5.7964, 5.8942, 6.6370, 13.2925, 34.8250, 29.4187}, wind_direction[] = {0.0136, 0.1441, 0.2177, 0.5672, 1.2584, 1.6266, 1.7462}; stopwatch tid; atmosphere atm; source src; N_GS_MAX = 6; d = D/(nPx-1); tau = 0.0; f.open("bench_30s_rayTracingVsGs.txt"); f.precision(3); f << "NL\t"; for (N_GS=1;N_GS<=N_GS_MAX;N_GS++) { f << "\t" << N_GS << "GS"; } f << endl; for (k=1;k<=7;k++) { f << k; atm.setup(r0,L0,k,altitude,xi0,wind_speed,wind_direction,D,nPx,60.0*fov_arcmin); for (N_GS=1;N_GS<=N_GS_MAX;N_GS++) { zen = (float *) malloc( sizeof(float)*N_GS ); azi = (float *) malloc( sizeof(float)*N_GS ); for (k_GS=0;k_GS>= #!/usr/bin/env python import sys sys.path.append("/home/rconan/CEO/python") import time from pylab import * from import * <> @ <>= h = array([0,1,2.5,5,8,10,12],dtype=float32)*1e3 xi0 = array([0.3,0.15,0.25,0.1,0.1,0.05,0.05], dtype=float32) vs = array([10,10,10,10,10,10,10],dtype=float32) vo = array([0,0,0,0,0,0,0], dtype=float32) atm = Atmosphere(15e-2,30,h,xi0,vs,vo) n = 256 nn = array([n,n],dtype=int32) zen = array( [0], dtype=float32) azi = array( [0], dtype=float32) src = Source(zen,azi,float("inf"),nn) tel = Telescope(n) src.masked(tel) atm.get_phase_screen(src, 1, 0.1, n, 0.1, n, 0.0) src.masked(tel) ps = zeros( (n,n), order="c", dtype=float32 ) src.getphase(ps) figure(1) ax = imshow(ps*1e6,interpolation='None') colorbar(ax) nLenslet = 10 cog = Centroiding(nLenslet,1) c = zeros( (nLenslet,nLenslet*2), order="c", dtype=float32 ) atm.get_phase_screen_gradient(cog,nLenslet,0.2,src,1,0.0) cog.getc(c) figure(2) imshow(c,interpolation='None') show() @ <>= R = 12.5 N_PX = N_PX2 = 256 N_PX2 *= N_PX atm = GmtAtmosphere(20e-2,30) nn = array([N_PX,N_PX],dtype=int32) zen = array( [0], dtype=float32) azi = array( [0], dtype=float32) src = Source("K",zen,azi,float("inf"),nn) tel = Telescope(N_PX) p = 2*R/N_PX #atm.get_phase_screen(src, 1, p, N_PX, p, N_PX, 0.0) src.masked(tel) ps = zeros( (N_PX,N_PX), order="c", dtype=float32 ) frame = zeros( (N_PX*4,4*N_PX), order="c", dtype=float32) wfs = ShackHartmann(1,N_PX,2*R,4,4*N_PX,1,1) wfs.calibrate(src,1.0) cog = Centroiding(1,1) c = zeros( 2, order="c", dtype=float32 ) nSample = 200 cx_g = zeros( nSample, order="c", dtype=float32 ) cy_g = zeros( nSample, order="c", dtype=float32 ) cx_d = zeros( nSample, order="c", dtype=float32 ) cy_d = zeros( nSample, order="c", dtype=float32 ) tau = 1e-2 print "N_SAMPLE: %d\n" % nSample elapsed = 0.0 for k in range(nSample): t = time.clock() atm.get_phase_screen_circ_centroids(cog,R,src,1,k*tau) elapsed += time.time() - t cog.getc(c) cx_g[k] = c[0] cy_g[k] = c[1] atm.get_phase_screen(src, 1, p, N_PX, p, N_PX, k*tau) src.masked(tel) wfs.reset() wfs.analyze(src) src.getphase(ps) # figure(1) # ax1 = imshow(ps*1e6,interpolation='None') # colorbar(ax1) wfs.getframe(frame) # figure(2) # ax2 = imshow(frame,interpolation='None') # colorbar(ax2) # draw() wfs.getc(c) cx_d[k] = c[0] cy_d[k] = c[1] elapsed /= nSample elapsed *= 1e3 print "elapsed=%6.2f" % elapsed u = linspace(0,(nSample-1)*tau,nSample) rad2mas = 1e3*180*3600/pi cx_g *= rad2mas cy_g *= rad2mas cx_d *= rad2mas cy_d *= rad2mas #print cx_g #print cx_d #print cy_g #print cy_d figure(3) plot(u,cx_g,label='Cx geom.') plot(u,cy_g,label='Cy geom.') plot(u,cx_d,label='Cx diffr.') plot(u,cy_d,label='Cy diffr.') grid() xlabel('Time [s]') ylabel('Centroids [mas]') legend() show() @ <>= R = 0.15 N_PX = N_PX2 = 256 N_PX2 *= N_PX atm = GmtAtmosphere(20e-2,30) nn = array([N_PX,N_PX],dtype=int32) zen = array( [0], dtype=float32) azi = array( [0], dtype=float32) src = Source("K",zen,azi,90e3,nn) tel = Telescope(N_PX) p = 2*R/N_PX #atm.get_phase_screen(src, 1, p, N_PX, p, N_PX, 0.0) src.masked(tel) ps = zeros( (N_PX,N_PX), order="c", dtype=float32 ) frame = zeros( (N_PX*4,4*N_PX), order="c", dtype=float32) cog = Centroiding(1,1) cog_up = Centroiding(1,1) c = zeros( 2, order="c", dtype=float32 ) nSample = 200 cx_g = zeros( nSample, order="c", dtype=float32 ) cy_g = zeros( nSample, order="c", dtype=float32 ) cx_u = zeros( nSample, order="c", dtype=float32 ) cy_u = zeros( nSample, order="c", dtype=float32 ) tau = 1e-2 print "N_SAMPLE: %d\n" % nSample for k in range(nSample): atm.get_phase_screen_circ_centroids(cog,R,src,1,k*tau) atm.get_phase_screen_circ_uplink_centroids(cog_up,R,src,1,k*tau,0) cog.getc(c) cx_g[k] = c[0] cy_g[k] = c[1] cog_up.getc(c) cx_u[k] = c[0] cy_u[k] = c[1] u = linspace(0,(nSample-1)*tau,nSample) rad2mas = 1e3*180*3600/pi cx_g *= rad2mas cy_g *= rad2mas cx_u *= rad2mas cy_u *= rad2mas figure(3) plot(u,cx_g,label='Cx geom.') plot(u,cy_g,label='Cy geom.') plot(u,cx_u,label='Cx diffr.') plot(u,cy_u,label='Cy diffr.') grid() xlabel('Time [s]') ylabel('Centroids [mas]') legend() show() @ <>= R = 0.15 N_PX = N_PX2 = 256 N_PX2 *= N_PX atm = GmtAtmosphere(20e-2,30) nn = array([N_PX,N_PX],dtype=int32) zen = array( [0], dtype=float32) azi = array( [0], dtype=float32) src = Source("K",zen,azi,90e3,nn) tel = Telescope(N_PX) p = 2*R/N_PX #atm.get_phase_screen(src, 1, p, N_PX, p, N_PX, 0.0) src.masked(tel) ps = zeros( (N_PX,N_PX), order="c", dtype=float32 ) frame = zeros( (N_PX*4,4*N_PX), order="c", dtype=float32) cog = Centroiding(1,1) cog_up = Centroiding(1,1) c = zeros( 2, order="c", dtype=float32 ) nSample = 2000 cx_g = zeros( nSample, order="c", dtype=float32 ) cy_g = zeros( nSample, order="c", dtype=float32 ) cx_u = zeros( nSample, order="c", dtype=float32 ) cy_u = zeros( nSample, order="c", dtype=float32 ) tau = 1e-2 print "N_SAMPLE: %d\n" % nSample for k in range(nSample): atm.reset() atm.get_phase_screen_circ_centroids(cog,R,src,1,0.) atm.get_phase_screen_circ_uplink_centroids(cog_up,R,src,1,0.,0) cog.getc(c) cx_g[k] = c[0] cy_g[k] = c[1] cog_up.getc(c) cx_u[k] = c[0] cy_u[k] = c[1] rad2mas = 1e3*180*3600/pi cx_g *= rad2mas cy_g *= rad2mas cx_u *= rad2mas cy_u *= rad2mas @ <>= n = 256 nn = array([n,n],dtype=int32) zen = array( [0], dtype=float32) azi = array( [0], dtype=float32) src = Source(zen,azi,float("inf"),nn) tel = Telescope(n) src.masked(tel) D = 25.0 wfs = ShackHartmann(1,n,D,4,4*n,1,1) wfs.calibrate(src,1.0) atm = GmtAtmosphere(15e-2,30) p = D/n atm.get_phase_screen(src, 1, p, n, p, n, 0.0) src.masked(tel) #imgr = Imaging(n, 1, 4, n*4, 1, 1) #imgr.propagate(src) wfs.analyze(src) frame = zeros( (n,n*4), order="c", dtype=float32) #imgr.getframe(frame) wfs.getframe(frame) figure(1) ax = imshow(frame,interpolation='None') colorbar(ax) show() @ \subsection{Matlab tests} \label{sec:matlab-tests} The test suite is a Matlab script. It calls the mex function: <>= <> @ <>= #include #include #include "cublas_v2.h" #include "mex.h" #include "gpu/mxGPUArray.h" #ifndef __CEO_H__ #include "h" #endif #ifndef __SOURCE_H__ #include "source.h" #endif #ifndef __ATMOSPHERE_H__ #include "atmosphere.h" #endif static source src; static atmosphere atm; static unsigned int INIT=0; static void cleanup(void) { atm.cleanup(); INIT = 0; } void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray const *prhs[]) { unsigned int inputIndex; float const *d__x, *d__y; float *d__phase_screen; double *reset, *L0, *time; char const * const errId = "parallel:gpu:CEO_SCAO_MEX:InvalidInput"; char const * const errMsg = "Invalid input to MEX file."; /* Check for proper number of input and output arguments */ if (nrhs != 5) { mexErrMsgIdAndTxt( "MATLAB:mxislogical:invalidNumInputs", "Five input argument required."); } if(nlhs > 1){ mexErrMsgIdAndTxt( "MATLAB:mxislogical:maxlhs", "Too many output arguments."); } inputIndex = 0; /* Create GPUArray from mxArray input and get underlying pointer. */ mxGPUArray const *x; x = mxGPUCreateFromMxArray(prhs[inputIndex++]); if (mxGPUGetClassID(x) != mxSINGLE_CLASS) { mexErrMsgIdAndTxt(errId, errMsg); } d__x = (float const *)(mxGPUGetDataReadOnly(x)); // --------------------------------------- mxGPUArray const *y; y = mxGPUCreateFromMxArray(prhs[inputIndex++]); if (mxGPUGetClassID(y) != mxSINGLE_CLASS) { mexErrMsgIdAndTxt(errId, errMsg); } d__y = (float const *)(mxGPUGetDataReadOnly(y)); // --------------------------------------- reset = mxGetPr(prhs[inputIndex++]); L0 = mxGetPr(prhs[inputIndex++]); time = mxGetPr(prhs[inputIndex++]); int N_PIXEL; N_PIXEL = mxGPUGetNumberOfElements(x); /* Create GPUArray to hold the result and get underlying pointer. */ mxGPUArray *phase_screen; phase_screen = mxGPUCreateGPUArray(2,mxGPUGetDimensions(x), mxSINGLE_CLASS,mxREAL,MX_GPU_INITIALIZE_VALUES); d__phase_screen = (float *)(mxGPUGetData(phase_screen)); if (INIT==0) { // Source src.setup("R",ARCSEC(60) , 0, 90e3);//INFINITY); // Single layer turbulence profile int N_LAYER = 1; float altitude[] = {0}, xi0[] = {1}, wind_speed[] = {10}, wind_direction[] = {0}; /* // GMT 7 layers turbulence profile float altitude[] = {25, 275, 425, 1250, 4000, 8000, 13000}, xi0[] = {0.1257, 0.0874, 0.0666, 0.3498, 0.2273, 0.0681, 0.0751}, wind_speed[] = {5.6540, 5.7964, 5.8942, 6.6370, 13.2925, 34.8250, 29.4187}, wind_direction[] = {0.0136, 0.1441, 0.2177, 0.5672, 1.2584, 1.6266, 1.7462}; */ // Atmosphere atm.setup(0.15,(float) (*L0),N_LAYER,altitude,xi0,wind_speed,wind_direction); mexAtExit(cleanup); INIT = 1; } if (*reset>0) { atm.reset(); } atm.get_phase_screen(d__phase_screen,d__x,d__y,N_PIXEL,&src,(float)(*time)); /* Wrap the result up as a MATLAB gpuArray for return. */ plhs[0] = mxGPUCreateMxArrayOnGPU(phase_screen); // atm.cleanup(); mxGPUDestroyGPUArray(x); mxGPUDestroyGPUArray(y); mxGPUDestroyGPUArray(phase_screen); } @ <>= #include #include #include "cublas_v2.h" #include "mex.h" #include "gpu/mxGPUArray.h" #include "definitions.h" #ifndef __CEO_H__ #include "h" #endif #ifndef __SOURCE_H__ #include "source.h" #endif #ifndef __ATMOSPHERE_H__ #include "atmosphere.h" #endif static source src, *d__src; static atmosphere atm; static unsigned int INIT=0; static void cleanup(void) { atm.cleanup(); INIT = 0; } void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray const *prhs[]) { unsigned int inputIndex; float *d__s; double *pitch, *reset, *L0, *time; // char const * const errId = "parallel:gpu:CEO_SCAO_MEX:InvalidInput"; // char const * const errMsg = "Invalid input to MEX file."; /* Check for proper number of input and output arguments */ if (nrhs != 4) { mexErrMsgIdAndTxt( "MATLAB:mxislogical:invalidNumInputs", "Five input argument required."); } if(nlhs > 1){ mexErrMsgIdAndTxt( "MATLAB:mxislogical:maxlhs", "Too many output arguments."); } inputIndex = 0; /* Create GPUArray from mxArray input and get underlying pointer. */ pitch = mxGetPr(prhs[inputIndex++]); reset = mxGetPr(prhs[inputIndex++]); L0 = mxGetPr(prhs[inputIndex++]); time = mxGetPr(prhs[inputIndex++]); /* Create GPUArray to hold the result and get underlying pointer. */ mxGPUArray *s; mwSize dims[2]; dims[0] = _N_LENSLET_*2; dims[1] = 1; s = mxGPUCreateGPUArray(2,dims, mxSINGLE_CLASS,mxREAL,MX_GPU_INITIALIZE_VALUES); d__s = (float *)(mxGPUGetData(s)); if (INIT==0) { // Source src.setup(ARCSEC(0) , 0, 20e3);//INFINITY); HANDLE_ERROR( cudaMalloc( (void**)&d__src, sizeof(source)*_N_SOURCE_ ) ); HANDLE_ERROR( cudaMemcpy( d__src, &src, sizeof(source)*_N_SOURCE_ , cudaMemcpyHostToDevice ) ); // Single layer turbulence profile /* float altitude[] = {10e3}, xi0[] = {1}, wind_speed[] = {2}, wind_direction[] = {0}; */ /* float altitude[] = {0,5000,15000}, xi0[] = {0.33,0.33,0.33}, wind_speed[] = {2,5.6569,6.3246}, wind_direction[] = {0,2.3562,-1.8925}; */ // GMT 7 layers turbulence profile float altitude[] = {25, 275, 425, 1250, 4000, 8000, 13000}, xi0[] = {0.1257, 0.0874, 0.0666, 0.3498, 0.2273, 0.0681, 0.0751}, wind_speed[] = {5.6540, 5.7964, 5.8942, 6.6370, 13.2925, 34.8250, 29.4187}, wind_direction[] = {0.0136, 0.1441, 0.2177, 0.5672, 1.2584, 1.6266, 1.7462}; // Atmosphere atm.setup(0.15,(float) (*L0),altitude,xi0,wind_speed,wind_direction); mexAtExit(cleanup); INIT = 1; } if (*reset>0) { atm.reset(); } atm.get_phase_screen_gradient(d__s,d__s+_N_LENSLET_,N_SIDE_LENSLET, (float)(*pitch),d__src,(float)(*time)); /* Wrap the result up as a MATLAB gpuArray for return. */ plhs[0] = mxGPUCreateMxArrayOnGPU(s); // atm.cleanup(); mxGPUDestroyGPUArray(s); } @ The Matlab test suite: <>= %% % gpuDevice([]) r0 = 15e-2; L0 = 30; atm = atmosphere(photometry.V,r0,L0,'windSpeed',10,'windDirection',0); % atm = atmosphere(photometry.V,r0,L0,... % 'altitude',[0, 500, 1000, 2000, 5000, 8000. 13000],... % 'fractionnalR0',[0.2, 0.1, 0.1, 0.3, 0.2, 0.05, 0.05],... % 'windSpeed',[10, 5, 7.5, 5, 10, 12, 15],... % 'windDirection',[0, 0.25, 0.5, 1, 1.5, 1.75, 2]); % atm = gmtAtmosphere(1); % r0 = atm.r0; % L0 = atm.L0; nxy = 512; ir = '~/CEO'; cd(ir) unix(['sed -i ',... '-e ''s/#define _N_LAYER_ [0-9]*/#define _N_LAYER_ ',num2str(atm.nLayer),'/g'' ',... '-e ''s/#define _N_PIXEL_ [0-9]*/#define _N_PIXEL_ ',... num2str(nxy^2),'/g'' include/definitions.h']); unix('cat include/definitions.h'); unix('make clean lib atmosphere.mex') cd([ir,'/test']) clear atmosphere mex -largeArrayDims -I../include -L../lib -l-o atmosphere atmosphere.mex.cu u = single( L0*gpuArray.linspace(-1,1,nxy) ); [x,y] = meshgrid( u ); phs = atmosphere(x,y,0,L0,0); figure(1) imagesc(u,u,phs) axis square colorbar %% Variance test fprintf('__ Variance Test __\n') clear x y atmosphere cd(ir) unix(['sed -i ',... '-e ''s/#define _N_LAYER_ [0-9]*/#define _N_LAYER_ ',num2str(atm.nLayer),'/g'' ',... '-e ''s/#define _N_PIXEL_ [0-9]*/#define _N_PIXEL_ 1/g'' include/definitions.h']); unix('cat include/definitions.h'); unix('make clean lib atmosphere.mex') cd([ir,'/test']) clear atmosphere mex -largeArrayDims -I../include -L../lib -l-o atmosphere atmosphere.mex.cu tic nxy = 1000; x = gpuArray.rand(1,nxy,'single'); y = gpuArray.rand(1,nxy,'single'); L = 100; x = (2*x-1)*L/2; y = (2*y-1)*L/2; phs_var = gpuArray.zeros(1,nxy,'single'); h = waitbar(0,'Variance Test'); for kxy = 1:nxy phs_var(kxy) = atmosphere(x(kxy),y(kxy),1,L0,0); waitbar(kxy/nxy,h) end close(h) fprintf(' . Theoretical variance: %8.2frd^2\n',phaseStats.variance(atm)) fprintf(' . Numerical variance: %8.2frd^2\n',var(phs_var)) fprintf(' . Variance ratio: %6.5f\n',var(phs_var)/phaseStats.variance(atm)) toc %% Structure function test I fprintf('__ Structure Function Test I __\n') n_sample = 1000; clear atmosphere cd(ir) unix(['sed -i -e ''s/#define _N_PIXEL_ [0-9]*/#define _N_PIXEL_ ',... num2str(n_sample),'/g'' include/definitions.h']); unix('cat include/definitions.h'); unix('make clean lib atmosphere.mex') cd([ir,'/test']) mex -largeArrayDims -I../include -L../lib -l-o atmosphere atmosphere.mex.cu rho = 0:0.25:5; rho(1) = 0.1; nRho = length(rho); mean_sf = zeros(1,nRho); std_sf = zeros(1,nRho); n_plps = 1000; d_phs = gpuArray.zeros(n_plps,n_sample,'single'); hwb = waitbar(0,'Computing SF ...'); for kRho=1:nRho phi = gpuArray.rand(1,n_sample,'single')*2*pi; zRho = rho(kRho).*exp(1i*phi); zxy = (gpuArray.rand(1,n_sample,'single')*2-1)*0.5*L0 + ... 1i*(gpuArray.rand(1,n_sample,'single')*2-1)*0.5*L0; zxy_rho = zxy + zRho; tic for k_plps = 1:n_plps phs_xy = atmosphere(real(zxy),imag(zxy),1,L0,0); phs_xy_rho = atmosphere(real(zxy_rho),imag(zxy_rho),0,L0,0); d_phs(k_plps,:) = phs_xy - phs_xy_rho; end toc sf = var(d_phs); mean_sf(kRho) = gather( mean(sf) ); std_sf(kRho) = gather( std(sf) ); waitbar(kRho/nRho) end close(hwb) figure(25) heb = errorbar(rho,mean_sf, std_sf); set(heb','Marker','o','MarkerSize',8,... 'MarkerFaceColor','r','MarkerEdgeColor','k',... 'Linewidth',2,'LineStyle','none') hold all plot(rho,phaseStats.structureFunction(rho,atm),'Linewidth',2) hold off grid xlabel('Separation [m]') ylabel('Structure function [rd^2]') %% Structure function test II fprintf('__ Structure Function Test II __\n') L0_ = [1 5 25 300]; nL0 = length(L0_); n_plps = 1000; nxy = n_sample; phs_xy = gpuArray.zeros(1,nxy,'single'); phs_xy_rho = gpuArray.zeros(1,nxy,'single'); d_phs = gpuArray.zeros(n_plps,n_sample,'single'); rho = logspace(-2,2,10)'; nRho = length(rho); mean_sf = zeros(nRho,nL0); std_sf = zeros(nRho,nL0); th_sf = zeros(nRho,nL0); for kL0 = 1:nL0 L0 = L0_(kL0); atm.L0 = L0; clear atmosphere hwb = waitbar(0,sprintf('Computing SF for L0=%3.0fm ...',L0)); for kRho=1:nRho phi = gpuArray.rand(1,n_sample,'single')*2*pi; zRho = rho(kRho).*exp(1i*phi); zxy = (gpuArray.rand(1,n_sample,'single')*2-1)*0.5*L0 + ... 1i*(gpuArray.rand(1,n_sample,'single')*2-1)*0.5*L0; zxy_rho = zxy + zRho; tic for k_plps = 1:n_plps phs_xy = atmosphere(real(zxy),imag(zxy),1,L0,0); phs_xy_rho = atmosphere(real(zxy_rho),imag(zxy_rho),0,L0,0); d_phs(k_plps,:) = phs_xy - phs_xy_rho; end toc sf = var(d_phs); mean_sf(kRho,kL0) = gather( mean(sf) ); std_sf(kRho,kL0) = gather( std(sf) ); waitbar(kRho/nRho) end close(hwb) th_sf(:,kL0) = phaseStats.structureFunction(rho,atm); figure(26) heb = errorbar(repmat(rho,1,kL0),mean_sf(:,1:kL0), std_sf(:,1:kL0)); set(heb','Marker','o','MarkerSize',8,... 'MarkerFaceColor','r','MarkerEdgeColor','k',... 'Linewidth',2,'LineStyle','none','color','b') hold all plot(rho,th_sf(:,1:kL0),'color','k','Linewidth',2) hold off grid xlabel('Separation [m]') ylabel('Structure function [rd^2]') set(gca,'xscale','log','yscale','log') drawnow end for kL0=1:nL0 text(rho(end),mean_sf(end,kL0)*.7,sprintf('L0=%3.0fm',L0_(kL0)),... 'VerticalAlignment','top','BackgroundColor','w') end %% Zernike test fprintf('__ Zernike Test __\n') L0 = 30; atm.L0 = L0; nxy = 128; clear atmosphere cd(ir) unix(['sed -i ',... '-e ''s/#define _N_LAYER_ [0-9]*/#define _N_LAYER_ ',num2str(atm.nLayer),'/g'' ',... '-e ''s/#define _N_PIXEL_ [0-9]*/#define _N_PIXEL_ ',... num2str(nxy^2),'/g'' include/definitions.h']); unix('cat include/definitions.h'); unix('make clean lib atmosphere.mex') cd([ir,'/test']) mex -largeArrayDims -I../include -L../lib -l-o atmosphere atmosphere.mex.cu u = 12.5*single(gpuArray.linspace(-1,1,nxy)); [x,y] = meshgrid(u); ngs = source('zenith',0,'azimuth',0,'height',90e3); zern = zernike(1:66,25,'resolution',nxy); nIt = 4000; zernCoefs = gpuArray.zeros(zern.nMode,nIt,'single'); h = waitbar(0,'Zernike Test !'); for kTau=1:nIt phs = atmosphere(x,y,1,L0,0); zern = zern.\phs; zernCoefs(:,kTau) = zern.c; waitbar(kTau/nIt,h) end close(h) figure(29) h = semilogy(zern.j,var(zernCoefs,0,2),'ko',... zern.j,zernikeStats.variance(zern,atm,ngs),'.-',... zern.j,zernikeStats.variance(zern,atm),'ko--'); set(h(1),'MarkerFaceColor','r') grid xlabel('Zernike mode') ylabel('Zernike coef. variance [rd^2]') %for kTau=1:nTau;phs = atmosphere(x,y,0,L0,(kTau-1)*tau);imagesc(phs);axis square;colorbar;drawnow;end %% Taylor (frozen flow) hypothesis test fprintf('__ Taylor (frozen flow) Hypothesis Test __\n') tic phs = atmosphere(x,y,0,L0,0); toc figure(27) imagesc(u,u,phs) axis square colorbar nIt = 1000; tau = 1/10; duration = 5; nTau = duration/tau; wind = 10;%.*exp(1i*pi/3); % wind = 10.*exp(1i*sin(2*pi*(0:nIt-1)*tau*1)); zern = zernike(1:22,25,'resolution',nxy); zernCoefs = gpuArray.zeros(zern.nMode,nTau,nIt,'single'); h = waitbar(0,'Taylor (frozen flow) hypothesis test!'); for kIt=1:nIt phs = atmosphere(x,y,1,L0,0); for kTau=1:nTau phs = atmosphere(x,y,0,L0,(kTau-1)*tau); zern = zern.\phs; zernCoefs(:,kTau,kIt) = zern.c; % set(h,'Cdata',h_phs) % drawnow end waitbar(kIt/nIt,h) end close(h) tau_ = (0:nTau-1)*tau; ngs = source; zcov = zeros(zern.nMode,zern.nMode,nTau); if matlabpool('size')==0 matlabpool open end tic parfor kTau=1:nTau zcov(:,:,kTau) = ... zernikeStats.temporalAngularCovariance(zern,atm,tau_(kTau),ngs,ngs); end toc zcov_diag =cell2mat( ... arrayfun( @(x) squeeze( zcov(x,x,:) ) , 1:22, 'UniformOutput', false) ); figure(30) h_th = plot(tau_,zcov_diag(:,2:8),'LineWidth',2); grid xlabel('Time [s]') ylabel('Zernike coef. covariance [rd^2]') legend(num2str((2:8)'),0) hold off C = mean( bsxfun( @times , zernCoefs(:,1,:) , zernCoefs ) , 3); hold all h_num = plot(tau_,C(2:8,:)','.','MarkerSize',15); hold off @ <>= #ifndef __CEO_H__ #include "h" #endif #ifndef __SOURCE_H__ #include "source.h" #endif #ifndef __ATMOSPHERE_H__ #include "atmosphere.h" #endif #ifndef __IMAGING_H__ #include "imaging.h" #endif #ifndef __CENTROIDING_H__ #include "centroiding.h" #endif #ifndef __AASTATS_H__ #include "aaStats.h" #endif #ifndef __BTBT_H__ #include "BTBT.h" #endif #include "iterativeSolvers.h" int main( void) { atmosphere atm; imaging lenslet_array; centroiding cog; float slopes2Angle, d, cxy0; int N = _N_LENSLET_*2; // Single layer turbulence profile float altitude[] = {0}, xi0[] = {1}, wind_speed[] = {10}, wind_direction[] = {0}; d = 1; atm.setup(d,30,altitude,xi0,wind_speed,wind_direction); //atm.reset(); slopes2Angle = (atm.wavelength/2/d); source src, *d__src; src.setup(ARCSEC(0) , 0, INFINITY); HANDLE_ERROR( cudaMalloc( (void**)&d__src, sizeof(source)*_N_SOURCE_ ) ); HANDLE_ERROR( cudaMemcpy( d__src, &src, sizeof(source)*_N_SOURCE_ , cudaMemcpyHostToDevice ) ); // SH WFS lenslet_array.setup(); // Centroid cog.setup(); float phase_screen[_N_PIXEL_]; float delta, delta_e; int PS_N_PX, PS_E_N_PX, NP; FILE *fid; float *phase_screen_low_res, *d__phase_screen_low_res, *b; PS_N_PX = _N_PX_PUPIL_*N_SIDE_LENSLET; PS_E_N_PX = PS_N_PX*PS_N_PX; NP = PS_N_PX; delta=d/16.; atm.get_phase_screen(delta,PS_N_PX,delta,PS_N_PX,d__src,0); HANDLE_ERROR( cudaMemcpy( phase_screen,d__src->phase, sizeof(float)*PS_E_N_PX, cudaMemcpyDeviceToHost ) ); fid = fopen("phaseScreen.bin","wb"); fwrite(phase_screen,sizeof(float),PS_E_N_PX,fid); fclose(fid); PS_N_PX = 9; PS_E_N_PX = PS_N_PX*PS_N_PX; NP = PS_N_PX; HANDLE_ERROR( cudaMalloc( (void**)&d__phase_screen_low_res, sizeof(float)*PS_E_N_PX ) ); delta_e = d/2; atm.get_phase_screen(d__phase_screen_low_res,delta_e,NP,delta_e,NP,d__src,0); phase_screen_low_res = (float*)malloc(sizeof(float)*PS_E_N_PX); HANDLE_ERROR( cudaMemcpy( phase_screen_low_res, d__phase_screen_low_res, sizeof(float)*PS_E_N_PX, cudaMemcpyDeviceToHost ) ); fid = fopen("phaseScreenLowRes.bin","wb"); fwrite(phase_screen_low_res,sizeof(float),PS_E_N_PX,fid); fclose(fid); lenslet_array.propagate(d__src); cxy0 = (_N_PX_PUPIL_ - 1)/2.0; cog.get_data(lenslet_array.d__frame, cxy0, cxy0, slopes2Angle); b = (float *)malloc(sizeof(float)*N); HANDLE_ERROR( cudaMemcpy( b, cog.d__c, sizeof(float)*N, cudaMemcpyDeviceToHost ) ); printf("\n Cx Cy\n"); for (int k=0;k<_N_LENSLET_;k++) { printf("%+6.4E %+6.4E\n",b[k],b[k+_N_LENSLET_]); } atm.get_phase_screen_gradient(cog.d__cx, cog.d__cy, _N_LENSLET_, d, d__src, 0); HANDLE_ERROR( cudaMemcpy( b, cog.d__c, sizeof(float)*N, cudaMemcpyDeviceToHost ) ); printf("\n Cx Cy\n"); for (int k=0;k<_N_LENSLET_;k++) { printf("%+6.4E %+6.4E\n",b[k],b[k+_N_LENSLET_]); } atm.cleanup(); lenslet_array.cleanup(); cog.cleanup(); HANDLE_ERROR( cudaFree( d__src) ); HANDLE_ERROR( cudaFree( d__phase_screen_low_res ) ); free(phase_screen_low_res); }