>= atm.get_phase_screen(&src,delta_e,NP,delta_e,NP,0); src.phase2file("phaseScreenLowRes.bin"); <> // Iterative solver //iSolve.cg_setup(N); iSolve.minres_setup(N); //aaCov.mask = d__mask; dev2file("mask.bin",d__mask,_N_LENSLET_); char filename[100]; /* for (k=1;k<6;k++) { */ k = 100; sprintf(filename,"MINRES_phaseEst_%d.bin",k); //sprintf(filename,"CG_phaseEst_%d.bin",k); HANDLE_ERROR( cudaMalloc((void**)&d__x, sizeof(float)*N ) ); HANDLE_ERROR( cudaMemset(d__x, 0, sizeof(float)*N ) ); tid.tic(); //cudaProfilerStart(); iSolve.minres_vorst(d__x, &aaCov, cog.d__c, d__x); //iSolve.cg(d__x, &aaCov, cog.d__c, k, d__x); //cudaProfilerStop(); <> tid.toc("WAVEFRONT ESTIMATION"); <> //printf("\nSolver residue norm : %.2E\n",iSolve.rnorm); //printf("\nSolver mean time per iteration: %.2E\n",iSolve.mean_time_per_iteration); /* } */ @ \subsection{Convergence test} \label{sec:convergence-test} The [[convergence test]] reconstructs the wavefront with both iterative solvers, MINRES and CG, at each iteration step. 
<>=
/* Convergence test.
   Runs the solver selected by the string solver ("CG" or "MINRES") 200 times,
   each time from a zero initial guess and with the loop counter k passed to
   the solver (presumably its iteration count -- confirm against iSolve), and
   appends every wavefront estimate to one binary file. */
atm.get_phase_screen(&src,delta_e,NP,delta_e,NP,0);
char filename[100];
/* snprintf bounds every write to filename[]; an over-long name is truncated
   instead of overrunning the buffer. */
snprintf(filename,sizeof(filename),"CVGCE_phaseScreenLowRes_%03d.bin",N_SIDE_LENSLET);
src.phase2file(filename);
<>
if (cog.MASK_SET) aaCov.mask = cog.lenslet_mask;
snprintf(filename,sizeof(filename),"bins/CVGCE_%s_phaseEst_200_%03d.bin",solver,N_SIDE_LENSLET);
FILE *fid;
fid = fopen(filename,"wb");
/* fopen returns NULL when the file cannot be created (for instance when the
   bins/ directory does not exist); stop here rather than pass NULL to fwrite. */
if (fid==NULL) {
  perror(filename);
  exit(EXIT_FAILURE);
}
/* File header: n_page, then n_data = PS_E_N_PX floats for each of the 200 passes. */
int n_page = 1, n_data = PS_E_N_PX*200;
fwrite(&n_page,sizeof(int),1,fid);
fwrite(&n_data,sizeof(int),1,fid);
/* Host staging buffer for one estimate and device buffer for the solver unknowns. */
phase_screen_est = (float*)malloc(sizeof(float)*PS_E_N_PX);
HANDLE_ERROR( cudaMalloc((void**)&d__x, sizeof(float)*N ) );
// CG
if (strcmp(solver,"CG")==0) {
  iSolve.pcg_setup(N);
  for (k=1;k<=200;k++) {
    printf("\n______ ITERATION #: %03d ______\n",k);
    /* Restart from a zero initial guess on every pass. */
    HANDLE_ERROR( cudaMemset(d__x, 0, sizeof(float)*N ) );
    tid.tic();
    iSolve.pcg(d__x, &aaCov, cog.d__c, k, d__x, 1./aa.variance());
    <>
    tid.toc("WAVEFRONT ESTIMATION");
    /* Blocking copy of the estimate to the host, then append it to the file. */
    HANDLE_ERROR( cudaMemcpy( phase_screen_est, d__phase_est, sizeof(float)*PS_E_N_PX, cudaMemcpyDeviceToHost ) );
    fwrite(phase_screen_est,sizeof(float),PS_E_N_PX,fid);
    printf("\nSolver residue norm : %.2E\n",iSolve.rnorm);
    printf("\nSolver mean time per iteration: %.2E\n",iSolve.mean_time_per_iteration);
    printf("\n-------------------------------\n");
  }
}
// MINRES
if (strcmp(solver,"MINRES")==0) {
  iSolve.minres_setup(N);
  iSolve.RTOL = 1e-6;
  for (k=1;k<=200;k++) {
    printf("\n______ ITERATION #: %03d ______\n",k);
    /* Restart from a zero initial guess on every pass. */
    HANDLE_ERROR( cudaMemset(d__x, 0, sizeof(float)*N ) );
    tid.tic();
    iSolve.minres_vorst(d__x, &aaCov, cog.d__c, k, d__x);
    <>
    tid.toc("WAVEFRONT ESTIMATION");
    /* Blocking copy of the estimate to the host, then append it to the file. */
    HANDLE_ERROR( cudaMemcpy( phase_screen_est, d__phase_est, sizeof(float)*PS_E_N_PX, cudaMemcpyDeviceToHost ) );
    fwrite(phase_screen_est,sizeof(float),PS_E_N_PX,fid);
    printf("\nSolver residue norm : %.2E\n",iSolve.rnorm);
    printf("\nSolver mean time per iteration: %.2E\n",iSolve.mean_time_per_iteration);
    printf("\n-------------------------------\n");
  }
}
fclose(fid);
@
\subsection{Hot convergence test}
\label{sec:hot-convergence-test}
The [[hot convergence 
test]] reconstructs the wavefront with MINRES, re-using the previous estimate as the starting point for the next one.
<>=
/* Hot convergence test.
   An LMMSE object configured for "MINRES" produces one wavefront estimate per
   time step; the n_step estimates and the matching source and guide-star
   phase screens are accumulated in device buffers and dumped to files at the
   end of the chunk. */
LMMSE lmmse;
lmmse.setup(&atm,&gs,1,&src,1,d,N_SIDE_LENSLET,&pupil_mask,"MINRES");
int n_step = 200;
/* Each buffer holds n_step consecutive frames of PS_E_N_PX floats. */
HANDLE_ERROR( cudaMalloc( (void**)&d__phase_screen_low_res, sizeof(float)*PS_E_N_PX*n_step ) );
float *d__gs_phase_screen_low_res;
HANDLE_ERROR( cudaMalloc( (void**)&d__gs_phase_screen_low_res, sizeof(float)*PS_E_N_PX*n_step ) );
HANDLE_ERROR( cudaMalloc( (void**)&d__phase_screen_est, sizeof(float)*PS_E_N_PX*n_step ) );
//if (cog.MASK_SET) aaCov.mask = cog.lenslet_mask;
HANDLE_ERROR( cudaMalloc((void**)&d__x, sizeof(float)*N ) );
HANDLE_ERROR( cudaMemset(d__x, 0, sizeof(float)*N ) );
float time_step,tau=0, tau0=0, delta_tau;
time_step = 2;
//iSolve.minres_setup(N);
lmmse.iSolve.VERBOSE = 0;
lmmse.iSolve.RTOL = 5E-2;
// First wavefront estimate
atm.get_phase_screen_gradient(&cog,N_SIDE_LENSLET,d,&gs,tau);
tid.tic();
//iSolve.minres_vorst(d__x, &aaCov, cog.d__c, d__x);
lmmse.estimation(&cog);
/* tid.toc writes into tau0 (presumably the elapsed time -- confirm); that
   value is the starting time of the sequence below. */
tid.toc(&tau0,"WAVEFRONT ESTIMATION");
tau = tau0;
//lmmse.iSolve.N_ITERATION = 5;
//lmmse.iSolve.ATOL = lmmse.iSolve.rnorm;
// MINRES
for (k=1;k<=n_step;k++) {
  printf("\n______ ITERATION #: %03d ______\n",k);
  tau += time_step;
  /* New measurements at time 1E-3*tau (the factor presumably converts
     milliseconds to seconds -- confirm against atm). */
  atm.get_phase_screen_gradient(&cog,N_SIDE_LENSLET,d,&gs,1E-3*tau);
  // lmmse.d__phase_est_c = d__phase_screen_est + (k-1)*PS_E_N_PX;
  /* Direct the estimate of step k to frame k-1 of d__phase_screen_est. */
  lmmse.set_phase_est_ptr( d__phase_screen_est + (k-1)*PS_E_N_PX );
  tid.tic();
  /* iSolve.minres_vorst(d__x, &aaCov, cog.d__c, d__x); */
  /* paCov.MVM(d__phase_screen_est + (k-1)*PS_E_N_PX, d__x); */
  lmmse.estimation(&cog);
  tid.toc(&delta_tau, "WAVEFRONT ESTIMATION");
  //tau += delta_tau + 0*time_step;
  /* Point the source and guide-star wavefronts at frame k-1 of their
     buffers before the phase screens at the same time are requested. */
  src.wavefront.phase = d__phase_screen_low_res + (k-1)*PS_E_N_PX;
  atm.get_phase_screen(&src,delta_e,NP,delta_e,NP,1E-3*tau);
  gs.wavefront.phase = d__gs_phase_screen_low_res + (k-1)*PS_E_N_PX;
  atm.get_phase_screen(&gs,delta_e,NP,delta_e,NP,1E-3*tau);
  /* HANDLE_ERROR( cudaMemcpy( d__phase_screen_low_res + (k-1)*PS_E_N_PX, 
     src.wavefront.phase, */
  /* sizeof(float)*PS_E_N_PX, */
  /* cudaMemcpyDeviceToDevice) ); */
  //printf("\nSolver residue norm : %.2E\n",iSolve.rnorm);
  //printf("\nSolver mean time per iteration: %.2E\n",iSolve.mean_time_per_iteration);
  printf("\n-------------------------------\n");
  /* if (k==1) */
  /* iSolve.ATOL = iSolve.rnorm; */
  //if (k>1)
  //iSolve.N_ITERATION = 10;
}
char filename[100];
/* snprintf bounds every write to filename[]; the third name embeds the
   externally supplied solver string, so an unbounded sprintf could overrun
   the 100-byte buffer. */
snprintf(filename,sizeof(filename),"CVGCE_phaseScreenLowRes_%03d.bin",N_SIDE_LENSLET);
dev2file(filename,d__phase_screen_low_res,PS_E_N_PX*n_step);
snprintf(filename,sizeof(filename),"CVGCE_GS_phaseScreenLowRes_%03d.bin",N_SIDE_LENSLET);
dev2file(filename,d__gs_phase_screen_low_res,PS_E_N_PX*n_step);
snprintf(filename,sizeof(filename),"CVGCE_%s_phaseEst_%03d_%03d.bin",solver,n_step,N_SIDE_LENSLET);
dev2file(filename,d__phase_screen_est,PS_E_N_PX*n_step);
lmmse.cleanup();
/* NOTE(review): gs.wavefront.phase still points into the buffer freed on the
   next line, and src.wavefront.phase into d__phase_screen_low_res; confirm
   that neither pointer is dereferenced or freed again by later code. */
HANDLE_ERROR( cudaFree( d__gs_phase_screen_low_res ) );
@
\subsection{Statistics test}
\label{sec:statistics-test}
The [[statistics test]] reconstructs the wavefront n times, each time with a newly generated, statistically independent phase screen. 
<>=
/* Statistics test.
   For each of nSample samples the selected solver ("CG" or "MINRES") is
   started from a zero initial guess with nIt passed as its 4th argument, the
   estimate is appended to one binary file, and atm.reset() is called before
   the next sample. */
float *d__phase_screen_low_res;
HANDLE_ERROR( cudaMalloc( (void**)&d__phase_screen_low_res, sizeof(float)*PS_E_N_PX ) );
float *phase_screen_low_res;
phase_screen_low_res = (float*)malloc(sizeof(float)*PS_E_N_PX);
/* NOTE(review): nothing visible in this chunk has written to
   d__phase_screen_low_res yet, so this copy appears to transfer
   uninitialized device memory -- confirm whether it is needed. */
HANDLE_ERROR( cudaMemcpy( phase_screen_low_res, d__phase_screen_low_res, sizeof(float)*PS_E_N_PX, cudaMemcpyDeviceToHost ) );
<>
aaCov.mask = d__mask;
<<<>
FILE *fid0, *fid;
char filename[100];
/* snprintf bounds every write to filename[]; the second name embeds the
   externally supplied solver string. */
snprintf(filename,sizeof(filename),"STATS_phaseScreenLowRes_%03d.bin",N_SIDE_LENSLET);
fid0 = fopen(filename,"wb");
/* fopen returns NULL on failure; stop rather than pass NULL to fwrite/fclose. */
if (fid0==NULL) {
  perror(filename);
  exit(EXIT_FAILURE);
}
int nSample = 200, nIt = 200;
snprintf(filename,sizeof(filename),"STATS_%s_phaseEst_%03d_%03d.bin",solver,nSample,N_SIDE_LENSLET);
fid = fopen(filename,"wb");
if (fid==NULL) {
  perror(filename);
  exit(EXIT_FAILURE);
}
/* Host staging buffer and device buffer for one wavefront estimate. */
phase_screen_est = (float*)malloc(sizeof(float)*PS_E_N_PX);
HANDLE_ERROR( cudaMalloc( (void**)&d__phase_est , sizeof(float)*PS_E_N_PX ) );
// CG
if (strcmp(solver,"CG")==0) {
  iSolve.cg_setup(N);
  for (k=1;k<=nSample;k++) {
    printf("\n______ ITERATION #: %03d ______\n",k);
    <>
    /* Every sample starts from a zero initial guess. */
    HANDLE_ERROR( cudaMemset(d__x, 0, sizeof(float)*N ) );
    tid.tic();
    iSolve.cg(d__x, &aaCov, cog.d__c, nIt, d__x);
    <>
    tid.toc("WAVEFRONT ESTIMATION");
    /* Blocking copy of the estimate to the host, then append it to the file. */
    HANDLE_ERROR( cudaMemcpy( phase_screen_est, d__phase_est, sizeof(float)*PS_E_N_PX, cudaMemcpyDeviceToHost ) );
    fwrite(phase_screen_est,sizeof(float),PS_E_N_PX,fid);
    printf("\nSolver residue norm : %.2E\n",iSolve.rnorm);
    printf("\nSolver mean time per iteration: %.2E\n",iSolve.mean_time_per_iteration);
    printf("\n-------------------------------\n");
    atm.reset();
  }
}
// MINRES
if (strcmp(solver,"MINRES")==0) {
  iSolve.minres_setup(N);
  for (k=1;k<=nSample;k++) {
    printf("\n______ ITERATION #: %03d ______\n",k);
    <>
    /* Every sample starts from a zero initial guess. */
    HANDLE_ERROR( cudaMemset(d__x, 0, sizeof(float)*N ) );
    tid.tic();
    iSolve.minres_vorst(d__x, &aaCov, cog.d__c, nIt, d__x);
    <>
    tid.toc("WAVEFRONT ESTIMATION");
    HANDLE_ERROR( cudaMemcpy( phase_screen_est, d__phase_est, sizeof(float)*PS_E_N_PX, cudaMemcpyDeviceToHost ) );
    /* cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its
       documented replacement. */
    HANDLE_ERROR( cudaDeviceSynchronize() );
    fwrite(phase_screen_est,sizeof(float),PS_E_N_PX,fid);
    printf("\n Data saved to file!\n");
    printf("\nSolver residue norm : %.2E\n",iSolve.rnorm);
    printf("\nSolver mean time per iteration: %.2E\n",iSolve.mean_time_per_iteration);
    printf("\n-------------------------------\n");
    atm.reset();
  }
}
fclose(fid0);
fclose(fid);
@
This is the place where we terminate the test elegantly.
<>=
/* Release order: the module objects first, then the raw device buffers,
   then the host buffers. */
atm.cleanup();
lenslet_array.cleanup();
cog.cleanup();
aa.cleanup();
pa.cleanup();
aaCov.cleanup();
paCov.cleanup();
iSolve.cleanup();
S.cleanup();
HANDLE_ERROR( cudaFree( d__x ) );
HANDLE_ERROR( cudaFree( d__phase_est ) );
/* These buffers are only allocated by some of the tests, hence the guards. */
if (d__phase_screen_low_res) HANDLE_ERROR( cudaFree( d__phase_screen_low_res ) );
if (d__phase_screen_est) HANDLE_ERROR( cudaFree( d__phase_screen_est ) );
HANDLE_ERROR( cudaFree( d__mask ) );
if (phase_screen_low_res) free(phase_screen_low_res);
if (phase_screen_est) free(phase_screen_est);
@
<>=
/* NOTE(review): set_pa_input below appears to be cut off in the middle of its
   bounds test ("if ( (i") and to run straight into the tail (">=") of the
   next chunk header. The text is reproduced unchanged; restore the missing
   part from the original source. */
__global__ void set_pa_input(float *pa_c, float *aa_c, int *idx, int N) {
  int i, j, k;
  i = blockIdx.x * blockDim.x + threadIdx.x;
  j = blockIdx.y * blockDim.y + threadIdx.y;
  if ( (i>= atm.get_phase_screen(d__phase_screen_low_res,delta_e,NP,delta_e,NP,d__src,0);
/* Copy the phase screen just requested to the host and append it to fid0. */
HANDLE_ERROR( cudaMemcpy( phase_screen_low_res, d__phase_screen_low_res, sizeof(float)*PS_E_N_PX, cudaMemcpyDeviceToHost ) );
fwrite(phase_screen_low_res,sizeof(float),PS_E_N_PX,fid0);
<>
@
The wavefront sensing model uses either a Fourier optics or a geometric optics model. 
<>= <> <> <>= <> <> @ <>= lenslet_array.propagate(d__src); cxy0 = (_N_PX_PUPIL_ - 1)/2.0; cog.get_data(lenslet_array.d__frame, cxy0, cxy0, slopes2Angle); @ <>= atm.get_phase_screen_gradient(&cog,N_SIDE_LENSLET,d,&src,1,0); <>= atm.get_phase_screen_gradient(&cog,N_SIDE_LENSLET,d__mask,d,&src,1,0); @ <>= dev2file("centroids.bin",cog.d__c,N); @ The indices of the WFS slopes in the augmented input vector of the MVM operation are computed next: <>= idx = (int *)malloc(sizeof(int)*_N_LENSLET_); k = -1; //printf("\n k idx\n"); for (i=1;i<2*N_SIDE_LENSLET;i+=2) { for (j=1;j<2*N_SIDE_LENSLET;j+=2) { idx[++k] = i*(2*N_SIDE_LENSLET + 1) + j; //printf("(%2d) %2d\n",k,idx[k]); } } HANDLE_ERROR( cudaMalloc( (void**)&d__idx, sizeof(int)*_N_LENSLET_ ) ); HANDLE_ERROR( cudaMemcpy( d__idx, idx, sizeof(int)*_N_LENSLET_, cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMalloc((void**)&d__x, sizeof(float)*N ) ); x = (float*)malloc(sizeof(float)*N); @ CG iterative solver call: <>= iSolve.cg(d__x, &aaCov, d__b, 5, d__x); @ MINRES iterative solver call: <>= iSolve.minres(d__x, &aaCov, d__b, 5, d__x); @ Allocation of the input vector for the MVM: <>= HANDLE_ERROR( cudaMalloc((void**)&d__ce, sizeof(float)*PS_E_N_PX*2 ) ); HANDLE_ERROR( cudaMemset(d__ce, 0, sizeof(float)*PS_E_N_PX*2 ) ); dim3 blockDim(16,16); dim3 gridDim(N_SIDE_LENSLET/16+1,N_SIDE_LENSLET/16+1); @ Writing the input of the iterative solver into the output of the MVM: <>= paCov.MVM(d__phase_est,d__x); @ Saving the MVM wavefront estimate to a file: <>= dev2file(filename,d__phase_est,PS_E_N_PX); @