% -*- mode: Noweb; noweb-code-mode: c-mode -*- \index{source} The source structure contains all the data associated with a remote optical emitter: its location, wavefront, irradiance, ... \section{The files} \subsection{Header} <>= #ifndef __SOURCE_H__ #define __SOURCE_H__ #ifndef __UTILITIES_H__ #include "utilities.h" #endif <> <> <> struct source { <> void setup(const char *_photometric_band, float zenith, float azimuth, float height); void setup(const char *_photometric_band, float zenith, float azimuth, float height, int resolution); void setup(const char *_photometric_band, float zenith, float azimuth, float height, const char *tag_in); void setup(const char *_photometric_band, float zenith, float azimuth, float height, int resolution, const char *tag_in); void setup(const char *_photometric_band, float *_zenith, float *_azimuth, float _height, int _N_SRC); void setup(const char *_photometric_band, float *_zenith, float *_azimuth, float _height, int _N_SRC, int resolution); void setup(const char *_photometric_band, float *_zenith, float *_azimuth, float _height, int _N_SRC, rtd _L_, int _N_L_, vector origin); void setup(const char *_photometric_band, float *magnitude, float *_zenith, float *_azimuth, float _height, int _N_SRC, rtd _L_, int _N_L_, vector origin); void setup(const char *_photometric_band, float *magnitude, rtd *_zenith, rtd *_azimuth, float _height, int _N_SRC, rtd _L_, int _N_L_, vector origin); void setup_chief(const char *_photometric_band, float *magnitude, rtd *_zenith, rtd *_azimuth, float _height, int _N_SRC, rtd _L_, int _N_L_, vector origin, vector chief_origin); void setup(const char *_photometric_band, float *_magnitude, rtd *_zenith, rtd *_azimuth, float _height, int _N_SRC, int _N_RAY_, double *x, double *y, vector origin); void cleanup(void); void reset_rays(void); void reset_rays(int RESET_RAYS_MASK); void opd2phase(void); void opd2phase(int RESET_RAYS_MASK); void info(void); void phase2file(const char *filename); float wavelength(void); float wavelength_micron(void); float spectral_bandwidth(void); float n_photon(void); float n_photon(float _magnitude_); float n_background_photon(float backgroundMagnitude); float wavenumber(void); void update_directions(double *zenith, double *azimuth, int N_DIR); void update_magnitude(float *magnitude, int N_MAG); void copy_magnitude(source *other_src); void optical_transfer_function(float2 *d__otf); }; <> #endif // __SOURCE_H__ @ \subsection{Source} <>= #include "source.h" <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> @ \section{Parameters} \label{sec:params} \index{source!source} The source structure is a collection of [[N_SRC]] light sources. <>= int N_SRC; @ The source locations are given by their [[zenith]] and [[azimuth]] angles, all the sources share the same height. <>= float zenith, azimuth, height, theta_x, theta_y; @ A double precision copy of the same parameters is used by the ray tracing engine: <>= rtd _zenith_64_, _azimuth_64_, _height_64_, _theta_x_64_, _theta_y_64_; @ The coordinates of the unit vector pointing towards the sources are given by [[theta_x]] and [[theta_y]]. The wavefront is the complex amplitude of the light beam. It is given by its amplitude and phase, both are arrays of [[N_PX]] values. The complex amplitude is defined in a new structure: <>= struct complex_amplitude { <> void setup(int n_pixel); void setup(int n_pixel, int n_src); void cleanup(void); void reset(void); void reset(complex_amplitude *wavefront); void reset_amplitude(void); void reset_phase(void); void reset_phase(complex_amplitude *wavefront_prime); void add_phase(float alpha, float *phase_prime); void add_same_phase(float alpha, float *phase_prime); void masked(void); void masked(mask *M_in); void rms(float *rms); void finite_difference(float *sx, float *sy, int NL, float d); void finite_difference(float *sx, float *sy, int NL, float d, mask *valid_lenslet); void gradient_average(float *sx, float *sy, int NL, float d); void gradient_average(float *sx, float *sy, float d); void segments_gradient_average(float *sx, float *sy, float D, int *segment_markers); void segments_gradient_averageFast(float *sx, float *sy, float D, int *segment_markers); void show_phase(char *filename); void show_phase(char *filename, int N_SRC); void show_amplitude(char *filename); void show_amplitude(char *filename, int N, int M); }; @ with the parameters: <>= int N_PX, N; float *amplitude, *phase; @ The physical extent of the amplitude is set by the pupil mask: <>= mask *M; @ The magnitude and associated number of photon at a given photometric band are set with: <>= const char *photometric_band; float magnitude, N_PHOTON; @ If the source is resolved by the optical system, the irradiance is assumed to have a Gaussian shape of full width a half maximum [[fwhm]] in detector pixel unit (before binning) <>= float fwhm; @ The wavefront is added to the source <>= complex_amplitude wavefront; @ Source structures will be allocated on the device: <>= source *dev_ptr; @ and the source can be tagged with <>= char tag[8]; @ A source can also be propagated geometrically through optical components <>= char rays_exist; bundle rays; @ Other parameters of the [[complex_amplitude]] structure are: <>= cublasHandle_t handle; float *buffer; @ \section{Functions} \label{sec:functions} \subsection{Ray} \label{sec:ray} \index{source!ray} A type for the rays is also defined: <>= typedef struct { <> } ray; @ It contains the ray coordinates: <>= vector coordinates; @ the direction cosines of the ray: <>= vector directions; @ the [[surface_normal]] at the surface it intersected with the last time: <>= vector surface_normal; @ the optical path length: <>= rtd optical_path_length; @ the optical path difference <>= rtd optical_path_difference; @ the vignetting flag [[v]], $[[v]]=0$ means the ray is vignetted: <>= char v; @ the number of iterative steps for the Raphson--Newton method: <>= int n_iteration; @ the energy throughput associated with each ray: <>= rtd throughput; @ \subsection{Ray bundle} \label{sec:ray-bundle} \index{source!bundle} Collections of rays are gathered into bundles: <>= struct bundle { <> void setup(rtd RADIUS, int N_RADIUS, int N_THETA, vector origin, int N_SRC); void setup(rtd L, int N_L, vector origin, int N_SRC); void setup(rtd L, int N_L, vector origin, vector chief_origin, int N_SRC); void setup_free(int _N_RAY_, double *x, double *y, vector origin); void setup_free(double zenith, double azimuth, int _N_RAY_, double *x, double *y, vector origin); void cleanup(void); void to_z_plane(rtd z_in); void to_focal_plane(rtd z_chief_on_axis, rtd rho_focal_plane); void to_sphere(vector sphere_origin); void to_sphere(rtd z_chief_on_axis, rtd rho_focal_plane); const void get_coordinates(double *d__coord); void get_chief_coordinates(double *d__coord); void get_sphere_origins(double *d__coord); void get_directions(double *d__dir); void get_chief_directions(double *d__dir); void get_chief_optical_path_length(double *d__opl); void get_optical_path_length(double *d__opl); void get_optical_path_difference(double *d__opd); void get_optical_path_difference(double *d__opd, float const delta_x, int N_x, float const delta_y, int N_y); void get_vignetting(double *d__v); void get_n_iteration(int *n_iteration); void gmt_truss_onaxis(); void gmt_truss_onaxis_1(const double scale); void gmt_truss_onaxis_2(const double scale); void gmt_truss_onaxis_3(const double scale); void gmt_pma_onaxis(void); void gmt_pma_plate_onaxis(void); void gmt_m2_baffle(void); }; @ A bundle allocates an array of [[N_RAY]] rays on the device <>= int N_RAY; ray *d__ray; @ The structure may contain several bundle of rays, each associated to a different source but with the same number of rays <>= int N_BUNDLE; @ The total number of rays <>= int N_RAY_TOTAL; @ The x and y coordinates of the [[origin]] vector specifies the location of the center of the ray bundle when it crosses the (x,y) plane of the first surface. The z coordinate specifies the height of the ray bundle with respect to the first surface. The ray bundle can be rotated clockwise by [[rot_angle]]. <>= vector *d__origin; double rot_angle; @ The chief ray is defined with <>= ray *d__chief_ray; vector *d__chief_origin; @ The vignetting mask is defined with <>= mask V; @ The ray geometry is defined either in polar coordinates (\textit{fan} mode) <>= char geom[8]; @ where the radius and azimuth angle are sampled with [[N_RADIUS]] and [[N_THETA]] rays, <>= int N_RADIUS, N_THETA; @ or in cartesian coordinates (\textit{box} mode) where the rays sample a [[N_L]]$\times$[[N_L]] square <>= int N_L; @ In \textit{fan} mode, the radius is saved in [[L]] and in \textit{box} mode the side of the box is saved in [[L]]: <>= rtd L; @ The optical path difference is computed with respect to a reference sphere which is centered on the object in the image plane and which is tangent to the exit pupil. <>= rtd *d__sphere_distance, *d__sphere_radius; vector *d__sphere_origin; @ The GMT segment piston mask: <>= int *d__piston_mask; @ The refractive index of the medium they have passed through last: <>= rtd refractive_index; @ The on--axis projection of the truss on M1 center segment creates a polygon repeated 3 times around the segment center of symmetry. [[d__Vx]] and [[d__Vy]] are the x and y coordinates, respectively, of the vertices of the first polygon <>= double *d__Vx, *d__Vy; @ \subsubsection{Setup \& Cleanup} \label{sec:bundle-setup--cleanup} A ray bundle is specified with either the polar or cartesian coordinates of the rays at a given origin. The direction cosine are derived from the coordinates of a source object. The polar coordinates are defined with the sampling of the radius [[N_RADIUS]] from 0 to [[RADIUS]] and with the sampling of the azimuth [[N_THETA]]. \index{source!bundle!setup} <>= void bundle::setup(rtd RADIUS, int _N_RADIUS_, int _N_THETA_, vector origin, int N_SRC) { strcpy(geom,"fan"); N_RADIUS = _N_RADIUS_; N_THETA = _N_THETA_; L = RADIUS; N_BUNDLE = N_SRC; N_RAY = (N_RADIUS-1)*N_THETA + 1; N_RAY_TOTAL = N_RAY*N_BUNDLE; rot_angle = 0.0; V.setup(N_RAY_TOTAL); HANDLE_ERROR( cudaMalloc((void**)&d__ray, sizeof(ray)*N_RAY_TOTAL ) ); HANDLE_ERROR( cudaMalloc((void**)&d__origin, sizeof(vector) ) ); HANDLE_ERROR( cudaMemcpy( d__origin, &origin, sizeof(vector), cudaMemcpyHostToDevice ) ); <> <> } @ with <>= d__Vx = NULL; d__Vy = NULL; @ The direction cosines $(k,l,m)$ of the rays are defined from the source zenith $\zeta$ and azimuth $\xi$ angles as \begin{eqnarray} \label{eq:2} k &=& \sin(\zeta)\cos(\xi) \\ l &=& \sin(\zeta)\sin(\xi) \\ k &=& \cos(\zeta) \end{eqnarray} The polar coordinates $\rho$ and $\theta$ of the rays are given by \begin{equation} \rho = [[RADIUS]] {k \over [[N_RADIUS]] - 1 } \forall k \in [0,[[N_RADIUS]] - 1] \end{equation} and \begin{equation} \theta = 2\pi {k \over [[N_THETA]] } \forall k \in [0,[[N_THETA-1]]] \end{equation} <>= <> blockDim = dim3(N_THREAD,N_THREAD); gridDim = dim3(N_RADIUS/N_THREAD+1,N_THETA/N_THREAD+1, N_BUNDLE); ray_coordinates LLL gridDim , blockDim RRR (d__ray, N_RAY, src->dev_ptr, L, N_RADIUS, N_THETA, d__origin); @ The chief ray is defined with <>= <> <> @ <>= refractive_index = 1.0; HANDLE_ERROR( cudaMalloc((void**)&d__chief_ray, sizeof(ray)*N_BUNDLE ) ); HANDLE_ERROR( cudaMalloc((void**)&d__sphere_origin, sizeof(vector)*N_BUNDLE ) ); HANDLE_ERROR( cudaMalloc((void**)&d__sphere_radius, sizeof(rtd)*N_BUNDLE ) ); HANDLE_ERROR( cudaMalloc((void**)&d__sphere_distance, sizeof(rtd)*N_BUNDLE ) ); @ <>= vector chief_origin; chief_origin.x = 0.0; chief_origin.y = 0.0; chief_origin.z = origin.z; HANDLE_ERROR( cudaMalloc((void**)&d__chief_origin, sizeof(vector) ) ); HANDLE_ERROR( cudaMemcpy( d__chief_origin, &chief_origin, sizeof(vector), cudaMemcpyHostToDevice ) ); @ and is initialized with <>= ray_coordinates LLL gridDim , blockDim RRR (d__chief_ray, 1, src->dev_ptr, 0.0, 2, 1, d_chief__origin); @ The cartesian coordinates are defined with the sampling of the square box length [[N_L]] from $-[[L]]$ to [[L]]. \index{source!bundle!setup} <>= void bundle::setup(rtd _L_, int _N_L_, vector origin, int N_SRC) { strcpy(geom,"box"); N_L = _N_L_; L = _L_; N_BUNDLE = N_SRC; N_RAY = N_L*N_L; N_RAY_TOTAL = N_RAY*N_BUNDLE; rot_angle = 0.0; V.setup(N_RAY_TOTAL); HANDLE_ERROR( cudaMalloc((void**)&d__ray, sizeof(ray)*N_RAY_TOTAL ) ); HANDLE_ERROR( cudaMalloc((void**)&d__origin, sizeof(vector) ) ); HANDLE_ERROR( cudaMemcpy( d__origin, &origin, sizeof(vector), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMalloc((void**)&d__piston_mask, sizeof(int)*N_RAY_TOTAL ) ); <> <> } @ The origin of the chief ray is specified with [[chief_origin]] <>= void bundle::setup(rtd _L_, int _N_L_, vector origin, vector chief_origin, int N_SRC) { strcpy(geom,"box"); N_L = _N_L_; L = _L_; N_BUNDLE = N_SRC; N_RAY = N_L*N_L; N_RAY_TOTAL = N_RAY*N_BUNDLE; rot_angle = 0.0; V.setup(N_RAY_TOTAL); HANDLE_ERROR( cudaMalloc((void**)&d__ray, sizeof(ray)*N_RAY_TOTAL ) ); HANDLE_ERROR( cudaMalloc((void**)&d__origin, sizeof(vector) ) ); HANDLE_ERROR( cudaMemcpy( d__origin, &origin, sizeof(vector), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMalloc((void**)&d__piston_mask, sizeof(int)*N_RAY_TOTAL ) ); <> <> HANDLE_ERROR( cudaMalloc((void**)&d__chief_origin, sizeof(vector) ) ); HANDLE_ERROR( cudaMemcpy( d__chief_origin, &chief_origin, sizeof(vector), cudaMemcpyHostToDevice ) ); } @ The ray bundle can be set to arbitrary coordinates in the entrance puil \index{source!bundle!setup\_free)} <>= void bundle::setup_free(int _N_RAY_, double *x, double *y, vector origin) { <> double zenith = 0.0; double azimuth = 0.0; <> } void bundle::setup_free(double zenith, double azimuth, int _N_RAY_, double *x, double *y, vector origin) { <> <> } <>= strcpy(geom,"free"); N_BUNDLE = 1; N_RAY = _N_RAY_; N_RAY_TOTAL = N_RAY*N_BUNDLE; rot_angle = 0.0; V.setup(N_RAY_TOTAL); HANDLE_ERROR( cudaMalloc((void**)&d__ray, sizeof(ray)*N_RAY_TOTAL ) ); HANDLE_ERROR( cudaMalloc((void**)&d__origin, sizeof(vector) ) ); HANDLE_ERROR( cudaMemcpy( d__origin, &origin, sizeof(vector), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMalloc((void**)&d__piston_mask, sizeof(int)*N_RAY_TOTAL ) ); <> <> @ <>= double *d__x, *d__y; int n_byte = sizeof(double)*N_RAY; HANDLE_ERROR( cudaMalloc((void**)&d__x, n_byte ) ); HANDLE_ERROR( cudaMalloc((void**)&d__y, n_byte ) ); HANDLE_ERROR( cudaMemcpy( d__x, x, n_byte, cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( d__y, y, n_byte, cudaMemcpyHostToDevice ) ); dim3 blockDim(N_THREAD,N_THREAD); dim3 gridDim(N_RAY/N_THREAD2+1,1, N_BUNDLE); ray_free_coordinates LLL gridDim , blockDim RRR (d__ray, N_RAY, d__x, d__y, d__origin, zenith, azimuth); HANDLE_ERROR( cudaMemset(d__x, 0, n_byte ) ); HANDLE_ERROR( cudaMemset(d__y, 0, n_byte ) ); blockDim = dim3(1,1); gridDim = dim3(1,1, N_BUNDLE); ray_free_coordinates LLL gridDim , blockDim RRR (d__chief_ray, 1, d__x, d__y, d__origin, zenith, azimuth); HANDLE_ERROR( cudaFree( d__x ) ); HANDLE_ERROR( cudaFree( d__y ) ); @ with <>= __global__ void ray_free_coordinates(ray *d__ray, int N_RAY, double *d__x, double *d__y, vector *origin, double zenith, double azimuth) { int i, j, k, iSource; rtd s, theta_x,theta_y; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; k = j * gridDim.x* blockDim.x + i; if (kx; d__ray[k].coordinates.y = d__y[k] + origin->y; d__ray[k].coordinates.z = origin->z; d__ray[k].directions.x = sin(zenith)*cos(azimuth); d__ray[k].directions.y = sin(zenith)*sin(azimuth); d__ray[k].directions.z = -cos(zenith); d__ray[k].optical_path_length = 0.0; d__ray[k].optical_path_difference = theta_x * d__x[k] + theta_y * d__y[k]; d__ray[k].v = 1; s = -origin->z/d__ray[k].directions.z; d__ray[k].coordinates.x -= s*d__ray[k].directions.x; d__ray[k].coordinates.y -= s*d__ray[k].directions.y; } } @ The direction cosines $(k,l,m)$ of the rays are defined from the source zenith $\zeta$ and azimuth $\xi$ angles as \begin{eqnarray} \label{eq:2} k &=& \sin(\zeta)\cos(\xi) \\ l &=& \sin(\zeta)\sin(\xi) \\ k &=& \cos(\zeta) \end{eqnarray} The cartesian coordinates $x$ and $y$ of the rays are given by \begin{eqnarray} x &=& L*(i-([[N_L]]-1)/2)/([[N_L]]-1) \\ y &=& L*(j-([[N_L]]-1)/2)/([[N_L]]-1) \end{eqnarray} <>= <> blockDim = dim3(N_THREAD,N_THREAD); gridDim = dim3(N_L/N_THREAD+1,N_L/N_THREAD+1, N_BUNDLE); ray_coordinates_box LLL gridDim , blockDim RRR (d__ray, N_RAY, src->dev_ptr, L, N_L, d__origin, rot_angle); @ Memory is freed with \index{source!bundle!cleanup} <>= void bundle::cleanup(void) { INFO("@(CEO)>bundle: freeing memory!\n"); HANDLE_ERROR( cudaFree( d__ray ) ); HANDLE_ERROR( cudaFree( d__chief_ray ) ); HANDLE_ERROR( cudaFree( d__origin ) ); HANDLE_ERROR( cudaFree( d__chief_origin ) ); HANDLE_ERROR( cudaFree( d__piston_mask ) ); HANDLE_ERROR( cudaFree( d__sphere_origin ) ); HANDLE_ERROR( cudaFree( d__sphere_radius ) ); HANDLE_ERROR( cudaFree( d__sphere_distance ) ); /*if (d__Vx!=NULL) { HANDLE_ERROR( cudaFree( d__Vx ) ); HANDLE_ERROR( cudaFree( d__Vy ) ); }*/ V.cleanup(); } @ \subsubsection{Propagation parametric geometric equation} \label{sec:prop-param-geom} The rays are propagated to the plane $z=[[z__plane]]$ with \begin{eqnarray} \label{eq:26} s &=& { [[z_plane]] - z \over m} \\\nonumber x &=& x + s k \\\nonumber y &=& y + s l \\\nonumber z &=& [[z_plane]] \end{eqnarray} \index{source!bundle!to\_z\_plane} <>= void bundle::to_z_plane(rtd z_plane) { dim3 blockDim(1,1); dim3 gridDim(1,1, N_BUNDLE); to_z_plane_chief_kernel LLL gridDim , blockDim RRR (d__chief_ray, 1, z_plane); blockDim = dim3(N_THREAD,N_THREAD); gridDim = dim3(N_RAY/N_THREAD2+1,1,N_BUNDLE); to_z_plane_kernel LLL gridDim , blockDim RRR (d__ray, N_RAY, z_plane, d__chief_ray); } @ where <>= __global__ void to_z_plane_kernel(ray *d__ray, int N_RAY, rtd z_plane, ray *d__chief_ray) { int i, j, k0, k, iSource; rtd s; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; k = j * gridDim.x * blockDim.x + i; k0 = k; k += iSource*N_RAY; if ( (k0optical_path_length; } } __global__ void to_vz_plane_kernel(ray *d__ray, int N_RAY, vector *d__v, ray *d__chief_ray) { int i, j, k0, k, iSource; rtd s; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; k = j * gridDim.x * blockDim.x + i; k0 = k; k += iSource*N_RAY; if ( (k0optical_path_length; } } __global__ void to_z_plane_chief_kernel(ray *d__ray, int N_RAY, rtd z_plane) { int k, iSource; rtd s; iSource = blockIdx.z; k = iSource; if (d__ray[k].directions.z==0) return; s = (z_plane - d__ray[k].coordinates.z)/d__ray[k].directions.z; d__ray[k].coordinates.x = d__ray[k].coordinates.x + s*d__ray[k].directions.x; d__ray[k].coordinates.y = d__ray[k].coordinates.y + s*d__ray[k].directions.y; d__ray[k].coordinates.z = z_plane; d__ray[k].optical_path_length = s; } __global__ void to_vz_plane_chief_kernel(ray *d__ray, int N_RAY, vector *d__v) { int k, iSource; rtd s; iSource = blockIdx.z; k = iSource; if (d__ray[k].directions.z==0) return; s = (d__v[k].z - d__ray[k].coordinates.z)/d__ray[k].directions.z; d__ray[k].coordinates.x = d__ray[k].coordinates.x + s*d__ray[k].directions.x; d__ray[k].coordinates.y = d__ray[k].coordinates.y + s*d__ray[k].directions.y; d__ray[k].coordinates.z = d__v[k].z; d__ray[k].optical_path_length = s; } @ The rays are propagated to the plane focal by computing first the coordinates of the intersection of the chief ray with the curved focal plane and then by propagated the rays from the last surface to the z--plane at the intersection of the chief ray with the focal plane. <>= void bundle::to_focal_plane(rtd z_chief_on_axis, rtd rho_focal_plane) { dim3 blockDim(1,1); dim3 gridDim(1,1, N_BUNDLE); <> <> to_vz_plane_chief_kernel LLL gridDim , blockDim RRR (d__chief_ray, 1, d__sphere_origin); blockDim = dim3(N_THREAD,N_THREAD); gridDim = dim3(N_RAY/N_THREAD2+1,1,N_BUNDLE); to_vz_plane_kernel LLL gridDim , blockDim RRR (d__ray, N_RAY, d__sphere_origin, d__chief_ray); } @ \subsubsection{Reference sphere} \label{sec:reference-sphere} \paragraph{Spherical focal plane} \label{sec:spher-focal-plane} \index{source!bundle!to\_sphere} <>= void bundle::to_sphere(rtd z_chief_on_axis, rtd rho_focal_plane) { dim3 blockDim(1,1); dim3 gridDim(1,1, N_BUNDLE); <> <> <> <> <> } @ Given the $z$ location of the focal plane on axis [[z_chief_on_axis]] and the radius of curvature of the focal plane [[rho_focal_plane]], the distance to the object from the last surface for the chief ray is given by <>= to_focal_surface_chief_kernel LLL gridDim , blockDim RRR (d__sphere_distance, d__chief_ray, 1, z_chief_on_axis, rho_focal_plane); @ with <>= __global__ void to_focal_surface_chief_kernel(rtd *s, ray *d__ray, int N_RAY, rtd z_chief_on_axis, rtd rho_focal_plane) { int k, iSource; rtd x, y, z, g, rho2; iSource = blockIdx.z; k = iSource; x = d__ray[k].coordinates.x; y = d__ray[k].coordinates.y; z = d__ray[k].coordinates.z - z_chief_on_axis + rho_focal_plane; g = x*d__ray[k].directions.x + y*d__ray[k].directions.y + z*d__ray[k].directions.z; rho2 = x*x + y*y + z*z; s[k] = -1.0*sqrt( g*g - (rho2-rho_focal_plane*rho_focal_plane) ) - g; } @ The origin of the reference sphere is computed next <>= chief_parametric_equation LLL gridDim , blockDim RRR (d__sphere_origin, d__chief_ray, 1, d__sphere_distance); @ The chief ray defines the location of the object and the reference sphere is centered on the object e.g. the origin of the reference sphere is on the chief ray. If the distance from the last surface to the object is known for the chief ray, then the origin of the reference sphere is given by: <>= __global__ void chief_parametric_equation(vector *d__v, ray *d__ray, int N_RAY, rtd *s) { int k, iSource; iSource = blockIdx.z; k = iSource; d__v[k].x = d__ray[k].coordinates.x + d__ray[k].directions.x*s[k]; d__v[k].y = d__ray[k].coordinates.y + d__ray[k].directions.y*s[k]; d__v[k].z = d__ray[k].coordinates.z + d__ray[k].directions.z*s[k]; } @ The computation of the radius of the reference sphere, which must be tangent to the exit pupil, is: <>= sphere_radius_kernel LLL gridDim , blockDim RRR (d__sphere_radius, d__chief_ray, 1, d__sphere_origin); @ To compute the reference sphere radius, one must know the location of the exit pupil. The location of the exit pupil is given by the intersection of the chief ray with the optical axis after the last surface of the system. Remembering the rays parametric equation: \begin{eqnarray} \label{eq:60} x &=& x_{-1} + k_{-1}s \\ y &=& y_{-1} + l_{-1}s \\ z &=& z_{-1} + m_{-1}s \\ \end{eqnarray} and noting that, at the intersection $x=y=0$, the distance $s$ from the last surface to the exit pupil is given by \begin{equation} \label{eq:61} s = \sqrt{ x_{-1}^2 + y_{-1}^2 \over k_{-1}^2 + l_{-1}^2 } \end{equation} Given the origin of the reference sphere $\left( x_{s,0},y_{s,0},z_{s,0} \right)$, the radius $\varrho_s$ is written \begin{equation} \label{eq:62} \varrho_s = \sqrt{ x_{s,0}^2 + y_{s,0}^2 + \left( z_{s,0} - z_E \right) }, \end{equation} where $z_E$ is the exit pupil coordinate on the z--axis, \begin{equation} \label{eq:63} z_E = z_{-1} + m_{-1}s. \end{equation} In the case of an on--axis source, $k_{-1}=l_{-1}=0$ and $s=\infty$ so the radius is set to $\varrho_s=23.772110269559725$m. <>= __global__ void sphere_radius_kernel(rtd *radius, ray *d__ray, int N_RAY, vector *sphere_origin) { int k, iSource; rtd rho2_xy, rho2_kl, buf, s, z; iSource = blockIdx.z; k = iSource; buf = d__ray[k].coordinates.x; buf *= buf; rho2_xy = buf; buf = d__ray[k].coordinates.y; buf *= buf; rho2_xy += buf; buf = d__ray[k].directions.x; buf *= buf; rho2_kl = buf; buf = d__ray[k].directions.y; buf *= buf; rho2_kl += buf; if (rho2_kl<1E-24) radius[k] = 23.772110269559725; else { s = sqrt(rho2_xy/rho2_kl); z = d__ray[k].coordinates.z + d__ray[k].directions.z*s; buf = sphere_origin[k].x; buf *= buf; radius[k] = buf; buf = sphere_origin[k].y; buf *= buf; radius[k] += buf; buf = sphere_origin[k].z - z; buf *= buf; radius[k] += buf; radius[k] = sqrt( radius[k] ); } } @ The optical path length of the chief ray from the last surface to the reference sphere is computed with <>= to_sphere_chief_kernel LLL gridDim , blockDim RRR (d__chief_ray, 1, d__sphere_origin, d__sphere_radius); @ with <>= __global__ void to_sphere_chief_kernel(ray *d__ray, int N_RAY, vector *sphere_origin, rtd *sphere_radius) { int k, iSource; rtd s, x, y, z, g, rho2; iSource = blockIdx.z; k = iSource; <> } @ The optical path difference (OPD) is usually computed with respect to a reference sphere centered on the object. The OPD is given by the difference between the rays optical path length (OPL) to the sphere and the OPL of the chief ray to the sphere. The OPL $s$ is used to compute the intersection of a ray with the sphere from the parametric equation: \begin{eqnarray} \label{eq:27} x &=& x_{-1} + s k_{-1} \\\nonumber y &=& y_{-1} + s l_{-1} \\\nonumber z &=& z_{-1} + s m_{-1} \end{eqnarray} where $(x_{-1},y_{-1},z_{-1})$ and $(k_{-1},l_{-1},m_{-1})$ are the coordinates at and direction cosines from the last surface. Inserting $x$, $y$ and $z$ in the sphere equation, \begin{equation} \label{eq:28} (x - x_O)^2 + (y-y_O)^2 + (z-z_O)^2 = R^2 \end{equation} where $(x_O,y_O,z_O)$ is the object coordinate and $R$ the sphere radius, and solving for $S$ lead to \begin{equation} \label{eq:29} s = - \gamma - \sqrt{\gamma^2 - (\rho^2 - R^2)} \end{equation} with \begin{equation} \label{eq:30} \gamma = (x_{-1} - x_O)k_{-1} + (y_{-1} - y_O)l_{-1} + (z_{-1} - z_O)m_{-1} \end{equation} and \begin{equation} \label{eq:31} \rho^2 = (x_{-1} - x_O)^2 + (y_{-1} - y_O)^2 + (z_{-1} - z_O)^2. \end{equation} @ The code corresponding to the above equation is: <>= x = d__ray[k].coordinates.x - sphere_origin[iSource].x; y = d__ray[k].coordinates.y - sphere_origin[iSource].y; z = d__ray[k].coordinates.z - sphere_origin[iSource].z; g = x*d__ray[k].directions.x + y*d__ray[k].directions.y + z*d__ray[k].directions.z; rho2 = x*x + y*y + z*z; s = -sqrt( g*g - (rho2-sphere_radius[iSource]*sphere_radius[iSource]) ) - g; d__ray[k].optical_path_length = s; d__ray[k].coordinates.x += s*d__ray[k].directions.x; d__ray[k].coordinates.y += s*d__ray[k].directions.y; d__ray[k].coordinates.z += s*d__ray[k].directions.z; @ The optical path difference of the source rays is computed last <>= blockDim = dim3(N_THREAD,N_THREAD); gridDim = dim3(N_RAY/N_THREAD2+1,1, N_BUNDLE); to_sphere_kernel LLL gridDim , blockDim RRR (d__ray, N_RAY, d__sphere_origin, d__sphere_radius, d__chief_ray); @ with <>= __global__ void to_sphere_kernel(ray *d__ray, int N_RAY, vector *sphere_origin, rtd *sphere_radius, ray *d__chief_ray) { int i, j, k0, k, iSource; rtd s, x, y, z, g, rho2; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; k = j * gridDim.x * blockDim.x + i; iSource = blockIdx.z; k0 = k; k += iSource*N_RAY; if ( (k0> d__ray[k].optical_path_difference += d__ray[k].optical_path_length - d__chief_ray[iSource].optical_path_length; } } @ \paragraph{Reference sphere origin} \label{sec:refer-sphere-orig} Given the origin and the radius of the reference sphere, the optical path difference is computed with \index{source!bundle!to\_sphere} <>= void bundle::to_sphere(vector sphere_origin) { HANDLE_ERROR( cudaMemcpy( d__sphere_origin, &sphere_origin, sizeof(vector), cudaMemcpyHostToDevice ) ); dim3 blockDim(1,1); dim3 gridDim(1,1, N_BUNDLE); <> <> <> } @ <>= void bundle::to_sphere(rtd *s, rtd sphere_radius) { <> } @ \subsubsection{Gathering data} \label{sec:gathering-data} The coordinates, directions, optical path length and vignetting map are copied into arrays with the following functions: <>= <> <> <> <> <> <> <> <> @ <>= <> <> <> <> <> <> <> <> @ \index{source!bundle!get\_coordinates} <>= const void bundle::get_coordinates(double *d__coord) { HANDLE_ERROR( cudaMemset( d__coord, 0, sizeof(double)*N_RAY*3 ) ); dim3 blockDim(N_THREAD,N_THREAD); dim3 gridDim(N_RAY/N_THREAD2+1,1, N_BUNDLE); get_coordinates_kernel LLL gridDim , blockDim RRR (d__coord, d__ray, N_RAY); } @ \index{source!bundle!get\_chief\_coordinates} <>= void bundle::get_chief_coordinates(double *d__coord) { dim3 blockDim(1,1); dim3 gridDim(1,1, N_BUNDLE); get_coordinates_kernel LLL gridDim, blockDim RRR (d__coord, d__chief_ray, 1); } @ <>= __global__ void get_coordinates_kernel(double *d__coord, ray *d__ray, int N_RAY) { int i, j, k0, k, l, iSource; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; k = j * gridDim.x * blockDim.x + i; iSource = blockIdx.z; k0 = k; k += iSource*N_RAY; if ( ( k0>= void bundle::get_sphere_origins(double *d__coord) { dim3 blockDim(1,1); dim3 gridDim(1,1, N_BUNDLE); get_sphere_origins_kernel LLL gridDim, blockDim RRR (d__coord, d__sphere_origin, 1); } @ <>= __global__ void get_sphere_origins_kernel(double *d__coord, vector *d__sphere_origin, int N_RAY) { int i, j, k0, k, l, iSource; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; k = j * gridDim.x * blockDim.x + i; iSource = blockIdx.z; k0 = k; k += iSource*N_RAY; if ( k0>= void bundle::get_directions(double *d__dir) { dim3 blockDim(N_THREAD,N_THREAD); dim3 gridDim(N_RAY/N_THREAD2+1,1,N_BUNDLE); get_directions_kernel LLL gridDim , blockDim RRR (d__dir, d__ray, N_RAY); } @ \index{source!bundle!get\_chief\_directions} <>= void bundle::get_chief_directions(double *d__dir) { dim3 blockDim(1,1); dim3 gridDim(1,1,N_BUNDLE); get_directions_kernel LLL gridDim , blockDim RRR (d__dir, d__chief_ray, 1); } @ <>= __global__ void get_directions_kernel(double *d__dir, ray *d__ray, int N_RAY) { int i, j, k, l, iSource; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; k = j * gridDim.x * blockDim.x + i; iSource = blockIdx.z; if ( k>= void bundle::get_optical_path_length(double *d__opl) { dim3 blockDim(N_THREAD,N_THREAD); dim3 gridDim(N_RAY/N_THREAD2+1,1,N_BUNDLE); get_optical_path_length_kernel LLL gridDim , blockDim RRR (d__opl, d__ray, N_RAY); } @ \index{source!bundle!get\_chief\_optical\_path\_length} <>= void bundle::get_chief_optical_path_length(double *d__opl) { dim3 blockDim(1,1); dim3 gridDim(1,1,N_BUNDLE); get_optical_path_length_kernel LLL gridDim , blockDim RRR (d__opl, d__chief_ray, 1); } @ <>= __global__ void get_optical_path_length_kernel(double *d__opl, ray *d__ray, int N_RAY) { int i, j, k, iSource; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; k = j * gridDim.x * blockDim.x + i; iSource = blockIdx.z; if ( k>= void bundle::get_optical_path_difference(double *d__opd) { HANDLE_ERROR( cudaMemset(d__opd, 0, sizeof(double)*N_RAY_TOTAL ) ); dim3 blockDim(N_THREAD,N_THREAD); dim3 gridDim(N_RAY/N_THREAD2+1,1,N_BUNDLE); get_optical_path_difference_kernel LLL gridDim , blockDim RRR (d__opd, d__ray, N_RAY); } @ <>= __global__ void get_optical_path_difference_kernel(double *d__opd, ray *d__ray, int N_RAY) { int i, j, k, iSource; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; k = j * gridDim.x * blockDim.x + i; iSource = blockIdx.z; j = k; k += iSource*N_RAY; if ( (j>= void bundle::get_n_iteration(int *n_iteration) { HANDLE_ERROR( cudaMemset(n_iteration, 0, sizeof(int)*N_RAY_TOTAL ) ); dim3 blockDim(N_THREAD,N_THREAD); dim3 gridDim(N_RAY/N_THREAD2+1,1,N_BUNDLE); get_n_iteration_kernel LLL gridDim , blockDim RRR (n_iteration, d__ray, N_RAY); } @ <>= __global__ void get_n_iteration_kernel(int *n_iteration, ray *d__ray, int N_RAY) { int i, j, k, iSource; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; k = j * gridDim.x * blockDim.x + i; iSource = blockIdx.z; j = k; k += iSource*N_RAY; if ( (j>= void bundle::get_optical_path_difference(double *d__opd, float const delta_x, int N_x, float const delta_y, int N_y) { /* cublasHandle_t handle; */ /* cublasStatus_t status; */ /* double *d__coord; */ /* int N, idx; */ /* cublasCreate(&handle); */ /* N = N_RAY*3; */ /* HANDLE_ERROR( cudaMalloc((void**)&d__coord, sizeof(double)*N ) ); */ /* get_coordinates(d__coord); */ /* CUBLAS_ERROR( cublasIdamin(handle, N_RAY, d__coord, 3, &idx) ); */ /* printf("x min idx = %d\n",idx); */ /* CUBLAS_ERROR( cublasIdamax(handle, N_RAY, d__coord, 3, &idx) ); */ /* printf("x max idx = %d\n",idx); */ /* CUBLAS_ERROR( cublasIdamin(handle, N_RAY, d__coord+1, 3, &idx) ); */ /* printf("y min idx = %d\n",idx); */ /* CUBLAS_ERROR( cublasIdamax(handle, N_RAY, d__coord+1, 3, &idx) ); */ /* printf("y max idx = %d\n",idx); */ HANDLE_ERROR( cudaMemset(d__opd, 0, sizeof(double)*N_x*N_y ) ); dim3 blockDim(N_THREAD,N_THREAD); dim3 gridDim(N_RAY/N_THREAD2+1,1); get_optical_path_difference_interp_kernel LLL gridDim , blockDim RRR (d__opd, d__ray, N_RAY, delta_x, N_x, delta_y, N_y, L, N_L); /* cublasDestroy(handle); */ /* HANDLE_ERROR( cudaFree( d__coord) ); */ } @ with <>= __global__ void get_optical_path_difference_interp_kernel(double *d__opd, ray* d__ray, int N_RAY, float const delta_x, int N_x, float const delta_y, int N_y, float const L, int const N_L) { /* int i, j, k, kl; float x, y; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; k = j * gridDim.x * blockDim.x + i; if ( (k=N_x) ) { d__opd[kl] = 0; return; } if ( (j<0) || (j>=N_y) ) { d__opd[kl] = 0; return; } d__opd[k] = d__ray[k].optical_path_difference; } */ int i, j, k, l; rtd x, y; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; k = j * gridDim.x * blockDim.x + i; if ( (k=0) && (i=0) && (i>= void bundle::get_vignetting(double *d__v) { dim3 blockDim(N_THREAD,N_THREAD); dim3 gridDim(N_RAY/N_THREAD2+1,1,N_BUNDLE); get_vignetting_kernel LLL gridDim , blockDim RRR (d__v, d__ray, N_RAY); } @ <>= __global__ void get_vignetting_kernel(double *d__v, ray *d__ray, int N_RAY) { int i, j, k, iSource; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; k = j * gridDim.x * blockDim.x + i; iSource = blockIdx.z; if ( k>= void bundle::get_wavefront(source *src) { // printf("Setting wavefront!\n"); dim3 blockDim(N_THREAD,N_THREAD); dim3 gridDim(N_RAY/N_THREAD2+1,1, N_BUNDLE); get_wavefront_kernel LLL gridDim , blockDim RRR (src->wavefront.amplitude, src->wavefront.phase, V.m, d__ray, N_RAY); V.set_filter(); src->wavefront.masked(&V); } @ <>= __global__ void get_wavefront_kernel(float *amplitude, float *phase, char *m, ray *d__ray, int N_RAY) { int i, j, k, iSource; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; k = j * gridDim.x * blockDim.x + i; if ( k>= void bundle::reset(source *src) { dim3 blockDim(1,1); dim3 gridDim(1,1, N_BUNDLE); if (strcmp(geom,"box")==0) <> if (strcmp(geom,"fan")==0) <> } @ with <>= __global__ void reset_kernel(ray *d__ray, int N_RAY) { int i, j, k; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; k = j * gridDim.x * blockDim.x + i; if ( k>= void bundle::gmt_truss_onaxis(const double scale) { int NV = 15, nbyte, k,l; double Vx[] = {-3.011774, -2.446105, -3.011774, -2.799304, -2.33903, -1.566412, -1.640648, -1.65, -1.640648, -1.566412, -2.347462, -1.597649, -1.725044, -2.392888, -2.799304}; double Vy[] = {-2.902158, 0., 2.902158, 3.107604, 0.07244, 0.518512, 0.175429, 0., -0.175429, -0.518512, -0.067572, -3.865336, -3.810188, -0.427592, -3.107604}; double _Vx_, _Vy_, co, so, o; dim3 blockDim(256); dim3 gridDim(N_RAY_TOTAL/256+1); nbyte = NV*sizeof(double); if (d__Vx==NULL) { HANDLE_ERROR( cudaMalloc((void**)&d__Vx, nbyte ) ); HANDLE_ERROR( cudaMalloc((void**)&d__Vy, nbyte ) ); } o = -2*PI/3; for (k=0;k<3;k++) { sincos(o,&so,&co); for (l=0;l>= void bundle::gmt_truss_onaxis_1(const double scale) { int NV = 88; double Vx[] = {-2.73567348, -2.41783814, -2.41783814, -1.97346562, -1.8446836 , -2.34274699, -2.33728303, -2.33606189, -2.33606189, -2.25015407, -2.24170793, -2.239291 , -2.239291 , -2.20436269, -2.20436269, -2.20596966, -2.20596966, -2.15933482, -2.1311272 , -1.95490989, -1.95490989, -1.94667536, -1.94667536, -1.72857749, -1.72857749, -1.73763755, -1.73763755, -1.70680216, -1.70680216, -1.70680216, -1.70680216, -1.73763755, -1.73763755, -1.72857674, -1.72857674, -1.94667536, -1.94667536, -1.95490989, -1.95490989, -2.1311272 , -2.15933482, -2.20596966, -2.20596966, -2.20436269, -2.20436269, -2.239291 , -2.239291 , -2.24170793, -2.25015372, -2.33606189, -2.33606189, -2.33728303, -2.34274699, -1.8446836 , -1.97346562, -2.41784631, -2.41784631, -2.73568714, -2.90993747, -2.54626483, -2.54626483, -2.52838129, -2.51033307, -2.45940238, -2.45940238, -2.45962468, -2.45962468, -2.4599985 , -2.4599985 , -2.4599985 , -2.4599985 , -2.4599985 , -2.4599985 , -2.4599985 , -2.4599985 , -2.4599985 , -2.4599985 , -2.4599985 , -2.4599985 , -2.46235746, -2.46235746, -2.50986898, -2.52712092, -2.52951167, -2.52951167, -2.52926069, -2.52926069, -2.90993746}; double Vy[] = {-3.16376309, -1.33215497, -1.33215497, -3.68764691, -3.75372464, -1.11363274, -0.96603583, -0.95955409, -0.95955409, -0.53782168, -0.53354365, -0.53288407, -0.53288407, -0.43691933, -0.43691933, -0.43633444, -0.43633444, -0.30820625, -0.29740067, -0.39913978, -0.39913978, -0.40029117, -0.40029117, -0.52621003, -0.52621003, -0.54190253, -0.54190253, -0.57168733, -0.57168733, 0.57168733, 0.57168733, 0.54190253, 0.54190253, 0.52620873, 0.52620873, 0.40028944, 0.40028944, 0.39913805, 0.39913805, 0.29739893, 0.30820452, 0.43633271, 0.43633271, 0.4369176 , 0.4369176 , 0.53288234, 0.53288234, 0.53354192, 0.53781996, 0.95955409, 0.95955409, 0.96603583, 1.11363274, 3.75372464, 3.68764691, 1.33211162, 1.33211162, 3.16375128, 3.00425867, 1.22035518, 1.22035518, 1.1213287 , 0.93540246, 0.64318448, 0.64318448, 0.46213519, 0.46213519, 0.46199913, 0.46199913, 0.1576787 , 0.1576787 , 0.15767862, 0.15767862, -0.15767863, -0.15767863, -0.46200087, -0.46200087, -0.64502453, -0.64502453, -0.65668771, -0.65668771, -0.93507524, -1.12263325, -1.1366415 , -1.1366415 , -1.13694571, -1.13694571, -3.00425868}; dim3 blockDim(256); dim3 gridDim(N_RAY_TOTAL/256+1); int nbyte = NV*sizeof(double); HANDLE_ERROR( cudaMalloc((void**)&d__Vx, nbyte ) ); HANDLE_ERROR( cudaMalloc((void**)&d__Vy, nbyte ) ); HANDLE_ERROR( cudaMemcpy( d__Vx, &Vx, nbyte, cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( d__Vy, &Vy, nbyte, cudaMemcpyHostToDevice ) ); gmt_truss_onaxis_kern LLL gridDim,blockDim RRR (d__ray, N_RAY_TOTAL, d__Vx, d__Vy, NV); HANDLE_ERROR( cudaFree(d__Vx ) ); HANDLE_ERROR( cudaFree(d__Vy ) ); } void bundle::gmt_truss_onaxis_2(const double scale) { int NV = 81; double Vx[] = {4.10773594, 2.36259912, 2.00525308, 1.99902916, 1.99902916, 1.59084427, 1.58291632, 1.58113664, 1.58113664, 1.48056459, 1.48056459, 1.48086154, 1.48086154, 1.34658185, 1.32312013, 1.32312013, 1.32312013, 1.32 , 1.32 , 1.32 , 1.32 , 1.33812013, 1.33812013, 1.34849683, 1.34849683, 0.35830533, 0.35830533, 0.39951742, 0.39951742, 0.40857824, 0.40857824, 0.62667686, 0.62667686, 0.63179126, 0.63179126, 0.80800857, 0.81275446, 0.72510962, 0.72510962, 0.72379961, 0.72379961, 0.65815586, 0.65815586, 0.65879311, 0.65931112, 0.33703273, 0.33703273, 0.33202995, 0.05528065, 0.05528065, -1.37204541, -1.1467956 , 0.21627383, 0.21627383, 0.2930915 , 0.44508424, 0.67268709, 0.67268709, 0.82959152, 0.82959152, 0.82989626, 0.82989626, 1.09344549, 1.09344549, 1.09344556, 1.09344556, 1.36655295, 1.36655295, 1.63010374, 1.63010374, 1.78860688, 1.78860688, 1.79988697, 1.79988697, 2.06473341, 2.23578938, 2.24911625, 2.24911625, 2.24925421, 2.24925421, 4.05673307}; double Vy[] = {-0.78728119, -1.42783176, -1.54112857, -1.5433119 , -1.5433119 , -1.67977975, -1.67460419, -1.67284086, -1.67284086, -1.69057443, -1.69057443, -1.69225855, -1.69225855, -1.71593568, -1.69690996, -1.49343173, -1.49343173, -1.48572473, -1.48572473, -1.233887 , -1.233887 , -1.233887 , -1.233887 , -1.19229036, -1.19229036, -1.76397769, -1.76397769, -1.77578953, -1.77578953, -1.76009573, -1.76009573, -1.88601503, -1.88601503, -1.89257065, -1.89257065, -1.99430976, -2.02414107, -2.12859212, -2.12859212, -2.12749289, -2.12749289, -2.20572406, -2.20572406, -2.20814697, -2.21760026, -2.50286599, -2.50286599, -2.50716439, -2.75997214, -2.75997214, -3.9510502 , -4.02220911, -2.81530762, -2.81530762, -2.75030678, -2.64171344, -2.45149718, -2.45149718, -2.36116505, -2.36116505, -2.36142076, -2.36142076, -2.20926055, -2.20926055, -2.20926051, -2.20926051, -2.05158188, -2.05158188, -1.89942076, -1.89942076, -1.80790893, -1.80790893, -1.80412026, -1.80412026, -1.70607268, -1.62723429, -1.62230061, -1.62230061, -1.62193116, -1.62193116, -1.01795043}; dim3 blockDim(256); dim3 gridDim(N_RAY_TOTAL/256+1); int nbyte = NV*sizeof(double); HANDLE_ERROR( cudaMalloc((void**)&d__Vx, nbyte ) ); HANDLE_ERROR( cudaMalloc((void**)&d__Vy, nbyte ) ); HANDLE_ERROR( cudaMemcpy( d__Vx, &Vx, nbyte, cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( d__Vy, &Vy, nbyte, cudaMemcpyHostToDevice ) ); gmt_truss_onaxis_kern LLL gridDim,blockDim RRR (d__ray, N_RAY_TOTAL, d__Vx, d__Vy, NV); HANDLE_ERROR( cudaFree(d__Vx ) ); HANDLE_ERROR( cudaFree(d__Vy ) ); } void bundle::gmt_truss_onaxis_3(const double scale) { int NV = 84; double Vx[] = {-1.37206246, 0.05523902, 0.33202995, 0.33703273, 0.33703273, 0.6593098 , 0.65879161, 0.65815436, 0.65815436, 0.72379811, 0.72379811, 0.72510812, 0.72510812, 0.81275296, 0.80800707, 0.63178976, 0.63178976, 0.62667536, 0.62667536, 0.40857749, 0.40857749, 0.39951742, 0.39951742, 0.35830533, 0.35830533, 1.34849683, 1.34849683, 1.33812013, 1.33812013, 1.3199985 , 1.3199985 , 1.3199985 , 1.3199985 , 1.32311863, 1.32311863, 1.32311863, 1.34658035, 1.48086004, 1.48086004, 1.48056309, 1.48056309, 1.58113514, 1.58113514, 1.58291482, 1.5908426 , 1.99902916, 1.99902916, 2.00525308, 2.13580774, 4.1731627 , 4.18032871, 2.36256566, 2.36256566, 4.10773255, 4.05673307, 2.32999101, 2.32999101, 2.23528979, 2.06524883, 1.78671528, 1.78671528, 1.63003316, 1.63003316, 1.63010224, 1.63010224, 1.36655301, 1.36655301, 1.36655294, 1.36655294, 1.09344555, 1.09344555, 0.82989476, 0.82989476, 0.67139162, 0.67139162, 0.66247049, 0.66247049, 0.44513558, 0.29133155, 0.28039542, 0.28039542, 0.28000648, 0.28000648, -1.1467956}; double Vy[] = {3.95104427, 2.75998673, 2.50716439, 2.50286599, 2.50286599, 2.21760143, 2.20814784, 2.20572493, 2.20572493, 2.12749376, 2.12749376, 2.12859299, 2.12859299, 2.02414193, 1.99431063, 1.89257151, 1.89257151, 1.8860159 , 1.8860159 , 1.76009703, 1.76009703, 1.77578953, 1.77578953, 1.76397769, 1.76397769, 1.19229036, 1.19229036, 1.233887 , 1.233887 , 1.233887 , 1.233887 , 1.48572559, 1.48572559, 1.4934326 , 1.4934326 , 1.69691083, 1.71593655, 1.69225941, 1.69225941, 1.69057529, 1.69057529, 1.67284173, 1.67284173, 1.67460505, 1.67978031, 1.5433119 , 1.5433119 , 1.54112857, 1.47206203, -0.27931946, -0.13475209, 1.42786052, 1.42786052, 0.78729892, 1.01795043, 1.59495244, 1.59495244, 1.62897808, 1.70631098, 1.8083127 , 1.8083127 , 1.89902986, 1.89902986, 1.89942163, 1.89942163, 2.05158184, 2.05158184, 2.05158188, 2.05158188, 2.20926051, 2.20926051, 2.36142163, 2.36142163, 2.45293346, 2.45293346, 2.46080797, 2.46080797, 2.64114792, 2.74986754, 2.75894211, 2.75894211, 2.75887686, 2.75887686, 4.02220911}; dim3 blockDim(256); dim3 gridDim(N_RAY_TOTAL/256+1); int nbyte = NV*sizeof(double); HANDLE_ERROR( cudaMalloc((void**)&d__Vx, nbyte ) ); HANDLE_ERROR( cudaMalloc((void**)&d__Vy, nbyte ) ); HANDLE_ERROR( cudaMemcpy( d__Vx, &Vx, nbyte, cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( d__Vy, &Vy, nbyte, cudaMemcpyHostToDevice ) ); gmt_truss_onaxis_kern LLL gridDim,blockDim RRR (d__ray, N_RAY_TOTAL, d__Vx, d__Vy, NV); HANDLE_ERROR( cudaFree(d__Vx ) ); HANDLE_ERROR( cudaFree(d__Vy ) ); } void bundle::gmt_truss_onaxis() { double scale = 1.; gmt_truss_onaxis_1(scale); gmt_truss_onaxis_2(scale); gmt_truss_onaxis_3(scale); dim3 blockDim(256); dim3 gridDim(N_RAY_TOTAL/256+1); gmt_pfa_bearing_motor LLL gridDim,blockDim RRR (d__ray, N_RAY_TOTAL, 0.1235, -2.438934996, -1.036172321, scale); gmt_pfa_bearing_motor LLL gridDim,blockDim RRR (d__ray, N_RAY_TOTAL, 0.1235, -2.438934996, 1.036172321, scale); } @ with the device kernels <>= __global__ void gmt_truss_onaxis_kern(ray *d__ray, int NP, double *Vx, double *Vy, int NV) { int i,w; double rho2, R2; R2 = 1.65*1.65; i = blockIdx.x * blockDim.x + threadIdx.x; if (i>= __global__ void gmt_pfa_bearing_motor(ray *d__ray, int NP, const double R, const double x0, const double y0, const double scale) { int i; double rho2, R2, x, y; R2 = R*R; i = blockIdx.x * blockDim.x + threadIdx.x; if (i>= void bundle::gmt_m2_baffle(void) { dim3 blockDim(256); dim3 gridDim(N_RAY_TOTAL/256+1); gmt_m2_baffle_kern LLL gridDim,blockDim RRR (d__ray,N_RAY_TOTAL); } @ with the device kernels <>= __global__ void gmt_m2_baffle_kern(ray *d__ray, int NP) { int i; double rho2, R2; R2 = 1.8*1.8; i = blockIdx.x * blockDim.x + threadIdx.x; if (i>= void source::setup(const char *_photometric_band, float _zenith, float _azimuth, float _height) { <> wavefront.setup(0); strcpy(tag,"source"); info(); } @ \item for a single source with wavefront resolution: <>= void source::setup(const char *_photometric_band, float _zenith, float _azimuth, float _height, int resolution) { <> wavefront.setup(resolution); strcpy(tag,"source"); info(); } @ \item for a single source with tag: <>= void source::setup(const char *_photometric_band, float _zenith, float _azimuth, float _height, const char *tag_in) { <> wavefront.setup(0); strcpy(tag,tag_in); info(); } @ \item for a single source with wavefront resolution and tag: <>= void source::setup(const char *_photometric_band, float _zenith, float _azimuth, float _height, int resolution, const char *tag_in) { <> wavefront.setup(resolution); strcpy(tag,tag_in); info(); } @ with <>= rays_exist = 0; N_SRC = 1; zenith = _zenith; azimuth = _azimuth; height = _height; theta_x = tanf(zenith)*cosf(azimuth); theta_y = tanf(zenith)*sinf(azimuth); photometric_band = _photometric_band; magnitude = 0.0; fwhm = 0.0; source __src; __src.zenith = zenith; __src.azimuth = azimuth; __src.height = height; __src.photometric_band = photometric_band; __src.magnitude = magnitude; __src.N_PHOTON = 1.0;//__src.n_photon(); __src.theta_x = theta_x; __src.theta_y = theta_y; HANDLE_ERROR( cudaMalloc( (void**)&dev_ptr, sizeof(source) ) ); HANDLE_ERROR( cudaMemcpy( dev_ptr, &__src, sizeof(source) , cudaMemcpyHostToDevice ) ); @ \item for multiple sources: <>= void source::setup(const char *_photometric_band, float *_zenith, float *_azimuth, float _height, int _N_SRC) { rays_exist = 0; N_SRC = _N_SRC; height = _height; photometric_band = _photometric_band; magnitude = 0.0; fwhm = 0.0; source __src; HANDLE_ERROR( cudaMalloc( (void**)&dev_ptr, sizeof(source)*N_SRC ) ); strcpy(tag,"sources"); INFO("\n\x1B[1;42m@(CEO)>%s:\x1B[;42m\n",tag); INFO(" zen[arcsec] azim[deg] height[m] lambda[micron] magnitude\n"); for (int i_SRC=0;i_SRC>= void source::setup(const char *_photometric_band, float *_zenith, float *_azimuth, float _height, int _N_SRC, int resolution) { rays_exist = 0; N_SRC = _N_SRC; height = _height; photometric_band = _photometric_band; magnitude = 0.0; fwhm = 0.0; source __src; HANDLE_ERROR( cudaMalloc( (void**)&dev_ptr, sizeof(source)*N_SRC ) ); strcpy(tag,"sources"); INFO("\n\x1B[1;42m@(CEO)>%s:\x1B[;42m\n",tag); INFO(" zen[arcsec] azim[deg] height[m] lambda[micron] magnitude\n"); for (int i_SRC=0;i_SRC>= void source::setup(const char *_photometric_band, float *_zenith, float *_azimuth, float _height, int _N_SRC, rtd L, int N_L, vector origin) { <> INFO("\n\x1B[1;42m@(CEO)>%s:\x1B[;42m\n",tag); INFO(" zen[arcsec] azim[deg] height[m] lambda[micron] magnitude\n"); for (int i_SRC=0;i_SRC> __src.magnitude = magnitude; __src.N_PHOTON = n_photon(magnitude); HANDLE_ERROR( cudaMemcpy( dev_ptr + i_SRC, &__src, sizeof(source) , cudaMemcpyHostToDevice ) ); INFO(" %5.2f %6.2f %8.2f %5.3f %4.1f\n", _zenith[i_SRC]*RADIAN2ARCSEC, _azimuth[i_SRC]*180/PI,_height, wavelength_micron(),magnitude); } wavefront.setup(N_L*N_L,N_SRC); INFO(" wavefront pixel sampling: %d\n",wavefront.N_PX); rays_exist = 1; rays.setup(L, N_L, origin, N_SRC); reset_rays(); INFO("----------------------------------------------------\x1B[0m\n"); } @ with <>= N_SRC = _N_SRC; height = _height; photometric_band = _photometric_band; magnitude = 0.0; fwhm = 0.0; source __src; HANDLE_ERROR( cudaMalloc( (void**)&dev_ptr, sizeof(source)*N_SRC ) ); strcpy(tag,"sources"); @ and with <>= __src.zenith = (float) _zenith[i_SRC]; __src.azimuth = (float) _azimuth[i_SRC]; __src.theta_x = tanf(__src.zenith)*cosf(__src.azimuth); __src.theta_y = tanf(__src.zenith)*sinf(__src.azimuth); __src._zenith_64_ = _zenith[i_SRC]; __src._azimuth_64_ = _azimuth[i_SRC]; __src._theta_x_64_ = tan(__src._zenith_64_)*cos(__src._azimuth_64_); __src._theta_y_64_ = tan(__src._zenith_64_)*sin(__src._azimuth_64_); __src.height = _height; __src.photometric_band = _photometric_band; @ \item for multiple sources with wavefront resolution and with ray bundle (different magnitudes): <>= void source::setup(const char *_photometric_band, float *_magnitude, float *_zenith, float *_azimuth, float _height, int _N_SRC, rtd L, int N_L, vector origin) { <> INFO("\n\x1B[1;42m@(CEO)>%s:\x1B[;42m\n",tag); INFO(" zen[arcsec] azim[deg] height[m] lambda[micron] magnitude\n"); for (int i_SRC=0;i_SRC> __src.magnitude = _magnitude[i_SRC]; __src.N_PHOTON = n_photon(_magnitude[i_SRC]); HANDLE_ERROR( cudaMemcpy( dev_ptr + i_SRC, &__src, sizeof(source) , cudaMemcpyHostToDevice ) ); INFO(" %5.2f %6.2f %8.2f %5.3f %4.1f\n", _zenith[i_SRC]*RADIAN2ARCSEC, _azimuth[i_SRC]*180/PI,_height, wavelength_micron(),_magnitude[i_SRC]); } wavefront.setup(N_L*N_L,N_SRC); INFO(" wavefront pixel sampling: %d\n",wavefront.N_PX); rays_exist = 1; rays.setup(L, N_L, origin, N_SRC); reset_rays(); INFO("----------------------------------------------------\x1B[0m\n"); } @ \item for multiple sources with wavefront resolution and with ray bundle (different magnitudes), zenith and azimuth are in double precision for ray tracing: <>= void source::setup(const char *_photometric_band, float *_magnitude, rtd *_zenith, rtd *_azimuth, float _height, int _N_SRC, rtd L, int N_L, vector origin) { <> INFO("\n\x1B[1;42m@(CEO)>%s:\x1B[;42m\n",tag); INFO(" zen[arcsec] azim[deg] height[m] lambda[micron] magnitude\n"); for (int i_SRC=0;i_SRC> __src.magnitude = _magnitude[i_SRC]; __src.N_PHOTON = n_photon(_magnitude[i_SRC]); HANDLE_ERROR( cudaMemcpy( dev_ptr + i_SRC, &__src, sizeof(source) , cudaMemcpyHostToDevice ) ); INFO(" %5.2f %6.2f %8.2f %5.3f %4.1f\n", _zenith[i_SRC]*RADIAN2ARCSEC, _azimuth[i_SRC]*180/PI,_height, wavelength_micron(),_magnitude[i_SRC]); } wavefront.setup(N_L*N_L,N_SRC); INFO(" wavefront pixel sampling: %d\n",wavefront.N_PX); rays_exist = 1; rays.setup(L, N_L, origin, N_SRC); reset_rays(); INFO("----------------------------------------------------\x1B[0m\n"); } void source::setup_chief(const char *_photometric_band, float *_magnitude, rtd *_zenith, rtd *_azimuth, float _height, int _N_SRC, rtd L, int N_L, vector origin, vector chief_origin) { <> INFO("\n\x1B[1;42m@(CEO)>%s:\x1B[;42m\n",tag); INFO(" zen[arcsec] azim[deg] height[m] lambda[micron] magnitude\n"); for (int i_SRC=0;i_SRC> __src.magnitude = _magnitude[i_SRC]; __src.N_PHOTON = n_photon(_magnitude[i_SRC]); HANDLE_ERROR( cudaMemcpy( dev_ptr + i_SRC, &__src, sizeof(source) , cudaMemcpyHostToDevice ) ); INFO(" %5.2f %6.2f %8.2f %5.3f %4.1f\n", _zenith[i_SRC]*RADIAN2ARCSEC, _azimuth[i_SRC]*180/PI,_height, wavelength_micron(),_magnitude[i_SRC]); } wavefront.setup(N_L*N_L,N_SRC); INFO(" wavefront pixel sampling: %d\n",wavefront.N_PX); rays_exist = 1; rays.setup(L, N_L, origin, chief_origin, N_SRC); reset_rays(); INFO("----------------------------------------------------\x1B[0m\n"); } @ with \index{source!source!reset\_rays} <>= void source::reset_rays(void) { <> blockDim = dim3(256); gridDim = dim3(rays.V.nel/256+1); fill_ones_char LLL gridDim,blockDim RRR (rays.V.m,rays.V.nel); rays.V.set_filter_quiet(); } @ with <>= dim3 blockDim(1,1); dim3 gridDim(1,1, rays.N_BUNDLE); ray_coordinates LLL gridDim , blockDim RRR (rays.d__chief_ray, 1, dev_ptr, 0.0, 2, 1, rays.d__chief_origin, rays.rot_angle); if (rays.N_RAY>1) { blockDim = dim3(16,16); gridDim = dim3(rays.N_L/16+1,rays.N_L/16+1, rays.N_BUNDLE); ray_coordinates_box LLL gridDim , blockDim RRR (rays.d__ray, rays.N_RAY, dev_ptr, rays.L, rays.N_L, rays.d__origin, rays.rot_angle); } else ray_coordinates LLL gridDim , blockDim RRR (rays.d__ray, 1, dev_ptr, 0.0, 2, 1, rays.d__origin, rays.rot_angle); @ and <>= __global__ void ray_coordinates(ray *d__ray, int N_RAY, source *src, rtd RADIUS, int N_RADIUS, int N_THETA, vector *origin, rtd rot_angle) { int i, j, k, iSource; rtd rho, theta, s; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; //k = j * gridDim.x * blockDim.x + i; if ( (ix; d__ray[k].coordinates.y = rho*sin(theta) + origin->y; <> } } @ where <>= d__ray[k].coordinates.z = origin->z; d__ray[k].directions.x = sin(src[iSource]._zenith_64_)*cos(src[iSource]._azimuth_64_); d__ray[k].directions.y = sin(src[iSource]._zenith_64_)*sin(src[iSource]._azimuth_64_); d__ray[k].directions.z = -cos(src[iSource]._zenith_64_); d__ray[k].optical_path_length = 0.0; d__ray[k].optical_path_difference = 0.0; s = -origin->z/d__ray[k].directions.z; d__ray[k].coordinates.x -= s*d__ray[k].directions.x; d__ray[k].coordinates.y -= s*d__ray[k].directions.y; d__ray[k].v = 1; d__ray[k].throughput = 1.0; @ and <>= __global__ void ray_coordinates_box(ray *d__ray, int N_RAY, source *src, rtd L, int N_L, vector *origin, rtd rot_angle) { int i, j, k, iSource; rtd x, y, s; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; if ( (i> k = i + j*N_L + iSource*N_RAY; d__ray[k].coordinates.z = origin->z; d__ray[k].directions.x = sin(src[iSource]._zenith_64_)*cos(src[iSource]._azimuth_64_); d__ray[k].directions.y = sin(src[iSource]._zenith_64_)*sin(src[iSource]._azimuth_64_); d__ray[k].directions.z = -cos(src[iSource]._zenith_64_); d__ray[k].optical_path_length = 0.0; d__ray[k].optical_path_difference = 0.0; s = -origin->z/d__ray[k].directions.z; x -= s*d__ray[k].directions.x; y -= s*d__ray[k].directions.y; d__ray[k].v = 1; d__ray[k].throughput = 1.0; d__ray[k].coordinates.x = x;//*co - y*so; d__ray[k].coordinates.y = y;//x*so + y*co; } } @ with <>= rtd so, co, _x_, _y_; sincos(rot_angle,&so,&co); _x_ = L*(i - (N_L-1)*0.5)/(N_L-1); _y_ = L*(j - (N_L-1)*0.5)/(N_L-1); _x_ += origin->x; _y_ += origin->y; x = _x_*co - _y_*so; y = _x_*so + _y_*co; @ \item for multiple sources with wavefront resolution and with ray bundle coordinates (different magnitudes), zenith and azimuth are in double precision for ray tracing: <>= void source::setup(const char *_photometric_band, float *_magnitude, rtd *_zenith, rtd *_azimuth, float _height, int _N_SRC, int N_RAY, double *x, double *y, vector origin) { <> INFO("\n\x1B[1;42m@(CEO)>%s:\x1B[;42m\n",tag); INFO(" zen[arcsec] azim[deg] height[m] lambda[micron] magnitude\n"); for (int i_SRC=0;i_SRC> __src.magnitude = _magnitude[i_SRC]; __src.N_PHOTON = n_photon(_magnitude[i_SRC]); HANDLE_ERROR( cudaMemcpy( dev_ptr + i_SRC, &__src, sizeof(source) , cudaMemcpyHostToDevice ) ); INFO(" %5.2f %6.2f %8.2f %5.3f %4.1f\n", _zenith[i_SRC]*RADIAN2ARCSEC, _azimuth[i_SRC]*180/PI,_height, wavelength_micron(),_magnitude[i_SRC]); } INFO(" N_RAY: %d\n", N_RAY); wavefront.setup(N_RAY,N_SRC); INFO(" wavefront pixel sampling: %d\n",wavefront.N_PX); rays_exist = 1; rays.setup_free(N_RAY, x, y, origin); INFO("----------------------------------------------------\x1B[0m\n"); } @ \end{itemize} The rays vignetting mask [[V]] is not reset if the flag [[RESET_RAYS_MASK]] is set to 0: <>= void source::reset_rays(int RESET_RAYS_MASK) { <> blockDim = dim3(256); gridDim = dim3(rays.V.nel/256+1); if (RESET_RAYS_MASK>0) { fill_ones_char LLL gridDim,blockDim RRR (rays.V.m,rays.V.nel); rays.V.set_filter_quiet(); } } @ The zenith and azimuth angle are updated with: \index{source!source!update\_directions} <>= void source::update_directions(double *zenith, double *azimuth, int N_DIR) { int n_byte = sizeof(double)*N_DIR; double *d__zenith, *d__azimuth; HANDLE_ERROR( cudaMalloc((void**)&d__zenith, n_byte ) ); HANDLE_ERROR( cudaMalloc((void**)&d__azimuth, n_byte ) ); HANDLE_ERROR( cudaMemcpy( d__zenith, zenith, n_byte, cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( d__azimuth, azimuth, n_byte, cudaMemcpyHostToDevice ) ); update_directions_kernel LLL N_SRC,1 RRR ( d__zenith, d__azimuth, N_DIR, dev_ptr); HANDLE_ERROR( cudaFree( d__zenith )); HANDLE_ERROR( cudaFree( d__azimuth ));} @ with <>= __global__ void update_directions_kernel(double *d__zenith, double *d__azimuth, const int N_DIR, source *d__src) { int i; i = blockIdx.x; d__src[i].zenith = (float)d__zenith[i]; d__src[i].azimuth = (float)d__azimuth[i]; d__src[i].theta_x = tanf(d__src[i].zenith)*cosf(d__src[i].azimuth); d__src[i].theta_y = tanf(d__src[i].zenith)*sinf(d__src[i].azimuth); d__src[i]._zenith_64_ = d__zenith[i]; d__src[i]._azimuth_64_ = d__azimuth[i]; d__src[i]._theta_x_64_ = tan(d__src[i]._zenith_64_)*cos(d__src[i]._azimuth_64_); d__src[i]._theta_y_64_ = tan(d__src[i]._zenith_64_)*sin(d__src[i]._azimuth_64_); } @ The magnitude is update with \index{source!source!update\_magnitude} <>= void source::update_magnitude(float *magnitude, int N_MAG) { int n_byte = sizeof(float)*N_MAG; float *d__magnitude; <> HANDLE_ERROR( cudaMalloc((void**)&d__magnitude, n_byte ) ); HANDLE_ERROR( cudaMemcpy( d__magnitude, magnitude, n_byte, cudaMemcpyHostToDevice ) ); update_magnitude_kernel LLL N_SRC,1 RRR ( d__magnitude, zero_point, N_MAG, dev_ptr); HANDLE_ERROR( cudaFree( d__magnitude )); } @ with <>= __global__ void update_magnitude_kernel(float *d__magnitude, const float zero_point, const int N_MAG, source *d__src) { int i; i = blockIdx.x; d__src[i].magnitude = d__magnitude[i]; d__src[i].N_PHOTON = zero_point*powf(10 , -0.4*d__magnitude[i] ); } @ The magnitude and number of photon from another source can be copied with \index{source!source!copy\_magnitude} <>= void source::copy_magnitude(source *other_src) { copy_magnitude_kernel LLL N_SRC,1 RRR ( dev_ptr , other_src->dev_ptr ); } @ with <>= __global__ void copy_magnitude_kernel(source *this_src, source *other_src) { int i; i = blockIdx.x; this_src[i].magnitude = other_src[i].magnitude; this_src[i].N_PHOTON = other_src[i].N_PHOTON; } @ \subsubsection{Photometry} \label{sec:photometry} The [[wavelength]] in meter corresponding to the photometric band is given by \index{source!source!wavelength} <>= float source::wavelength(void) { float lambda; if (strcmp(photometric_band,"Vs")==0) lambda = 0.500; if (strcmp(photometric_band,"V")==0) lambda = 0.550; if (strcmp(photometric_band,"R")==0) lambda = 0.640; if (strcmp(photometric_band,"I")==0) lambda = 0.790; if (strcmp(photometric_band,"J")==0) lambda = 1.215; if (strcmp(photometric_band,"H")==0) lambda = 1.654; if (strcmp(photometric_band,"K")==0) lambda = 2.179; if (strcmp(photometric_band,"Ks")==0) lambda = 2.157; if (strcmp(photometric_band,"R+I")==0) lambda = 0.715; if (strcmp(photometric_band,"PHZ")==0) lambda = 1.193; if (strcmp(photometric_band,"VIS")==0) lambda = 0.750; return 1E-6*lambda; } @ The [[wavelength]] in \textsl{micron} is given by \index{source!source!wavelength\_micron} <>= float source::wavelength_micron(void) { return 1E6*wavelength(); } @ The number of photon in $m^{-2}.s^{-1}$ is derived with \index{source!source!n\_photon} <>= float source::n_photon(void) { <> return zero_point*powf(10 , -0.4*magnitude ); } float source::n_photon(float _magnitude_) { <> return zero_point*powf(10 , -0.4*_magnitude_ ); } @ with <>= float zero_point; if (strcmp(photometric_band,"Vs")==0) zero_point = 8.97e9; if (strcmp(photometric_band,"V")==0) zero_point = 8.97e9; if (strcmp(photometric_band,"R")==0) zero_point = 10.87e9 ; if (strcmp(photometric_band,"I")==0) zero_point = 7.34e9; if (strcmp(photometric_band,"J")==0) zero_point = 5.16e9; if (strcmp(photometric_band,"H")==0) zero_point = 2.99e9; if (strcmp(photometric_band,"K")==0) zero_point = 1.90e9; if (strcmp(photometric_band,"Ks")==0) zero_point = 1.49e9; if (strcmp(photometric_band,"R+I")==0) zero_point = 24.46e9; if (strcmp(photometric_band,"PHZ")==0) zero_point = 7.21e9; if (strcmp(photometric_band,"VIS")==0) zero_point = 16.9e9; @ The number of background photon in $ray^{-1}s^{-1}.arcsec^{-2}$ for a given magnitude per arcsecond square is derived with \index{source!source!n\_background\_photon} <>= float source::n_background_photon(float backgroundMagnitude) { <> return rays.V.area*zero_point*powf(10 , -0.4*backgroundMagnitude )/N_SRC; } @ The spectral bandwidth is derived with \index{source!source!spectral\_bandwidth} <>= float source::spectral_bandwidth(void) { float spectral_bandwidth; if (strcmp(photometric_band,"Vs")==0) spectral_bandwidth = 0.090; if (strcmp(photometric_band,"V")==0) spectral_bandwidth = 0.090; if (strcmp(photometric_band,"R")==0) spectral_bandwidth = 0.150; if (strcmp(photometric_band,"I")==0) spectral_bandwidth = 0.150; if (strcmp(photometric_band,"J")==0) spectral_bandwidth = 0.260; if (strcmp(photometric_band,"H")==0) spectral_bandwidth = 0.290; if (strcmp(photometric_band,"K")==0) spectral_bandwidth = 0.410; if (strcmp(photometric_band,"Ks")==0) spectral_bandwidth = 0.320; if (strcmp(photometric_band,"R+I")==0) spectral_bandwidth = 0.300; if (strcmp(photometric_band,"PHZ")==0) spectral_bandwidth = 0.331; if (strcmp(photometric_band,"VIS")==0) spectral_bandwidth = 0.300; return spectral_bandwidth*1e-6; } @ The wave number is given by \index{source!source!wavenumber} <>= float source::wavenumber(void) { float lambda; if (strcmp(photometric_band,"Vs")==0) lambda = 0.550; if (strcmp(photometric_band,"V")==0) lambda = 0.550; if (strcmp(photometric_band,"R")==0) lambda = 0.640; if (strcmp(photometric_band,"I")==0) lambda = 0.790; if (strcmp(photometric_band,"J")==0) lambda = 1.215; if (strcmp(photometric_band,"H")==0) lambda = 1.654; if (strcmp(photometric_band,"K")==0) lambda = 2.179; if (strcmp(photometric_band,"Ks")==0) lambda = 2.157; if (strcmp(photometric_band,"R+I")==0) lambda = 0.715; if (strcmp(photometric_band,"PHZ")==0) lambda = 1.193; if (strcmp(photometric_band,"VIS")==0) lambda = 0.750; return 1E6*2*PI/lambda; } @ \subsubsection{Trace} \label{sec:trace} The ray tracing data are transferred to the wavefront structure with \index{source!source!opd2phase} <>= void source::opd2phase(void) { HANDLE_ERROR( cudaMemset(rays.V.m, 0, sizeof(char)*rays.N_RAY*rays.N_BUNDLE ) ); <> } @ %def opd2phase @ with <>= dim3 blockDim(16,16); dim3 gridDim(rays.N_L/16+1, rays.N_L/16+1, rays.N_BUNDLE); opd2phase_kernel LLL gridDim , blockDim RRR (wavefront.amplitude, wavefront.phase, rays.V.m, rays.d__ray, rays.N_RAY, rays.L, rays.N_L, rays.d__origin, dev_ptr, rays.rot_angle); wavefront.masked(&(rays.V)); rays.V.set_filter_quiet(); @ and <>= __global__ void opd2phase_kernel(float *amplitude, float *phase, char *m, ray *d__ray, int N_RAY, rtd L, int N_L, vector *origin, source *src,rtd rot_angle) { int i, j, k, iSource; rtd x, y; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; k = i + j*N_L + iSource*N_RAY; if ( ( (i> m[k] = 1; phase[k] += d__ray[k].optical_path_difference + x*src[iSource].theta_x + y*src[iSource].theta_y; amplitude[k] *= d__ray[k].throughput; } } @ For ray tracing in sequential mode, the rays vignetting mask [[V]] should not be reset. The source [[reset_rays]] method should be called instead of the [[reset]] method with 0 passed as argument. Then [[opd2phase]] must be called sequentially with also 0 passed as argument. This will prevent the rays vignetting mask [[V]] to be resetted. <>= void source::opd2phase(int RESET_RAYS_MASK) { if (RESET_RAYS_MASK>0) HANDLE_ERROR( cudaMemset(rays.V.m, 0, sizeof(char)*rays.N_RAY*rays.N_BUNDLE ) ); else wavefront.reset_amplitude(); <> } @ \subsubsection{Input/Output} \label{sec:inputoutput} The main parameters of the source are displayed with the [[info]] routine: \index{source!source!info} <>= void source::info(void) { #ifndef SILENT fprintf(stdout,"\n\x1B[1;42m@(CEO)>%s:\x1B[;42m\n",tag); fprintf(stdout," zen[arcsec] azim[deg] height[m] lambda[micron] magnitude\n"); fprintf(stdout," %5.2f %6.2f %8.2f %5.3f %4.1f\n", zenith*RADIAN2ARCSEC, azimuth*180.0/PI,height,wavelength_micron(), magnitude); if (wavefront.N_PX>0) fprintf(stdout," wavefront pixel sampling: %d\n",wavefront.N_PX); fprintf(stdout,"----------------------------------------------------\x1B[0m\n"); #endif } @ \subsection{Complex amplitude} \label{sec:complex-amplitude} \index{source!complex\_amplitude} The complex amplitude structure is initialized with \index{source!complex\_amplitude!setup} <>= void complex_amplitude::setup(int n_pixel) { N = 1; <> } <>= void complex_amplitude::setup(int n_pixel, int n_src) { N = n_src; <> } <>= N_PX = n_pixel*N; M = NULL; buffer = NULL; cublasCreate(&handle); if (N_PX>0) { HANDLE_ERROR( cudaMalloc( (void**)&litude, sizeof(float)*N_PX ) ); HANDLE_ERROR( cudaMalloc( (void**)&phase, sizeof(float)*N_PX ) ); reset(); } else { amplitude = NULL; phase = NULL; } @ and memory is de--allocated with: \index{source!complex\_amplitude!cleanup} <>= void complex_amplitude::cleanup(void) { if (amplitude!=NULL) HANDLE_ERROR( cudaFree( amplitude ) ); if (phase!=NULL) HANDLE_ERROR( cudaFree( phase ) ); if (buffer!=NULL) HANDLE_ERROR( cudaFree( buffer ) ); cublasDestroy(handle); } @ Allocated variable are freed with the [[cleanup]] routine \index{source!source!cleanup} <>= void source::cleanup(void) { INFO("@(CEO)>%s: freeing memory!\n",tag); // fprintf(stdout," |-"); wavefront.cleanup(); if (rays_exist==1) { INFO(" |-"); rays.cleanup(); } HANDLE_ERROR( cudaFree( dev_ptr) ); } @ The amplitude and phase are initialized to respectively 1 and 0: <>= __global__ void squarePupil(float* amplitude, const int N) { int id; id = blockIdx.x * blockDim.x + threadIdx.x; if (id>= void complex_amplitude::reset(void) { reset_amplitude(); reset_phase(); } @ The amplitude is reset to 1 with: \index{source!complex\_amplitude!reset\_amplitude} <>= void complex_amplitude::reset_amplitude(void) { dim3 blockDim(16,1); dim3 gridDim(N_PX/16+1,1); squarePupil LLL gridDim,blockDim RRR (amplitude, N_PX); } @ The wavefront amplitude can also be reset to a given wavefront, both wavefronts will share the same mask,: <>= void complex_amplitude::reset(complex_amplitude *wavefront) { if (N_PX==wavefront->N_PX) { HANDLE_ERROR( cudaMemcpy( phase, wavefront->phase, sizeof(float)*N_PX, cudaMemcpyDeviceToDevice ) ); HANDLE_ERROR( cudaMemcpy( amplitude, wavefront->amplitude, sizeof(float)*N_PX, cudaMemcpyDeviceToDevice ) ); M = wavefront->M; } else { fprintf(stdout,"\n\x1B[31m@(CEO)>WARNING: Wavefront resolutions are not matching!\x1B[0m\n"); } } @ The following resets the wavefront phase to the phase of the new wavefront: <>= void complex_amplitude::reset_phase(complex_amplitude *wavefront_prime) { reset_phase(); add_phase(1, wavefront_prime->phase); } @ The wavefront phase is reset to 0 with \index{source!complex\_amplitude!reset\_phase} <>= void complex_amplitude::reset_phase(void) { HANDLE_ERROR( cudaMemset( phase, 0, sizeof(float)*N_PX ) ); } @ \paragraph{Adding wavefront phase} \label{sec:adding-wavefr-phase} The following adds a phase aberration to the wavefront phase $$\varphi = \varphi + \alpha \varphi^\prime$$: \index{source!complex\_amplitude!add\_phase} <>= void complex_amplitude::add_phase(float alpha, float *phase_prime) { CUBLAS_ERROR( cublasSaxpy(handle, N_PX, &alpha, phase_prime, 1, phase, 1) ); if (M!=NULL) masked(); } @ Or the same phase $\varphi^\prime$ can be added to wavefront phase of each source $$\varphi_i = \varphi_i + \alpha \varphi^\prime$$: \index{source!complex\_amplitude!add\_phase} <>= void complex_amplitude::add_same_phase(float alpha, float *phase_prime) { int k, n_pixel = N_PX/N; for (k=0;k>= void complex_amplitude::masked(void) { <> } @ <>= void complex_amplitude::masked(mask *M_in) { M = M_in; <> } @ %def masked @ with <>= dim3 blockDim(16,1); dim3 gridDim(M->nel/16+1,N_PX/M->nel); apply_mask LLL gridDim,blockDim RRR (amplitude, phase, N_PX, M->m, M->nel); @ and with the kernel: <>= __global__ void apply_mask(float* amplitude, float* phase, const int N, const char *pupil_mask, const int M) { int i,j; i = blockIdx.x * blockDim.x + threadIdx.x; if ( (i>= void complex_amplitude::rms(float *rms) { if (M==NULL) ERROR("Wavefront mask is undefined!"); float mean_data, nnz; int n_data = N_PX/N, idx, k; cublasHandle_t handle; cublasCreate(&handle); for (k=0;kf + idx, 1, &mean_data) ); CUBLAS_ERROR( cublasSasum(handle, n_data, M->f + idx, 1, &nnz) ); mean_data /= nnz; CUBLAS_ERROR( cublasSdot(handle, n_data, phase + idx, 1, phase + idx, 1, rms+k) ); rms[k] /= nnz; rms[k] -= (mean_data*mean_data); rms[k] = sqrt(rms[k]); } cublasDestroy(handle); } @ \paragraph{Wavefront differentiation} \label{sec:wavefr-diff} @ \index{source!complex\_amplitude!finite\_difference} For a $[[NL]]\times [[NL]]$ lenslet array of pitch [[d]] meter, the lenslet x and y average finite difference is given by: <>= void complex_amplitude::finite_difference(float *sx, float *sy, int NL,float d) { <> wavefront_finite_difference(sx, sy, NL, phase, n, d, N); } @ The number of wavefront pixel [[n]] per lenslet must verify $n_{px}=[[NL]][[n]]+1$ where $n_{px}$ is the linear number of pixel on the wavefront, it verifies $n_{px}^2[[N]]=[[N_PX]]$ <>= int n; n = (int) sqrt(N_PX/N); n -= 1; n/= NL; @ The mask of valid lenslets can be passed to the same routine and the slopes outside the mask are set to 0: <>= void complex_amplitude::finite_difference(float *sx, float *sy, int NL, float d, mask *valid_lenslet) { <> //stopwatch tid; //tid.tic(); wavefront_finite_difference(sx, sy, NL, phase, n, d, valid_lenslet, N); //tid.toc("Finite difference (lenslet edge method)"); if (M!=NULL) { //tid.tic(); <> //tid.toc("Finite difference (partial illumination identification)"); //tid.tic(); <> //tid.toc("Finite difference (gradient average method)"); } } @ If the wavefront pupil mask [[M]] exists, the partially illuminated lenslets are identified and the fraction of the illuminated lenslet surface is saved in [[M->f]]: <>= dim3 blockDim(16,16); dim3 gridDim(NL/16+1,NL/16+1,N); partial_illumination_identification LLL gridDim,blockDim RRR (valid_lenslet->f, valid_lenslet->m, M->f, NL, n); //HANDLE_ERROR( cudaDeviceSynchronize() ); @ with the kernel <>= __global__ void partial_illumination_identification(float *valid_lenslet_f, char *valid_lenslet_m, float *wavefront_mask_f, const int NL, const int n) { int iL, jL, i, j, kL, u, v, w0, w, NP, iSource, n2; iL = blockIdx.x * blockDim.x + threadIdx.x; jL = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; kL = iL*NL+jL; kL += iSource*NL*NL; if ( ( (iL0) ) { kL = iL*NL+jL; NP = n*NL + 1; kL += iSource*NL*NL; u = iL*n; v = jL*n; w0 = iSource*NP*NP; n2 = (n+1)*(n+1); valid_lenslet_f[kL] = 0; for (i=0;i<=n;i++) { for (j=0;j<=n;j++) { w = w0 + (u+i)*NP + v + j; valid_lenslet_f[kL] += wavefront_mask_f[w]; } } valid_lenslet_f[kL] /= n2; } } @ The lenslet average centroid on a partially illuminated lenslet is given by \begin{eqnarray} \label{eq:1} s_x(i_L,j_L) &=& {1\over S} \sum_{i=0}^n \sum_{j=0}^n P_{i+1,j}P_{i-1,j} { \varphi_{i+1,j} - \varphi_{i-1,j} \over 2p}, \\ s_y(i_L,j_L) &=& {1\over S} \sum_{i=0}^n \sum_{j=0}^n P_{i,j+1}P_{i,j-1} { \varphi_{i,j+1} - \varphi_{i,j-1} \over 2p}, \end{eqnarray} with \begin{equation} \label{eq:3} S = \sum_{i=0}^n \sum_{j=0}^n P_{i,j+1}P_{i,j-1}, \end{equation} where $P_{i,j}$ is equal to 1 inside the wavefront pupil mask and 0 outside and $p=d/n$ is the pupil pixel size. <>= partial_illumination_slopes LLL gridDim,blockDim RRR (sx, sy, NL, n, d, phase, valid_lenslet->f, valid_lenslet->m, M->f); @ with the kernel <>= __global__ void partial_illumination_slopes(float *sx, float *sy, const int NL, const int n, float d, float *phase, float *valid_lenslet_f, char *valid_lenslet_m, float *wavefront_mask_f) { int iL, jL, i, j, kL, uL, vL, a, b, w0, w, NP, iSource; float p, dphi, h, P, S; iL = blockIdx.x * blockDim.x + threadIdx.x; jL = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; kL = iL*NL+jL; kL += iSource*NL*NL; if ( ( (iL0 && ((valid_lenslet_f[kL]>0) && (valid_lenslet_f[kL]<1)) ) ) { kL = iL*NL+jL; NP = n*NL + 1; kL += 2*iSource*NL*NL; uL = iL*n; vL = jL*n; w0 = iSource*NP*NP; sx[kL] = sy[kL] = 0.0; p = d/n; S = 0; for (i=0;i<=n;i++) { a = uL + i + 1; h = 2*p; if (a>=NP) { a = NP-1; h = p; } b = uL + i - 1; if (b<0) { b = 0; h = p; } for (j=0;j<=n;j++) { w = w0 + b*NP + vL + j; dphi = phase[w]; P = wavefront_mask_f[w]; w = w0 + a*NP + vL + j; dphi -= phase[w]; P *= wavefront_mask_f[w]; S += P; sx[kL] += P*dphi/h; } } if (S>0) sx[kL] /= S; else sx[kL] = 0; S = 0; for (i=0;i<=n;i++) { w = w0 + (uL+i)*NP; for (j=0;j<=n;j++) { a = vL + j + 1; h = 2*p; if (a>=NP) { a = NP-1; h = p; } b = vL + j - 1; if (b<0) { b = 0; h = p; } dphi = phase[w+b]; P = wavefront_mask_f[w+a]; dphi -= phase[w+a]; P *= wavefront_mask_f[w+b]; S += P; sy[kL] += P*dphi/h; } } if (S>0) sy[kL] /= S; else sy[kL] = 0; } } @ \index{source!complex\_amplitude!gradient\_average} The wavefront finite difference can be computed with a straight lenslet--average of the phase gradient with: <>= void complex_amplitude::gradient_average(float *sx, float *sy, int NL, float d) { <> dim3 blockDim(16,16); dim3 gridDim(NL/16+1,NL/16+1,N); gradient_average_kernel LLL gridDim,blockDim RRR (sx, sy, NL, n, d, phase, M->f); } @ <>= void complex_amplitude::gradient_average(float *sx, float *sy, float d) { int k, N2, idx, NL = 1; <> N2 = (n+1)*(n+1); float *d__sx, *d__sy, *d__Px, *d__Py; int n_byte = sizeof(float)*N_PX; if (buffer==NULL) HANDLE_ERROR( cudaMalloc((void**)&buffer, 4*n_byte) ); d__sx = buffer; d__sy = buffer + N_PX; d__Px = buffer + N_PX*2; d__Py = buffer + N_PX*3; dim3 blockDim(16,16); dim3 gridDim((n+1)/16+1,(n+1)/16+1,N); gradient_kernel LLL gridDim,blockDim RRR (d__sx, d__sy, d__Px, d__Py, NL, n, d, phase, M->f); float res, nnz; for (k=0; k>= __global__ void gradient_average_kernel(float *sx, float *sy, const int NL, const int n, float d, float *phase, float *wavefront_mask_f) { int iL, jL, i, j, kL, uL, vL, a, b, w0, w, NP, iSource; float p, dphi, h, P, S; iL = blockIdx.x * blockDim.x + threadIdx.x; jL = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; kL = iL*NL+jL; kL += iSource*NL*NL; if ( (iL=NP) { a = NP-1; h = p; } b = uL + i - 1; if (b<0) { b = 0; h = p; } for (j=0;j<=n;j++) { w = w0 + b*NP + vL + j; dphi = phase[w]; P = wavefront_mask_f[w]; w = w0 + a*NP + vL + j; dphi -= phase[w]; P *= wavefront_mask_f[w]; S += P; sx[kL] += P*dphi/h; } } if (S>0) sx[kL] /= S; else sx[kL] = 0.0;; S = 0; for (i=0;i<=n;i++) { w = w0 + (uL+i)*NP; for (j=0;j<=n;j++) { a = vL + j + 1; h = 2*p; if (a>=NP) { a = NP-1; h = p; } b = vL + j - 1; if (b<0) { b = 0; h = p; } dphi = phase[w+b]; P = wavefront_mask_f[w+a]; dphi -= phase[w+a]; P *= wavefront_mask_f[w+b]; S += P; sy[kL] += P*dphi/h; } } if (S>0) sy[kL] /= S; else sy[kL] = 0.0;; } } @ <>= __global__ void gradient_kernel(float *sx, float *sy, float *Px, float *Py, const int NL, const int n, float d, float *phase, float *wavefront_mask_f) { int i, j, k, a, b, w0, w, NP, iSource; float p, dphi, h, P; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; if ( (i<(n+1)) && (j<(n+1)) ) { k = i*(n+1)+j; NP = n*NL + 1; k += iSource*(n+1)*(n+1); w0 = iSource*NP*NP; sx[k] = sy[k] = 0.0; p = d/n; a = i + 1; h = 2*p; if (a>=NP) { a = NP-1; h = p; } b = i - 1; if (b<0) { b = 0; h = p; } w = w0 + b*NP + j; dphi = phase[w]; P = wavefront_mask_f[w]; w = w0 + a*NP + j; dphi -= phase[w]; P *= wavefront_mask_f[w]; sx[k] += P*dphi/h; Px[k] = P; w = w0 + i*NP; a = j + 1; h = 2*p; if (a>=NP) { a = NP-1; h = p; } b = j - 1; if (b<0) { b = 0; h = p; } dphi = phase[w+b]; P = wavefront_mask_f[w+a]; dphi -= phase[w+a]; P *= wavefront_mask_f[w+b]; sy[k] += P*dphi/h; Py[k] = P; } } @ <>= __global__ void segments_gradient_kernel(float *sx, float *sy, float *Px, float *Py, const int NL, const int n, float d, float *phase, float *wavefront_mask_f, int *segment_mask, int iSegment) { int i, j, k, a, b, w0, w, NP, iSource; float p, dphi, h, P; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; NP = n*NL + 1; k = i*NP+j; k += iSource*NP*NP; if ( ( (i=NP) { a = NP-1; h = p; } b = i - 1; if (b<0) { b = 0; h = p; } w = w0 + b*NP + j; dphi = phase[w]; P = wavefront_mask_f[w]; w = w0 + a*NP + j; dphi -= phase[w]; P *= wavefront_mask_f[w]; sx[k] += P*dphi/h; Px[k] = P; w = w0 + i*NP; a = j + 1; h = 2*p; if (a>=NP) { a = NP-1; h = p; } b = j - 1; if (b<0) { b = 0; h = p; } dphi = phase[w+b]; P = wavefront_mask_f[w+a]; dphi -= phase[w+a]; P *= wavefront_mask_f[w+b]; sy[k] += P*dphi/h; Py[k] = P; } } @ \index{source!complex\_amplitude!segments\_gradient\_average} The gradient of the wavefront phase on each GMT segments is computed with: <>= void complex_amplitude::segments_gradient_average(float *sx, float *sy, float D, int *segment_markers) { int NL = 1; <> dim3 blockDim(1,1); dim3 gridDim(7,N); segments_gradient_average_kernel LLL gridDim,blockDim RRR (sx, sy, n, D, phase, M->f, segment_markers); } @ <>= void complex_amplitude::segments_gradient_averageFast(float *sx, float *sy, float D, int *segment_markers) { int k, N2, idx, segment_id, NL = 1; <> N2 = (n+1)*(n+1); float *d__sx, *d__sy, *d__Px, *d__Py; int n_byte = sizeof(float)*N_PX*4; if (buffer==NULL) HANDLE_ERROR( cudaMalloc((void**)&buffer, n_byte) ); d__sx = buffer; d__sy = buffer + N_PX; d__Px = buffer + N_PX*2; d__Py = buffer + N_PX*3; dim3 blockDim(16,16); dim3 gridDim((n+1)/16+1,(n+1)/16+1,N); float res, nnz; for (segment_id=1; segment_id<8; segment_id++) { HANDLE_ERROR( cudaMemset( buffer, 0, n_byte) ); segments_gradient_kernel LLL gridDim,blockDim RRR (d__sx, d__sy, d__Px, d__Py, NL, n, D, phase, M->f, segment_markers, segment_id); for (k=0; k>= __global__ void segments_gradient_average_kernel(float *sx, float *sy, const int n, float d, float *phase, float *wavefront_mask_f, int *segment_mask) { int i, j, kL0, kL, a, b, w0, w, NP, iSource; float p, dphi, h, P, S; kL0 = blockIdx.x; iSource = blockIdx.y; NP = n + 1; kL = kL0 + 2*iSource*7; w0 = iSource*NP*NP; sx[kL] = sy[kL] = 0.0; p = d/n; S = 0; for (i=0;i<=n;i++) { a = i + 1; h = 2*p; if (a>=NP) { a = NP-1; h = p; } b = i - 1; if (b<0) { b = 0; h = p; } for (j=0;j<=n;j++) { if (segment_mask[w0+i*NP+j]==(kL0+1)) { w = w0 + b*NP + j; dphi = phase[w]; P = wavefront_mask_f[w]; w = w0 + a*NP + j; dphi -= phase[w]; P *= wavefront_mask_f[w]; S += P; sx[kL] += P*dphi/h; } } } if (S>0) sx[kL] /= S; else sx[kL] = 0.0;; S = 0; for (i=0;i<=n;i++) { w = w0 + i*NP; for (j=0;j<=n;j++) { if (segment_mask[w0+i*NP+j]==(kL0+1)) { a = j + 1; h = 2*p; if (a>=NP) { a = NP-1; h = p; } b = j - 1; if (b<0) { b = 0; h = p; } dphi = phase[w+b]; P = wavefront_mask_f[w+a]; dphi -= phase[w+a]; P *= wavefront_mask_f[w+b]; S += P; sy[kL] += P*dphi/h; } } } if (S>0) sy[kL] /= S; else sy[kL] = 0.0; } @ \subsubsection{Optical transfer function} \label{sec:otf} The optical transfer function is given by the auto--correlation of the wavefront complex amplitude $Phi(\vec r)$: \begin{eqnarray} \label{eq:10} O(\vec\rho) &=& \iint {\mathrm d}\vec r \Phi(\vec r) \Phi(\vec r + \vec\rho) \\ &=& {\mathcal F}^{-1} \left[ \left| {\mathcal F} \left[ \Phi(\vec r) \right] \left( \vec f \right)\right|^2 \right] \left( \vec\rho \right) \end{eqnarray} @ <>= void source::optical_transfer_function(float2 *d__otf) { int _N_PX_, N_OTF, n_byte; float2 * d__wavefront; cufftHandle plan; _N_PX_ = (int) sqrt(wavefront.N_PX/wavefront.N); N_OTF = 2*_N_PX_-1; n_byte = sizeof(float2)*N_OTF*N_OTF*wavefront.N; HANDLE_ERROR( cudaMalloc((void**)&d__wavefront, n_byte) ); HANDLE_ERROR( cudaMemset(d__otf, 0, n_byte ) ); HANDLE_ERROR( cudaMemset(d__wavefront, 0, n_byte ) ); int n_DFT[2] = {N_OTF, N_OTF}; int iodist = N_OTF*N_OTF; int BATCH = wavefront.N; HANDLE_ERROR_CUFFT( cufftPlanMany(&plan, 2, n_DFT, NULL, 1, iodist, NULL, 1, iodist, CUFFT_C2C,BATCH), "Unable to create plan"); dim3 blockDim(16,16); dim3 gridDim(_N_PX_/16+1,_N_PX_/16+1,wavefront.N); wavefront_padding LLL gridDim,blockDim RRR (d__wavefront, N_OTF, wavefront.amplitude, wavefront.phase, _N_PX_, wavenumber()); //HANDLE_ERROR( cudaMemcpy( d__otf, d__wavefront, n_byte, cudaMemcpyDeviceToDevice ) ); HANDLE_ERROR_CUFFT( cufftExecC2C(plan, d__wavefront, d__otf, CUFFT_FORWARD), "Unable to execute plan forward FT"); gridDim = dim3(N_OTF/16+1,N_OTF/16+1,wavefront.N); square_modulus LLL gridDim,blockDim RRR (d__wavefront, d__otf, N_OTF); HANDLE_ERROR_CUFFT( cufftExecC2C(plan, d__wavefront, d__otf, CUFFT_INVERSE), "Unable to execute plan forward FT"); HANDLE_ERROR(cudaFree(d__wavefront)); cufftDestroy(plan); } @ <>= __global__ void wavefront_padding(float2 *wavefront, const int N_OTF, const float *amplitude, const float *phase, const int N_PX, const float wavenumber) { int i,j,k,kw,iSource,ii,jj; float so,co; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; k = i*N_PX+j; k += iSource*N_PX*N_PX; if ( (i>= __global__ void square_modulus(float2 *wavefront, float2 *otf, const int N_OTF) { int i,j,k,iSource,N2; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; N2 = N_OTF; N2 *= N2; if ( (i>= void source::phase2file(const char *filename) { dev2file(filename,wavefront.phase,wavefront.N_PX,wavefront.N_PX/N_SRC,N_SRC); } @ The wavefront phase is plotted with the plot.ly <>= void complex_amplitude::show_phase(char *filename) { char title[1024]; float *data, *d__data, alpha, piston; plotly_properties prop; stats S; HANDLE_ERROR( cudaMalloc((void**)&d__data, sizeof(float)*N_PX) ); HANDLE_ERROR( cudaMemcpy( d__data, phase, sizeof(float)*N_PX, cudaMemcpyDeviceToDevice ) ); S.setup(); alpha = 1E9; CUBLAS_ERROR( cublasSscal(S.handle, N_PX, &alpha, d__data, 1) ); if (M!=NULL) { int k, nel; char rms[32]; nel = N_PX/N; dim3 blockDim(256); dim3 gridDim(N_PX/256+1); sprintf(title,"RMS="); for (k=0;km, nel, piston); sprintf(rms,"%.2fnm ",S.std(d__data+ k*nel,M,nel)); strcat(title,rms); } prop.set("title",title); } S.cleanup(); HANDLE_ERROR( cudaHostAlloc( (void**)&data, sizeof(float)*N_PX, cudaHostAllocDefault) ); HANDLE_ERROR( cudaMemcpy( data, d__data, sizeof(float)*N_PX, cudaMemcpyDeviceToHost ) ); prop.set("xtitle","X"); prop.set("ytitle","Y"); prop.set("ztitle","[nm]"); prop.set("filename",filename); int n = (int) sqrtf(N_PX/N); prop.aspect_ratio = N; prop.set("zdata",data,n,n*N); prop.set("colorscale","Portland"); imagesc(&prop); HANDLE_ERROR( cudaFree( d__data ) ); HANDLE_ERROR( cudaFreeHost( data ) ); } @ with the kernel <>= __global__ void remove_piston(float *data, const char* mask, int n_data, float p) { int i; i = blockIdx.x * blockDim.x + threadIdx.x; if ( (i0) ) data[i] -= p; } @ The wavefront amplitude is plotted with the plot.ly <>= void complex_amplitude::show_amplitude(char *filename) { float *data; plotly_properties prop; HANDLE_ERROR( cudaHostAlloc( (void**)&data, sizeof(float)*N_PX, cudaHostAllocDefault) ); HANDLE_ERROR( cudaMemcpy( data, amplitude, sizeof(float)*N_PX, cudaMemcpyDeviceToHost ) ); prop.set("xtitle","X"); prop.set("ytitle","Y"); prop.set("ztitle","[au]"); prop.set("filename",filename); int n = (int) sqrtf(N_PX/N); prop.aspect_ratio = N; prop.set("zdata",data,n,n*N); imagesc(&prop); HANDLE_ERROR( cudaFreeHost( data ) ); } <>= void complex_amplitude::show_amplitude(char *filename, int N, int M) { float *data; plotly_properties prop; HANDLE_ERROR( cudaHostAlloc( (void**)&data, sizeof(float)*N_PX, cudaHostAllocDefault) ); HANDLE_ERROR( cudaMemcpy( data, amplitude, sizeof(float)*N_PX, cudaMemcpyDeviceToHost ) ); prop.set("xtitle","X"); prop.set("ytitle","Y"); prop.set("ztitle","[au]"); prop.set("filename",filename); prop.aspect_ratio = M/N; prop.set("zdata",data,N,M); imagesc(&prop); HANDLE_ERROR( cudaFreeHost( data ) ); } @ \subsection{PSSn} \label{sec:PSSn} \index{source!pssn} The optical transfer function is given by the auto--correlation of the wavefront complex amplitude $Phi(\vec r)$: \begin{eqnarray} \label{eq:10} O(\vec\rho) &=& \iint {\mathrm d}\vec r \Phi(\vec r) \Phi(\vec r + \vec\rho) \\ &=& {\mathcal F}^{-1} \left[ \left| {\mathcal F} \left[ \Phi(\vec r) \right] \left( \vec f \right)\right|^2 \right] \left( \vec\rho \right) \end{eqnarray} The PSSn is a structure with the following parameters: \begin{itemize} \item the telescope optical transfer functions (OTF), both current [[O]] et reference [[O0]], the structure may contain [[N_O]] current OTF and $[[N_O0=1]]$ or [[N_O]] reference OTF: <>= int N_O, N_O0, n_byte; float2 *d__O, *d__O0, *buffer; @ \item the atmosphere optical transfer function [[C]]: <>= float2 *d__C; @ \item the wavefront complex amplitude [[W]] of size $[[N_PX]]\times [[N_PX]]\times [[N]]$: <>= int N_PX, N; float2 *d__W; @ \item the discrete Fourier transform (DFT) plan handle [[plan]], the size of each DFT is $[[N_OTF]]\times [[N_OTF]]$ where $[[N_OTF]]=2[[N_PX]]-1$ and there as may DFT as wavefront i.e. [[N]]: <>= int N_OTF, N_OTF2, NN; cufftHandle plan; @ \item algebraic operator handle and results: <>= cublasHandle_t handle; float num, *denom; @ \end{itemize} The PPSn structure is: <>= struct pssn { <> void setup(source *src, const float r0, const float L0); void cleanup(); void __otf__(source *src, float2 *_d__O_); void otf(source *src); void atm_otf(float d, float r0, float L0); float eval(void); float oeval(void); void eval(float *results); void oeval(float *results); void xotf(float *buffer); void O(float *buffer); void O0(float *buffer); void C(float *buffer); void B(float *buffer); }; @ The pssn methods are: \begin{description} \item[[[setup]]] <>= void pssn::setup(source *src, const float r0, const float L0) { N_O = 0; N = src->wavefront.N; denom = (float *) malloc(sizeof(float)*N); N_PX = (int) sqrt(src->wavefront.N_PX/N); N_OTF = 2*N_PX-1; N_OTF2 = N_OTF*N_OTF; NN = N_OTF2*N; n_byte = sizeof(float2)*NN; HANDLE_ERROR( cudaMalloc((void**)&d__W, n_byte) ); HANDLE_ERROR( cudaMalloc((void**)&d__O, n_byte) ); HANDLE_ERROR( cudaMalloc((void**)&d__O0, n_byte) ); HANDLE_ERROR( cudaMalloc((void**)&d__C, n_byte) ); HANDLE_ERROR( cudaMalloc((void**)&buffer, n_byte) ); HANDLE_ERROR( cudaMemset(d__O, 0, n_byte ) ); HANDLE_ERROR( cudaMemset(d__O0, 0, n_byte ) ); HANDLE_ERROR( cudaMemset(d__W, 0, n_byte ) ); HANDLE_ERROR( cudaMemset(d__C, 0, n_byte ) ); HANDLE_ERROR( cudaMemset(buffer, 0, n_byte ) ); int n_DFT[2] = {N_OTF, N_OTF}; int iodist = N_OTF*N_OTF; int BATCH = N; HANDLE_ERROR_CUFFT( cufftPlanMany(&plan, 2, n_DFT, NULL, 1, iodist, NULL, 1, iodist, CUFFT_C2C,BATCH), "Unable to create plan"); CUBLAS_ERROR( cublasCreate(&handle) ); __otf__(src, d__O0); float d = src->rays.L/(src->rays.N_L-1); atm_otf(d,r0,L0); dim3 blockDim(16,16); dim3 gridDim(N_OTF/16+1,N_OTF/16+1,N); OC_times LLL gridDim,blockDim RRR (buffer,d__O0,d__C,N_OTF); for (int k=0;k>= void pssn::cleanup() { INFO("@(CEO)>%s: freeing memory!\n","PSSn"); CUBLAS_ERROR(cublasDestroy(handle)); cufftDestroy(plan); HANDLE_ERROR(cudaFree(d__C)); HANDLE_ERROR(cudaFree(d__W)); HANDLE_ERROR(cudaFree(d__O0)); HANDLE_ERROR(cudaFree(d__O)); HANDLE_ERROR(cudaFree(buffer)); free(denom); } @ \item[[[otf]]] <>= void pssn::__otf__(source *src, float2 *_d__O_) { dim3 blockDim(16,16); dim3 gridDim(N_PX/16+1,N_PX/16+1,N); wavefront_padding LLL gridDim,blockDim RRR (d__W, N_OTF, src->wavefront.amplitude, src->wavefront.phase, N_PX, src->wavenumber()); HANDLE_ERROR_CUFFT( cufftExecC2C(plan, d__W, _d__O_, CUFFT_FORWARD), "Unable to execute plan forward FT"); gridDim = dim3(N_OTF/16+1,N_OTF/16+1,N); square_modulus LLL gridDim,blockDim RRR (d__W, _d__O_, N_OTF); HANDLE_ERROR_CUFFT( cufftExecC2C(plan, d__W, _d__O_, CUFFT_INVERSE), "Unable to execute plan forward FT"); } void pssn::otf(source *src) { float2 a; HANDLE_ERROR( cudaMemset(d__W, 0, n_byte ) ); HANDLE_ERROR( cudaMemset(buffer, 0, n_byte ) ); __otf__(src, buffer); ++N_O; a.x= (N_O-1.0)/N_O; a.y = 0.0; CUBLAS_ERROR( cublasCsscal(handle,NN,&a.x,d__O,1) ); a.x= 1.0/N_O; CUBLAS_ERROR( cublasCaxpy(handle,NN,&a,buffer,1,d__O,1) ); } @ \item[[[atm_otf]]] <>= void pssn::atm_otf(float d, float r0, float L0) { dim3 blockDim(16,16); dim3 gridDim(N_OTF/16+1,N_OTF/16+1,N); atm_otf_kernel LLL gridDim,blockDim RRR (d__C,N_OTF,d,r0,L0); } @ <>= __global__ void atm_otf_kernel(float2 *C, const int N, const float d, const float r0, const float L0) { int i,j,k,kw,iSource,ii,jj; float x,y,r,sf,red,a,b; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; k = i*N+j; k += iSource*N*N; if ( (i>= __global__ void OC_times(float2 *buffer, float2 *_O_, float2 *_C_, const int N) { int i,j,k,iSource; i = blockIdx.x * blockDim.x + threadIdx.x; j = blockIdx.y * blockDim.y + threadIdx.y; iSource = blockIdx.z; k = i*N+j; k += iSource*N*N; if ( (i>= float pssn::eval(void) { float result; dim3 blockDim(16,16); dim3 gridDim(N_OTF/16+1,N_OTF/16+1,N); OC_times LLL gridDim,blockDim RRR (buffer,d__O,d__C,N_OTF); CUBLAS_ERROR( cublasScnrm2( handle , NN, buffer, 1 , &num) ); result = num/denom[0]; result *= result; return result; } float pssn::oeval(void) { float result; CUBLAS_ERROR( cublasScnrm2( handle , NN, d__O, 1 , &num) ); result = num/denom[0]; result *= result; return result; } void pssn::eval(float *results) { int k, N_OTF2; float result; dim3 blockDim(16,16); dim3 gridDim(N_OTF/16+1,N_OTF/16+1,N); OC_times LLL gridDim,blockDim RRR (buffer,d__O,d__C,N_OTF); N_OTF2 = N_OTF*N_OTF; for (k=0;k>= void pssn::xotf(float *buffer) { HANDLE_ERROR( cudaMemcpy( buffer, d__O, sizeof(float2)*NN, cudaMemcpyDeviceToDevice ) ); } void pssn::O(float *buffer) { HANDLE_ERROR( cudaMemcpy( buffer, d__O, sizeof(float2)*NN, cudaMemcpyDeviceToDevice ) ); } @ \item[[[O0]]] <>= void pssn::O0(float *buffer) { HANDLE_ERROR( cudaMemcpy( buffer, d__O0, sizeof(float2)*NN, cudaMemcpyDeviceToDevice ) ); } @ \item[[[C]]] <>= void pssn::C(float *buffer) { HANDLE_ERROR( cudaMemcpy( buffer, d__C, sizeof(float2)*NN, cudaMemcpyDeviceToDevice ) ); } @ \item[[[buffer]]] <>= void pssn::B(float *h__buffer) { HANDLE_ERROR( cudaMemcpy( h__buffer, buffer, sizeof(float2)*NN, cudaMemcpyDeviceToDevice ) ); } @ \end{description}