// **************************************************************************
// lj_coul_msm.cu
// -------------------
// Trung Dac Nguyen (ORNL)
//
// Device code for acceleration of the lj/cut/coul/msm pair style
//
// __________________________________________________________________________
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
// __________________________________________________________________________
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/

#ifdef NV_KERNEL

#include "lal_aux_fun1.h"

// NOTE(review): the texture declarations below carry no template arguments in
// this copy of the file. They look truncated (text that followed a '<' appears
// to have been lost) -- restore them from the upstream file before building.
#ifndef _DOUBLE_DOUBLE
texture pos_tex;
texture q_tex;
texture gcons_tex;
texture dgcons_tex;
#else
texture pos_tex;
texture q_tex;
texture gcons_tex;
texture dgcons_tex;
#endif

#else

// Without NV_KERNEL the *_tex names are plain aliases for the pointer
// arguments of the same role, so fetch(...) on them reads those arguments.
#define pos_tex x_
#define q_tex q_
#define gcons_tex gcons
#define dgcons_tex dgcons

#endif

/* ----------------------------------------------------------------------
   gamma(rho) for MSM and pair styles
   see Eq 4 from Parallel Computing 35 (2009) 164-177

   rho   : argument of gamma
   order : split_order = order/2 selects which coefficients are read
   gcons : coefficient table; reached through gcons_tex, which is an
           alias for this argument when NV_KERNEL is not defined

   For rho <= 1 the result is the even polynomial
     c[0] + c[1]*rho^2 + ... + c[split_order]*rho^(2*split_order)
   with c[n] fetched at index 7*split_order+n.  Otherwise it is 1/rho.
------------------------------------------------------------------------- */

ucl_inline numtyp gamma(const numtyp rho, const int order,
                        const __global numtyp *gcons) {
  // Anything that is not <= 1 (including NaN) takes the reciprocal branch.
  if (!(rho <= (numtyp)1.0))
    return ((numtyp)1.0/rho);

  const int split_order = order/2;
  const int row = 7*split_order;        // index of the constant term
  const numtyp rho_sq = rho*rho;

  // Start from the constant term ...
  numtyp sum;
  fetch(sum,row+0,gcons_tex);

  // ... then add coeff*rho^(2*term), building the power incrementally.
  numtyp power = rho_sq;
  int term = 1;
  while (term <= split_order) {
    numtyp coeff;
    fetch(coeff,row+term,gcons_tex);
    sum += coeff*power;
    power *= rho_sq;
    term++;
  }
  return sum;
}

/* ----------------------------------------------------------------------
   compute the derivative of gamma for MSM and pair styles
   see Eq 4 from Parallel Computing 35 (2009) 164-177

   NOTE(review): this copy of the file is damaged from the loop header
   below onward.  The loop condition reads "n0)" and what follows is the
   tail of a pair kernel, so the remainder of dgamma() and the beginning
   of the k_lj_coul_msm kernel appear to be missing.  The surviving text
   is kept exactly as found; restore the lost span from the upstream
   file rather than editing around it.
------------------------------------------------------------------------- */

ucl_inline numtyp dgamma(const numtyp rho, const int order,
                         const __global numtyp *dgcons) {
  if (rho <= (numtyp)1.0) {
    const int split_order = order/2;
    const numtyp rho2 = rho*rho;
    numtyp dg;
    fetch(dg,6*split_order+0,dgcons_tex);
    dg *= rho;
    numtyp rho_n = rho*rho2;
    // NOTE(review): truncated loop header -- see the comment above.
    for (int n=1; n0) {
          // Surviving tail of a pair kernel: energy accumulation, with the
          // Coulomb and LJ parts each gated by their own cutoff test.
          if (rsq < cut_coulsq)
            e_coul += prefactor*(egamma-factor_coul);
          if (rsq < lj1[mtype].w) {
            numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y);
            energy+=factor_lj*(e-lj3[mtype].z);
          }
        }
        // Six virial components accumulated from the pair separation and
        // the scalar force factor.
        if (vflag>0) {
          virial[0] += delx*delx*force;
          virial[1] += dely*dely*force;
          virial[2] += delz*delz*force;
          virial[3] += delx*dely*force;
          virial[4] += delx*delz*force;
          virial[5] += dely*delz*force;
        }
      }

    } // for nbor
    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                    vflag,ans,engv);
  } // if ii
}

/* ----------------------------------------------------------------------
   lj/cut/coul/msm pair kernel, variant that first copies the lj1/lj3
   coefficient tables and the sp_lj factors into __local arrays.

   NOTE(review): two spans of this kernel are damaged in this copy:
     - the condition guarding the lj1/lj3 table load reads "tid0)"
     - the condition opening the per-atom section reads "ii0)", and the
       neighbor loop that should precede the energy accumulation is
       missing
   The surviving text is kept exactly as found; restore the lost spans
   from the upstream file.
------------------------------------------------------------------------- */

__kernel void k_lj_coul_msm_fast(const __global numtyp4 *restrict x_,
                                 const __global numtyp4 *restrict lj1_in,
                                 const __global numtyp4 *restrict lj3_in,
                                 const __global numtyp *restrict gcons,
                                 const __global numtyp *restrict dgcons,
                                 const __global numtyp *restrict sp_lj_in,
                                 const __global int *dev_nbor,
                                 const __global int *dev_packed,
                                 __global acctyp4 *restrict ans,
                                 __global acctyp *restrict engv,
                                 const int eflag, const int vflag,
                                 const int inum, const int nbor_pitch,
                                 const __global numtyp *restrict q_,
                                 const numtyp cut_coulsq, const numtyp qqrd2e,
                                 const int order, const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  __local numtyp4 lj1[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp4 lj3[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp sp_lj[8];
  // The first eight threads each copy one sp_lj entry.
  if (tid<8)
    sp_lj[tid]=sp_lj_in[tid];
  // NOTE(review): truncated condition -- see the comment above the kernel.
  if (tid0)
      lj3[tid]=lj3_in[tid];
  }

  // Per-thread accumulators, all started at zero.
  acctyp energy=(acctyp)0;
  acctyp e_coul=(acctyp)0;
  acctyp4 f;
  f.x=(acctyp)0;
  f.y=(acctyp)0;
  f.z=(acctyp)0;
  acctyp virial[6];
  for (int i=0; i<6; i++)
    virial[i]=(acctyp)0;

  // Barrier after the __local table writes above, before they are read.
  __syncthreads();

  // NOTE(review): truncated condition -- see the comment above the kernel.
  if (ii0) {
          // Energy accumulation, with the Coulomb and LJ parts each gated
          // by their own cutoff test.
          if (rsq < cut_coulsq)
            e_coul += prefactor*(egamma-factor_coul);
          if (rsq < lj1[mtype].w) {
            numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y);
            energy+=factor_lj*(e-lj3[mtype].z);
          }
        }
        // Six virial components accumulated from the pair separation and
        // the scalar force factor.
        if (vflag>0) {
          virial[0] += delx*delx*force;
          virial[1] += dely*dely*force;
          virial[2] += delz*delz*force;
          virial[3] += delx*dely*force;
          virial[4] += delx*delz*force;
          virial[5] += dely*delz*force;
        }
      }

    } // for nbor
    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                    vflag,ans,engv);
  } // if ii
}