// NOTE(review): this copy of the file looks damaged and should be restored
// from the pristine original before any code change is attempted.
//   * The source has been collapsed onto three physical lines. Because the
//     first of them begins with `//`, everything on it (banner, the
//     #ifdef/#include pair and the start of k_gayberne_sphere_ellipsoid) is
//     currently one line comment.
//   * Text appears to have been dropped wherever a `<` ... `>` pair occurred
//     (presumably markup stripping -- TODO confirm). The visible remnants are
//     `if (ii0)` in each kernel and `if (tid0)` in k_gayberne_lj_fast. The
//     code that belongs between those characters is not present here, so it
//     is deliberately left undocumented rather than guessed at.
//
// What the surviving text shows:
//   * Three __kernel entry points: k_gayberne_sphere_ellipsoid,
//     k_gayberne_lj and k_gayberne_lj_fast.
//   * Each one calls atom_info(t_per_atom,ii,tid,offset), adds `start` to
//     ii, and zeroes the accumulators energy, f.x/f.y/f.z and virial[0..5].
//   * sp_lj[0..3] is a __local copy of gum[3..6]. The first two kernels
//     assign all four entries unconditionally; k_gayberne_lj_fast lets only
//     threads with tid<4 do the copy and calls __syncthreads() before its
//     main branch.
//   * k_gayberne_lj_fast declares __local lj1/lj3 tables with
//     MAX_SHARED_TYPES*MAX_SHARED_TYPES entries. The visible remnant copies
//     lj3_in[tid] into lj3[tid]; the lj1 copy is presumably in the missing
//     text. The table is later indexed with mtype.
//   * k_gayberne_sphere_ellipsoid: the vflag>0 branch negates r12 and then
//     accumulates both the force (temp1*dchi[k]-temp2*dUr[k]) and the virial;
//     the else branch accumulates the same force only. Results are handed to
//     store_answers().
//   * k_gayberne_lj / k_gayberne_lj_fast: under a condition that is
//     truncated here, energy gains
//     factor_lj*(r6inv*(lj3.x*r6inv-lj3.y)-lj3.z). Under vflag>0 the virial
//     gains force times delx*delx, dely*dely, delz*delz, delx*dely,
//     delx*delz, dely*delz in slots 0..5. Results are handed to
//     acc_answers().
// ************************************************************************** // gayberne_lj.cu // ------------------- // W. Michael Brown (ORNL) // // Device code for Gay-Berne - Lennard-Jones potential acceleration // // __________________________________________________________________________ // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) // __________________________________________________________________________ // // begin : // email : brownw@ornl.gov // ***************************************************************************/ #ifdef NV_KERNEL #include "lal_ellipsoid_extra.h" #endif __kernel void k_gayberne_sphere_ellipsoid(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict q, const __global numtyp4 *restrict shape, const __global numtyp4 *restrict well, const __global numtyp *restrict gum, const __global numtyp2 *restrict sig_eps, const int ntypes, const __global numtyp *restrict lshape, const __global int *dev_nbor, const int stride, __global acctyp4 *restrict ans, __global acctyp *restrict engv, __global int *restrict err_flag, const int eflag, const int vflag, const int start, const int inum, const int t_per_atom) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); ii+=start; __local numtyp sp_lj[4]; sp_lj[0]=gum[3]; sp_lj[1]=gum[4]; sp_lj[2]=gum[5]; sp_lj[3]=gum[6]; acctyp energy=(acctyp)0; acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; acctyp virial[6]; for (int i=0; i<6; i++) virial[i]=(acctyp)0; if (ii0) energy+=u_r*temp2; numtyp temp1 = -eta*u_r*factor_lj; if (vflag>0) { r12[0]*=-1; r12[1]*=-1; r12[2]*=-1; numtyp ft=temp1*dchi[0]-temp2*dUr[0]; f.x+=ft; virial[0]+=r12[0]*ft; ft=temp1*dchi[1]-temp2*dUr[1]; f.y+=ft; virial[1]+=r12[1]*ft; virial[3]+=r12[0]*ft; ft=temp1*dchi[2]-temp2*dUr[2]; f.z+=ft; virial[2]+=r12[2]*ft; virial[4]+=r12[0]*ft; virial[5]+=r12[1]*ft; } else { f.x+=temp1*dchi[0]-temp2*dUr[0]; f.y+=temp1*dchi[1]-temp2*dUr[1]; f.z+=temp1*dchi[2]-temp2*dUr[2]; } } // for nbor 
store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii } __kernel void k_gayberne_lj(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict lj1, const __global numtyp4 *restrict lj3, const int lj_types, const __global numtyp *restrict gum, const int stride, const __global int *dev_ij, __global acctyp4 *restrict ans, __global acctyp *restrict engv, __global int *restrict err_flag, const int eflag, const int vflag, const int start, const int inum, const int t_per_atom) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); ii+=start; __local numtyp sp_lj[4]; sp_lj[0]=gum[3]; sp_lj[1]=gum[4]; sp_lj[2]=gum[5]; sp_lj[3]=gum[6]; acctyp energy=(acctyp)0; acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; acctyp virial[6]; for (int i=0; i<6; i++) virial[i]=(acctyp)0; if (ii0) { numtyp e=r6inv*(lj3[ii].x*r6inv-lj3[ii].y); energy+=factor_lj*(e-lj3[ii].z); } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor acc_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii } __kernel void k_gayberne_lj_fast(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict lj1_in, const __global numtyp4 *restrict lj3_in, const __global numtyp *restrict gum, const int stride, const __global int *dev_ij, __global acctyp4 *restrict ans, __global acctyp *restrict engv, __global int *restrict err_flag, const int eflag, const int vflag, const int start, const int inum, const int t_per_atom) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); ii+=start; __local numtyp sp_lj[4]; __local numtyp4 lj1[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __local numtyp4 lj3[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; if (tid<4) sp_lj[tid]=gum[tid+3]; if (tid0) lj3[tid]=lj3_in[tid]; } acctyp energy=(acctyp)0; acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; 
f.z=(acctyp)0; acctyp virial[6]; for (int i=0; i<6; i++) virial[i]=(acctyp)0; __syncthreads(); if (ii0) { numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y); energy+=factor_lj*(e-lj3[mtype].z); } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor acc_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii }