// ************************************************************************** // gayberne.cu // ------------------- // W. Michael Brown (ORNL) // // Device code for Gay-Berne potential acceleration // // __________________________________________________________________________ // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) // __________________________________________________________________________ // // begin : // email : brownw@ornl.gov // ***************************************************************************/ #ifdef NV_KERNEL #include "lal_ellipsoid_extra.h" #endif ucl_inline void compute_eta_torque(numtyp m[9],numtyp m2[9], const numtyp4 shape, numtyp ans[9]) { numtyp den = m[3]*m[2]*m[7]-m[0]*m[5]*m[7]- m[2]*m[6]*m[4]+m[1]*m[6]*m[5]- m[3]*m[1]*m[8]+m[0]*m[4]*m[8]; den = ucl_recip(den); ans[0] = shape.x*(m[5]*m[1]*m2[2]+(numtyp)2.0*m[4]*m[8]*m2[0]- m[4]*m2[2]*m[2]-(numtyp)2.0*m[5]*m2[0]*m[7]+ m2[1]*m[2]*m[7]-m2[1]*m[1]*m[8]- m[3]*m[8]*m2[1]+m[6]*m[5]*m2[1]+ m[3]*m2[2]*m[7]-m2[2]*m[6]*m[4])*den; ans[1] = shape.x*(m[2]*m2[0]*m[7]-m[8]*m2[0]*m[1]+ (numtyp)2.0*m[0]*m[8]*m2[1]-m[0]*m2[2]*m[5]- (numtyp)2.0*m[6]*m[2]*m2[1]+m2[2]*m[3]*m[2]- m[8]*m[3]*m2[0]+m[6]*m2[0]*m[5]+ m[6]*m2[2]*m[1]-m2[2]*m[0]*m[7])*den; ans[2] = shape.x*(m[1]*m[5]*m2[0]-m[2]*m2[0]*m[4]- m[0]*m[5]*m2[1]+m[3]*m[2]*m2[1]- m2[1]*m[0]*m[7]-m[6]*m[4]*m2[0]+ (numtyp)2.0*m[4]*m[0]*m2[2]-(numtyp)2.0*m[3]*m2[2]*m[1]+ m[3]*m[7]*m2[0]+m[6]*m2[1]*m[1])*den; ans[3] = shape.y*(-m[4]*m2[5]*m[2]+(numtyp)2.0*m[4]*m[8]*m2[3]+ m[5]*m[1]*m2[5]-(numtyp)2.0*m[5]*m2[3]*m[7]+ m2[4]*m[2]*m[7]-m2[4]*m[1]*m[8]- m[3]*m[8]*m2[4]+m[6]*m[5]*m2[4]- m2[5]*m[6]*m[4]+m[3]*m2[5]*m[7])*den; ans[4] = shape.y*(m[2]*m2[3]*m[7]-m[1]*m[8]*m2[3]+ (numtyp)2.0*m[8]*m[0]*m2[4]-m2[5]*m[0]*m[5]- (numtyp)2.0*m[6]*m2[4]*m[2]-m[3]*m[8]*m2[3]+ m[6]*m[5]*m2[3]+m[3]*m2[5]*m[2]- m[0]*m2[5]*m[7]+m2[5]*m[1]*m[6])*den; ans[5] = shape.y*(m[1]*m[5]*m2[3]-m[2]*m2[3]*m[4]- m[0]*m[5]*m2[4]+m[3]*m[2]*m2[4]+ (numtyp)2.0*m[4]*m[0]*m2[5]-m[0]*m2[4]*m[7]+ m[1]*m[6]*m2[4]-m2[3]*m[6]*m[4]- (numtyp)2.0*m[3]*m[1]*m2[5]+m[3]*m2[3]*m[7])*den; ans[6] = shape.z*(-m[4]*m[2]*m2[8]+m[1]*m[5]*m2[8]+ (numtyp)2.0*m[4]*m2[6]*m[8]-m[1]*m2[7]*m[8]+ m[2]*m[7]*m2[7]-(numtyp)2.0*m2[6]*m[7]*m[5]- m[3]*m2[7]*m[8]+m[5]*m[6]*m2[7]- m[4]*m[6]*m2[8]+m[7]*m[3]*m2[8])*den; ans[7] = shape.z*-(m[1]*m[8]*m2[6]-m[2]*m2[6]*m[7]- (numtyp)2.0*m2[7]*m[0]*m[8]+m[5]*m2[8]*m[0]+ (numtyp)2.0*m2[7]*m[2]*m[6]+m[3]*m2[6]*m[8]- m[3]*m[2]*m2[8]-m[5]*m[6]*m2[6]+ m[0]*m2[8]*m[7]-m2[8]*m[1]*m[6])*den; ans[8] = shape.z*(m[1]*m[5]*m2[6]-m[2]*m2[6]*m[4]- m[0]*m[5]*m2[7]+m[3]*m[2]*m2[7]- m[4]*m[6]*m2[6]-m[7]*m2[7]*m[0]+ (numtyp)2.0*m[4]*m2[8]*m[0]+m[7]*m[3]*m2[6]+ m[6]*m[1]*m2[7]-(numtyp)2.0*m2[8]*m[3]*m[1])*den; } __kernel void k_gayberne(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict q, const __global numtyp4 *restrict shape, const __global numtyp4 *restrict well, const __global numtyp *restrict gum, const __global numtyp2 *restrict sig_eps, const int ntypes, const __global numtyp *restrict lshape, const __global int *dev_nbor, const int stride, __global acctyp4 *restrict ans, const int astride, __global acctyp *restrict engv, __global int *restrict err_flag, const int eflag, const int vflag, const int inum, const int t_per_atom) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local numtyp sp_lj[4]; sp_lj[0]=gum[3]; sp_lj[1]=gum[4]; sp_lj[2]=gum[5]; sp_lj[3]=gum[6]; acctyp energy=(acctyp)0; acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; acctyp4 tor; tor.x=(acctyp)0; tor.y=(acctyp)0; tor.z=(acctyp)0; acctyp virial[6]; for (int i=0; i<6; i++) virial[i]=(acctyp)0; if (ii0) energy+=u_r*temp2; numtyp temp1 = -eta*u_r*factor_lj; if (vflag>0) { r12[0]*=-r; r12[1]*=-r; r12[2]*=-r; numtyp ft=temp1*dchi[0]-temp2*dUr[0]; f.x+=ft; virial[0]+=r12[0]*ft; ft=temp1*dchi[1]-temp2*dUr[1]; f.y+=ft; virial[1]+=r12[1]*ft; virial[3]+=r12[0]*ft; ft=temp1*dchi[2]-temp2*dUr[2]; f.z+=ft; virial[2]+=r12[2]*ft; virial[4]+=r12[0]*ft; virial[5]+=r12[1]*ft; } else { f.x+=temp1*dchi[0]-temp2*dUr[0]; f.y+=temp1*dchi[1]-temp2*dUr[1]; f.z+=temp1*dchi[2]-temp2*dUr[2]; } // Torque on 1 temp1 = -u_r*eta*factor_lj; temp2 = -u_r*chi*factor_lj; numtyp temp3 = -chi*eta*factor_lj; tor.x+=temp1*tchi[0]+temp2*teta[0]+temp3*tUr[0]; tor.y+=temp1*tchi[1]+temp2*teta[1]+temp3*tUr[1]; tor.z+=temp1*tchi[2]+temp2*teta[2]+temp3*tUr[2]; } // for nbor store_answers_t(f,tor,energy,virial,ii,astride,tid,t_per_atom,offset,eflag, vflag,ans,engv); } // if ii }