// ************************************************************************** // lal_table.cu // ------------------- // Trung Dac Nguyen (ORNL) // // Device code for acceleration of the table pair style // // __________________________________________________________________________ // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) // __________________________________________________________________________ // // begin : // email : nguyentd@ornl.gov // ***************************************************************************/ #ifdef NV_KERNEL #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE texture pos_tex; #else texture pos_tex; #endif #else #define pos_tex x_ #endif #define LOOKUP 0 #define LINEAR 1 #define SPLINE 2 #define BITMAP 3 #ifndef __UNION_INT_FLOAT #define __UNION_INT_FLOAT typedef union { int i; float f; } union_int_float; #endif /// ---------------- LOOKUP ------------------------------------------------- __kernel void k_table(const __global numtyp4 *restrict x_, const __global int *restrict tabindex, const __global numtyp4 *restrict coeff2, const __global numtyp4 *restrict coeff3, const __global numtyp4 *restrict coeff4, const int lj_types, const __global numtyp *restrict cutsq, const __global numtyp *restrict sp_lj_in, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, const int inum, const int nbor_pitch, const int t_per_atom, int tablength) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local numtyp sp_lj[4]; sp_lj[0]=sp_lj_in[0]; sp_lj[1]=sp_lj_in[1]; sp_lj[2]=sp_lj_in[2]; sp_lj[3]=sp_lj_in[3]; acctyp energy=(acctyp)0; acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; acctyp virial[6]; for (int i=0; i<6; i++) virial[i]=(acctyp)0; int tlm1 = tablength - 1; if (ii0) { numtyp e = (numtyp)0.0; if (itable < tlm1) e = coeff3[idx].y; energy+=factor_lj*e; } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii } __kernel void k_table_fast(const __global numtyp4 *restrict x_, const __global int *restrict tabindex, const __global numtyp4 *restrict coeff2, const __global numtyp4 *restrict coeff3, const __global numtyp4 *restrict coeff4, const __global numtyp *restrict cutsq_in, const __global numtyp *restrict sp_lj_in, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, const int inum, const int nbor_pitch, const int t_per_atom, int tablength) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local numtyp cutsq[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __local numtyp sp_lj[4]; if (tid<4) sp_lj[tid]=sp_lj_in[tid]; if (tid0) { numtyp e = (numtyp)0.0; if (itable < tlm1) e = coeff3[idx].y; energy+=factor_lj*e; } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii } /// ---------------- LINEAR ------------------------------------------------- __kernel void k_table_linear(const __global numtyp4 *restrict x_, const __global int *restrict tabindex, const __global numtyp4 *restrict coeff2, const __global numtyp4 *restrict coeff3, const __global numtyp4 *restrict coeff4, const int lj_types, const __global numtyp *restrict cutsq, const __global numtyp *restrict sp_lj_in, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, const int inum, const int nbor_pitch, const int t_per_atom, int tablength) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local numtyp sp_lj[4]; sp_lj[0]=sp_lj_in[0]; sp_lj[1]=sp_lj_in[1]; sp_lj[2]=sp_lj_in[2]; sp_lj[3]=sp_lj_in[3]; acctyp energy=(acctyp)0; acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; acctyp virial[6]; for (int i=0; i<6; i++) virial[i]=(acctyp)0; int tlm1 = tablength - 1; if (ii0) { numtyp e = (numtyp)0.0; if (itable < tlm1) e = coeff3[idx].y + fraction*coeff4[idx].y; energy+=factor_lj*e; } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii } __kernel void k_table_linear_fast(const __global numtyp4 *restrict x_, const __global int *restrict tabindex, const __global numtyp4 *restrict coeff2, const __global numtyp4 *restrict coeff3, const __global numtyp4 *restrict coeff4, const __global numtyp *restrict cutsq_in, const __global numtyp *restrict sp_lj_in, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, const int inum, const int nbor_pitch, const int t_per_atom, int tablength) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local numtyp cutsq[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __local numtyp sp_lj[4]; if (tid<4) sp_lj[tid]=sp_lj_in[tid]; if (tid0) { numtyp e = (numtyp)0.0; if (itable < tlm1) e = coeff3[idx].y + fraction*coeff4[idx].y; energy+=factor_lj*e; } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii } /// ---------------- SPLINE ------------------------------------------------- __kernel void k_table_spline(const __global numtyp4 *restrict x_, const __global int *restrict tabindex, const __global numtyp4 *restrict coeff2, const __global numtyp4 *restrict coeff3, const __global numtyp4 *restrict coeff4, const int lj_types, const __global numtyp *restrict cutsq, const __global numtyp *restrict sp_lj_in, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, const int inum, const int nbor_pitch, const int t_per_atom, int tablength) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local numtyp sp_lj[4]; sp_lj[0]=sp_lj_in[0]; sp_lj[1]=sp_lj_in[1]; sp_lj[2]=sp_lj_in[2]; sp_lj[3]=sp_lj_in[3]; acctyp energy=(acctyp)0; acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; acctyp virial[6]; for (int i=0; i<6; i++) virial[i]=(acctyp)0; int tlm1 = tablength - 1; if (ii0) { numtyp e = (numtyp)0.0; if (itable < tlm1) { e = a * coeff3[idx].y + b * coeff3[idx+1].y + ((a*a*a-a)*coeff4[idx].y + (b*b*b-b)*coeff4[idx+1].y) * coeff2[mtype].z; } energy+=factor_lj*e; } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii } __kernel void k_table_spline_fast(const __global numtyp4 *x_, const __global int *tabindex, const __global numtyp4* coeff2, const __global numtyp4 *coeff3, const __global numtyp4 *coeff4, const __global numtyp *cutsq_in, const __global numtyp* sp_lj_in, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, const int nbor_pitch, const int t_per_atom, int tablength) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local numtyp cutsq[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __local numtyp sp_lj[4]; if (tid<4) sp_lj[tid]=sp_lj_in[tid]; if (tid0) { numtyp e = (numtyp)0.0; if (itable < tlm1) { e = a * coeff3[idx].y + b * coeff3[idx+1].y + ((a*a*a-a)*coeff4[idx].y + (b*b*b-b)*coeff4[idx+1].y) * coeff2[mtype].z; } energy+=factor_lj*e; } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii } /// ---------------- BITMAP ------------------------------------------------- __kernel void k_table_bitmap(const __global numtyp4 *x_, const __global int *tabindex, const __global int *nshiftbits, const __global int *nmask, const __global numtyp4* coeff2, const __global numtyp4 *coeff3, const __global numtyp4 *coeff4, const int lj_types, const __global numtyp *cutsq, const __global numtyp* sp_lj_in, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, const int nbor_pitch, const int t_per_atom, int tablength) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local numtyp sp_lj[4]; sp_lj[0]=sp_lj_in[0]; sp_lj[1]=sp_lj_in[1]; sp_lj[2]=sp_lj_in[2]; sp_lj[3]=sp_lj_in[3]; acctyp energy=(acctyp)0; acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; acctyp virial[6]; for (int i=0; i<6; i++) virial[i]=(acctyp)0; int tlm1 = tablength - 1; if (ii>= nshiftbits[mtype]; if (itable <= tlm1) { idx = itable + tbindex*tablength; fraction = (rsq_lookup.f - coeff3[idx].x) * coeff4[idx].w; value = coeff3[idx].z + fraction*coeff4[idx].z; force = factor_lj * value; } else force = (numtyp)0.0; f.x+=delx*force; f.y+=dely*force; f.z+=delz*force; if (eflag>0) { numtyp e = (numtyp)0.0; if (itable <= tlm1) e = coeff3[idx].y + fraction*coeff4[idx].y; energy+=factor_lj*e; } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii } __kernel void k_table_bitmap_fast(const __global numtyp4 *x_, const __global int *tabindex, const __global int *nshiftbits, const __global int *nmask, const __global numtyp4* coeff2, const __global numtyp4 *coeff3, const __global numtyp4 *coeff4, const __global numtyp *cutsq_in, const __global numtyp* sp_lj_in, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp4 *ans, __global acctyp *engv, const int eflag, const int vflag, const int inum, const int nbor_pitch, const int t_per_atom, int tablength) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); __local numtyp cutsq[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __local numtyp sp_lj[4]; if (tid<4) sp_lj[tid]=sp_lj_in[tid]; if (tid>= nshiftbits[mtype]; if (itable <= tlm1) { idx = itable + tbindex*tablength; fraction = (rsq_lookup.f - coeff3[idx].x) * coeff4[idx].w; value = coeff3[idx].z + fraction*coeff4[idx].z; force = factor_lj * value; } else force = (numtyp)0.0; f.x+=delx*force; f.y+=dely*force; f.z+=delz*force; if (eflag>0) { numtyp e = (numtyp)0.0; if (itable <= tlm1) e = coeff3[idx].y + fraction*coeff4[idx].y; energy+=factor_lj*e; } if (vflag>0) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; virial[3] += delx*dely*force; virial[4] += delx*delz*force; virial[5] += dely*delz*force; } } } // for nbor store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii }