/*************************************************************************** answer.h ------------------- W. Michael Brown (ORNL) Class for data management of forces, torques, energies, and virials __________________________________________________________________________ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) __________________________________________________________________________ begin : email : brownw@ornl.gov ***************************************************************************/ #ifndef LAL_ANSWER_H #define LAL_ANSWER_H #include #include "mpi.h" #if defined(USE_OPENCL) #include "geryon/ocl_timer.h" #include "geryon/ocl_mat.h" using namespace ucl_opencl; #elif defined(USE_CUDART) #include "geryon/nvc_timer.h" #include "geryon/nvc_mat.h" using namespace ucl_cudart; #else #include "geryon/nvd_timer.h" #include "geryon/nvd_mat.h" using namespace ucl_cudadr; #endif #include "lal_precision.h" namespace LAMMPS_AL { template class Answer { public: Answer(); ~Answer() { clear(); } /// Current number of local atoms stored inline int inum() const { return _inum; } /// Set number of local atoms for future copy operations inline void inum(const int n) { _inum=n; } /// Return the maximum number of atoms that can be stored currently inline int max_inum() const { return _max_local; } /// Return the number of fields used for energy and virial inline int ev_fields(const int mode) const { return (mode == 1) ? _ev_fields : _e_fields; } /// Memory usage per atom in this class int bytes_per_atom() const; /// Clear any previous data and set up for a new LAMMPS run /** \param rot True if atom storage needs quaternions **/ bool init(const int inum, const bool charge, const bool rot, UCL_Device &dev); /// Check if we have enough device storage and realloc if not inline void resize(const int inum, bool &success) { _inum=inum; if (inum>_max_local) { _max_local=static_cast(static_cast(inum)*1.10); success=success && (force.resize(_max_local*_ans_fields)==UCL_SUCCESS); success=success && (engv.resize(_max_local*_ev_fields)==UCL_SUCCESS); _gpu_bytes=engv.device.row_bytes()+force.device.row_bytes(); } } /// If already initialized by another LAMMPS style, add fields as necessary /** \param rot True if atom storage needs quaternions **/ bool add_fields(const bool charge, const bool rot); /// Free all memory on host and device void clear(); /// Return the total amount of host memory used by class in bytes double host_memory_usage() const; /// Add copy times to timers inline void acc_timers() { time_answer.add_to_total(); } /// Add copy times to timers inline void zero_timers() { time_answer.zero(); } /// Return the total time for host/device data transfer inline double transfer_time() { return time_answer.total_seconds(); } /// Return the total time for data cast/pack inline double cast_time() { return _time_cast; } /// Return number of bytes used on device inline double gpu_bytes() { return _gpu_bytes; } // -------------------------COPY FROM GPU ------------------------------- /// Copy answers from device into read buffer asynchronously void copy_answers(const bool eflag, const bool vflag, const bool ef_atom, const bool vf_atom); /// Copy answers from device into read buffer asynchronously void copy_answers(const bool eflag, const bool vflag, const bool ef_atom, const bool vf_atom, int *ilist); /// Copy energy and virial data into LAMMPS memory double energy_virial(double *eatom, double **vatom, double *virial); /// Copy energy and virial data into LAMMPS memory double energy_virial(double *eatom, double **vatom, double *virial, double &ecoul); /// Add forces and torques from the GPU into a LAMMPS pointer void get_answers(double **f, double **tor); inline double get_answers(double **f, double **tor, double *eatom, double **vatom, double *virial, double &ecoul) { double ta=MPI_Wtime(); time_answer.sync_stop(); _time_cpu_idle+=MPI_Wtime()-ta; double ts=MPI_Wtime(); double evdw=energy_virial(eatom,vatom,virial,ecoul); get_answers(f,tor); _time_cast+=MPI_Wtime()-ts; return evdw; } /// Return the time the CPU was idle waiting for GPU inline double cpu_idle_time() { return _time_cpu_idle; } /// Change the command queue used for copies and timers void cq(const int cq_index); // ------------------------------ DATA ---------------------------------- /// Force and possibly torque UCL_Vector force; /// Energy and virial per-atom storage UCL_Vector engv; /// Device timers UCL_Timer time_answer; /// Geryon device UCL_Device *dev; private: bool alloc(const int inum); bool _allocated, _eflag, _vflag, _ef_atom, _vf_atom, _rot, _charge, _other; int _max_local, _inum, _e_fields, _ev_fields, _ans_fields; int *_ilist; double _time_cast, _time_cpu_idle; double _gpu_bytes; bool _newton; }; } #endif