/***************************************************************************
                                ucl_matrix.h
                             -------------------
                               W. Michael Brown

  Matrix Container on Host

 __________________________________________________________________________
    This file is part of the Geryon Unified Coprocessor Library (UCL)
 __________________________________________________________________________

    begin                : Thu May 10 2012
    copyright            : (C) 2012 by W. Michael Brown
    email                : brownw@ornl.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   This software is distributed under the Simplified BSD License.
   ----------------------------------------------------------------------- */

// Only allow this file to be included by CUDA and OpenCL specific headers
#ifdef _UCL_MAT_ALLOW

/// Matrix S-Object
template <class hosttype, class devtype>
class UCL_Matrix {
 public:
  // Traits for copying data
  // MEM_TYPE is 0 for device, 1 for host, and 2 for image
  enum traits {
    DATA_TYPE = _UCL_DATA_ID<hosttype>::id,
    MEM_TYPE = 1,
    PADDED = 0,
    ROW_MAJOR = 1,
    VECTOR = 0
  };
  typedef hosttype data_type;

  /// Host Allocation
  UCL_H_Mat<hosttype> host;
  /// Device Allocation
  UCL_D_Mat<devtype> device;

  UCL_Matrix() { }
  ~UCL_Matrix() { }

  /// Construct with specified number of rows and columns
  /** \sa alloc() **/
  UCL_Matrix(const size_t rows, const size_t cols, UCL_Device &acc,
             const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
             const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        alloc(host,device,_buffer,rows,cols,acc,kind1,kind2); }
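  /* Usage sketch (illustrative comment, not part of the original header):
   * assuming an already-initialized UCL_Device named `dev` and matching
   * host/device types, the constructor above sets up the host and device
   * allocations in a single call.
   *
   *   UCL_Matrix<double,double> m(3,128,dev);   // 3 rows x 128 cols
   *   m[0]=1.0;                                 // element access on the host
   *   m.update_device();                        // push the data to the device
   */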
  /// Set up host matrix with specified # of rows/cols and reserve memory
  /** The kind1 parameter controls memory access from the host
    * - UCL_READ_WRITE - Specify that you will read and write from host
    * - UCL_WRITE_ONLY - Specify that you will only write from host
    * - UCL_READ_ONLY - Specify that you will only read from host
    * - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
    * The kind2 parameter controls memory optimizations from the device:
    * - UCL_READ_WRITE - Specify that you will read and write in kernels
    * - UCL_WRITE_ONLY - Specify that you will only write in kernels
    * - UCL_READ_ONLY - Specify that you will only read in kernels
    * \note When passing a command queue instead of a device, the device
    *       allocation is always performed, even if the device shares memory
    *       with the host.
    * \param cq Default command queue for operations copied from another mat
    * \return UCL_SUCCESS if the memory allocation is successful **/
  template <class mat_type>
  inline int alloc(const size_t rows, const size_t cols, mat_type &cq,
                   const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
                   const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
    { return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        alloc(host,device,_buffer,rows,cols,cq,kind1,kind2); }

  /// Set up host matrix with specified # of rows/cols and reserve memory
  /** The kind1 parameter controls memory access from the host
    * - UCL_READ_WRITE - Specify that you will read and write from host
    * - UCL_WRITE_ONLY - Specify that you will only write from host
    * - UCL_READ_ONLY - Specify that you will only read from host
    * - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
    * The kind2 parameter controls memory optimizations from the device:
    * - UCL_READ_WRITE - Specify that you will read and write in kernels
    * - UCL_WRITE_ONLY - Specify that you will only write in kernels
    * - UCL_READ_ONLY - Specify that you will only read in kernels
    * \param device Used to get the default command queue for operations
    * \return UCL_SUCCESS if the memory allocation is successful **/
  inline int alloc(const size_t rows, const size_t cols, UCL_Device &acc,
                   const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
                   const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
    { return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        alloc(host,device,_buffer,rows,cols,acc,kind1,kind2); }

  /// Free memory and set size to 0
  inline void clear()
    { host.clear(); device.clear(); }

  /// Resize the allocation to contain rows x cols elements
  inline int resize(const int rows, const int cols) {
    assert(host.kind()!=UCL_VIEW);
    int err=host.resize(rows,cols);
    if (err!=UCL_SUCCESS)
      return err;
    return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
      dev_resize(device,host,_buffer,rows,cols);
  }

  /// Resize (only if bigger) the allocation to contain rows x cols elements
  inline int resize_ib(const int new_rows, const int new_cols)
    { if (new_rows>rows() || new_cols>cols())
        return resize(new_rows,new_cols);
      else return UCL_SUCCESS; }

  /// Set each element to zero (asynchronously on device)
  inline void zero() { zero(cq()); }

  /// Set first n elements to zero (asynchronously on device)
  inline void zero(const int n) { zero(n,cq()); }

  /// Set each element to zero (asynchronously on device)
  inline void zero(command_queue &cq) {
    host.zero();
    if (device.kind()!=UCL_VIEW) device.zero(cq);
    else if (_buffer.numel()>0) _buffer.zero();
  }

  /// Set first n elements to zero (asynchronously on device)
  inline void zero(const int n, command_queue &cq) {
    host.zero(n);
    if (device.kind()!=UCL_VIEW) device.zero(n,cq);
    else if (_buffer.numel()>0) _buffer.zero();
  }

  /// Get the number of elements
  inline size_t numel() const { return host.numel(); }
  /// Get the number of rows
  inline size_t rows() const { return host.rows(); }
  /// Get the number of columns
  inline size_t cols() const { return host.cols(); }
  /// Get the memory usage (bytes) of the s-object (including any buffers)
  inline size_t host_mem_usage()
    { return host.row_bytes()*host.rows()+_buffer.row_bytes()*_buffer.rows(); }
  /// Get the memory usage (bytes) of the s-object (including any buffers)
  inline size_t device_mem_usage()
    { return device.row_bytes()*device.rows(); }
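  /* Allocation sketch (illustrative comment): assuming a UCL_Device named
   * `dev`, a matrix that is written on the host and only read in kernels can
   * be set up with explicit access hints, grown with resize_ib(), and
   * cleared with zero().
   *
   *   UCL_Matrix<float,float> m;
   *   if (m.alloc(8,256,dev,UCL_WRITE_ONLY,UCL_READ_ONLY)!=UCL_SUCCESS)
   *     return;                         // handle the allocation failure
   *   m.resize_ib(8,512);               // reallocates only if larger
   *   m.zero();                         // asynchronous zero on the device
   */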
  /// Get element at index i
  inline hosttype & operator[](const int i) { return host[i]; }
  /// Get element at index i
  inline const hosttype & operator[](const int i) const { return host[i]; }
  /// 2D access to element at (row,col)
  inline hosttype & operator()(const int row, const int col)
    { return host(row,col); }
  /// 2D access to element at (row,col)
  inline const hosttype & operator()(const int row, const int col) const
    { return host(row,col); }

  /// Returns pointer to memory pointer for allocation on host
  inline hosttype ** host_ptr() { return host.host_ptr(); }

  /// Return the default command queue/stream associated with this data
  inline command_queue & cq() { return host.cq(); }
  /// Change the default command queue associated with this data
  inline void cq(command_queue &cq_in) { host.cq(cq_in); device.cq(cq_in); }
  /// Block until command_queue associated with matrix is complete
  inline void sync() { host.sync(); }

  /// Get the size of a row on the host (including any padding) in elements
  inline size_t row_size() const { return host.row_size(); }
  /// Get the size of a row on the host (including any padding) in bytes
  inline size_t row_bytes() const { return host.row_bytes(); }
  /// Get the size on the host in bytes of 1 element
  inline int element_size() const { return sizeof(hosttype); }

  /// Update the allocation on the host asynchronously
  inline void update_host()
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,_buffer,true); }
  /// Update the allocation on the host (true for asynchronous copy)
  inline void update_host(const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,_buffer,async); }
  /// Update the allocation on the host (using command queue)
  inline void update_host(command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,_buffer,cq); }
  /// Update the first n elements on the host (true for asynchronous copy)
  inline void update_host(const int n, const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,n,_buffer,async); }
  /// Update the first n elements on the host (using command queue)
  inline void update_host(const int n, command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,n,_buffer,cq); }
  /// Update slice on the host (true for asynchronous copy)
  inline void update_host(const int rows, const int cols, const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,rows,cols,_buffer,async); }
  /// Update slice on the host (using command queue)
  inline void update_host(const int rows, const int cols, command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,rows,cols,_buffer,cq); }
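  /* Copy-back sketch (illustrative comment): after a kernel has written to
   * `device`, the host copy can be refreshed in full or in part, and the
   * command-queue overloads place the transfer on a specific stream.  The
   * names `m` and `queue` are assumed to exist for the example.
   *
   *   m.update_host();                  // full matrix, asynchronous
   *   m.update_host(64,true);           // only the first 64 elements
   *   m.update_host(2,32,queue);        // 2x32 slice on a chosen queue
   *   m.sync();                         // wait before reading m(0,0)
   */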
  /// Update the allocation on the device asynchronously
  inline void update_device()
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,_buffer,true); }
  /// Update the allocation on the device (true for asynchronous copy)
  inline void update_device(const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,_buffer,async); }
  /// Update the allocation on the device (using command queue)
  inline void update_device(command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,_buffer,cq); }
  /// Update the first n elements on the device (true for asynchronous copy)
  inline void update_device(const int n, const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,n,_buffer,async); }
  /// Update the first n elements on the device (using command queue)
  inline void update_device(const int n, command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,n,_buffer,cq); }
  /// Update slice on the device (true for asynchronous copy)
  inline void update_device(const int rows, const int cols, const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,rows,cols,_buffer,async); }
  /// Update slice on the device (using command queue)
  inline void update_device(const int rows, const int cols, command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,rows,cols,_buffer,cq); }

 private:
  UCL_H_Mat<hosttype> _buffer;
};

#endif
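/* End-to-end sketch (illustrative comment, not part of the library): a
 * typical host -> kernel -> host cycle with the container above.  The device
 * setup and the kernel launch are assumptions elided for the example.
 *
 *   UCL_Matrix<float,float> m(4,1024,dev);
 *   for (size_t i=0; i<m.numel(); i++) m[i]=1.0f;   // fill on the host
 *   m.update_device();                              // host -> device copy
 *   // ... launch a kernel that reads/writes m.device here ...
 *   m.update_host();                                // device -> host copy
 *   m.sync();                                       // block before reading
 */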