// // Copyright 2015 Pixar // // Licensed under the Apache License, Version 2.0 (the "Apache License") // with the following modification; you may not use this file except in // compliance with the Apache License and the following modification to it: // Section 6. Trademarks. is deleted and replaced with: // // 6. Trademarks. This License does not grant permission to use the trade // names, trademarks, service marks, or product names of the Licensor // and its affiliates, except as required to comply with Section 4(c) of // the License and to reproduce the content of the NOTICE file. // // You may obtain a copy of the Apache License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the Apache License with the above modification is // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the Apache License for the specific // language governing permissions and limitations under the Apache License. // #ifndef OPENSUBDIV3_OSD_CL_EVALUATOR_H #define OPENSUBDIV3_OSD_CL_EVALUATOR_H #include "../version.h" #include "../osd/opencl.h" #include "../osd/types.h" #include "../osd/bufferDescriptor.h" namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { namespace Far { class PatchTable; class StencilTable; class LimitStencilTable; } namespace Osd { /// \brief OpenCL stencil table /// /// This class is an OpenCL buffer representation of Far::StencilTable. /// /// CLCompute consumes this table to apply stencils /// /// class CLStencilTable { public: template static CLStencilTable *Create(Far::StencilTable const *stencilTable, DEVICE_CONTEXT context) { return new CLStencilTable(stencilTable, context->GetContext()); } template static CLStencilTable *Create( Far::LimitStencilTable const *limitStencilTable, DEVICE_CONTEXT context) { return new CLStencilTable(limitStencilTable, context->GetContext()); } CLStencilTable(Far::StencilTable const *stencilTable, cl_context clContext); CLStencilTable(Far::LimitStencilTable const *limitStencilTable, cl_context clContext); ~CLStencilTable(); // interfaces needed for CLComputeKernel cl_mem GetSizesBuffer() const { return _sizes; } cl_mem GetOffsetsBuffer() const { return _offsets; } cl_mem GetIndicesBuffer() const { return _indices; } cl_mem GetWeightsBuffer() const { return _weights; } cl_mem GetDuWeightsBuffer() const { return _duWeights; } cl_mem GetDvWeightsBuffer() const { return _dvWeights; } cl_mem GetDuuWeightsBuffer() const { return _duuWeights; } cl_mem GetDuvWeightsBuffer() const { return _duvWeights; } cl_mem GetDvvWeightsBuffer() const { return _dvvWeights; } int GetNumStencils() const { return _numStencils; } private: cl_mem _sizes; cl_mem _offsets; cl_mem _indices; cl_mem _weights; cl_mem _duWeights; cl_mem _dvWeights; cl_mem _duuWeights; cl_mem _duvWeights; cl_mem _dvvWeights; int _numStencils; }; // --------------------------------------------------------------------------- class CLEvaluator { public: typedef bool Instantiatable; /// Generic creator template. template static CLEvaluator *Create(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, DEVICE_CONTEXT deviceContext) { return Create(srcDesc, dstDesc, duDesc, dvDesc, deviceContext->GetContext(), deviceContext->GetCommandQueue()); } static CLEvaluator * Create(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, cl_context clContext, cl_command_queue clCommandQueue) { CLEvaluator *instance = new CLEvaluator(clContext, clCommandQueue); if (instance->Compile(srcDesc, dstDesc, duDesc, dvDesc)) return instance; delete instance; return NULL; } /// Generic creator template. template static CLEvaluator *Create(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, BufferDescriptor const &duuDesc, BufferDescriptor const &duvDesc, BufferDescriptor const &dvvDesc, DEVICE_CONTEXT deviceContext) { return Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, deviceContext->GetContext(), deviceContext->GetCommandQueue()); } static CLEvaluator * Create(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, BufferDescriptor const &duuDesc, BufferDescriptor const &duvDesc, BufferDescriptor const &dvvDesc, cl_context clContext, cl_command_queue clCommandQueue) { CLEvaluator *instance = new CLEvaluator(clContext, clCommandQueue); if (instance->Compile(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc)) return instance; delete instance; return NULL; } /// Constructor. CLEvaluator(cl_context context, cl_command_queue queue); /// Destructor. ~CLEvaluator(); /// ---------------------------------------------------------------------- /// /// Stencil evaluations with StencilTable /// /// ---------------------------------------------------------------------- /// \brief Generic static stencil function. This function has a same /// signature as other device kernels have so that it can be called /// transparently from OsdMesh template interface. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning the /// cl_mem object for read /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning the /// cl_mem object for results to be written /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param stencilTable stencil table to be applied. The table must have /// SSBO interfaces. /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalStencils( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) instance = Create(srcDesc, dstDesc, BufferDescriptor(), BufferDescriptor(), deviceContext); if (instance) { bool r = instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic static stencil function. This function has a same /// signature as other device kernels have so that it can be called /// transparently from OsdMesh template interface. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning the /// cl_mem object for read /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning the /// cl_mem object for results to be written /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning the /// cl_mem object for du results to be written /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning the /// cl_mem object for dv results to be written /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param stencilTable stencil table to be applied. The table must have /// SSBO interfaces. /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalStencils( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, STENCIL_TABLE const *stencilTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, stencilTable, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) instance = Create(srcDesc, dstDesc, duDesc, dvDesc, deviceContext); if (instance) { bool r = instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, stencilTable, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic static stencil function. This function has a same /// signature as other device kernels have so that it can be called /// transparently from OsdMesh template interface. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning the /// cl_mem object for read /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning the /// cl_mem object for results to be written /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning the /// cl_mem object for du results to be written /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning the /// cl_mem object for dv results to be written /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param duuBuffer Output buffer 2nd derivative wrt u /// must have BindCLBuffer() method returning the /// cl_mem object for du results to be written /// /// @param duuDesc vertex buffer descriptor for the duuBuffer /// /// @param duvBuffer Output buffer 2nd derivative wrt u and v /// must have BindCLBuffer() method returning the /// cl_mem object for du results to be written /// /// @param duvDesc vertex buffer descriptor for the duvBuffer /// /// @param dvvBuffer Output buffer 2nd derivative wrt v /// must have BindCLBuffer() method returning the /// cl_mem object for dv results to be written /// /// @param dvvDesc vertex buffer descriptor for the dvvBuffer /// /// @param stencilTable stencil table to be applied. The table must have /// SSBO interfaces. /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalStencils( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc, DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc, DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc, STENCIL_TABLE const *stencilTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, duuBuffer, duuDesc, duvBuffer, duvDesc, dvvBuffer, dvvDesc, stencilTable, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) instance = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, deviceContext); if (instance) { bool r = instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, duuBuffer, duuDesc, duvBuffer, duvDesc, dvvBuffer, dvvDesc, stencilTable, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic stencil function. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning the /// cl_mem object for read /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning the /// cl_mem object for results to be written /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param stencilTable stencil table to be applied. The table must have /// SSBO interfaces. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalStencils( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, stencilTable->GetSizesBuffer(), stencilTable->GetOffsetsBuffer(), stencilTable->GetIndicesBuffer(), stencilTable->GetWeightsBuffer(), 0, stencilTable->GetNumStencils(), numStartEvents, startEvents, endEvent); } /// \brief Generic stencil function. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning the /// cl_mem object for read /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning the /// cl_mem object for results to be written /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning the /// cl_mem object for du results to be written /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning the /// cl_mem object for dv results to be written /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param stencilTable stencil table to be applied. The table must have /// SSBO interfaces. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalStencils( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, STENCIL_TABLE const *stencilTable, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, duBuffer->BindCLBuffer(_clCommandQueue), duDesc, dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc, stencilTable->GetSizesBuffer(), stencilTable->GetOffsetsBuffer(), stencilTable->GetIndicesBuffer(), stencilTable->GetWeightsBuffer(), stencilTable->GetDuWeightsBuffer(), stencilTable->GetDvWeightsBuffer(), 0, stencilTable->GetNumStencils(), numStartEvents, startEvents, endEvent); } /// \brief Generic stencil function. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning the /// cl_mem object for read /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning the /// cl_mem object for results to be written /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning the /// cl_mem object for du results to be written /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning the /// cl_mem object for dv results to be written /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param duuBuffer Output buffer 2nd derivative wrt u /// must have BindCLBuffer() method returning the /// cl_mem object for du results to be written /// /// @param duuDesc vertex buffer descriptor for the duuBuffer /// /// @param duvBuffer Output buffer 2nd derivative wrt u and v /// must have BindCLBuffer() method returning the /// cl_mem object for du results to be written /// /// @param duvDesc vertex buffer descriptor for the duvBuffer /// /// @param dvvBuffer Output buffer 2nd derivative wrt v /// must have BindCLBuffer() method returning the /// cl_mem object for dv results to be written /// /// @param dvvDesc vertex buffer descriptor for the dvvBuffer /// /// @param stencilTable stencil table to be applied. The table must have /// SSBO interfaces. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalStencils( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc, DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc, DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc, STENCIL_TABLE const *stencilTable, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, duBuffer->BindCLBuffer(_clCommandQueue), duDesc, dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc, duuBuffer->BindCLBuffer(_clCommandQueue), duuDesc, duvBuffer->BindCLBuffer(_clCommandQueue), duvDesc, dvvBuffer->BindCLBuffer(_clCommandQueue), dvvDesc, stencilTable->GetSizesBuffer(), stencilTable->GetOffsetsBuffer(), stencilTable->GetIndicesBuffer(), stencilTable->GetWeightsBuffer(), stencilTable->GetDuWeightsBuffer(), stencilTable->GetDvWeightsBuffer(), stencilTable->GetDuuWeightsBuffer(), stencilTable->GetDuvWeightsBuffer(), stencilTable->GetDvvWeightsBuffer(), 0, stencilTable->GetNumStencils(), numStartEvents, startEvents, endEvent); } /// Dispatch the CL compute kernel asynchronously. /// returns false if the kernel hasn't been compiled yet. bool EvalStencils(cl_mem src, BufferDescriptor const &srcDesc, cl_mem dst, BufferDescriptor const &dstDesc, cl_mem sizes, cl_mem offsets, cl_mem indices, cl_mem weights, int start, int end, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const; /// \brief Dispatch the CL compute kernel asynchronously. /// returns false if the kernel hasn't been compiled yet. /// /// @param src CL buffer of input primvar source data /// /// @param srcDesc vertex buffer descriptor for the srcBuffer /// /// @param dst CL buffer of output primvar destination data /// /// @param dstDesc vertex buffer descriptor for the dstBuffer /// /// @param du CL buffer of output derivative wrt u /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dv CL buffer of output derivative wrt v /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param sizes CL buffer of the sizes in the stencil table /// /// @param offsets CL buffer of the offsets in the stencil table /// /// @param indices CL buffer of the indices in the stencil table /// /// @param weights CL buffer of the weights in the stencil table /// /// @param duWeights CL buffer of the du weights in the stencil table /// /// @param dvWeights CL buffer of the dv weights in the stencil table /// /// @param start start index of stencil table /// /// @param end end index of stencil table /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// bool EvalStencils(cl_mem src, BufferDescriptor const &srcDesc, cl_mem dst, BufferDescriptor const &dstDesc, cl_mem du, BufferDescriptor const &duDesc, cl_mem dv, BufferDescriptor const &dvDesc, cl_mem sizes, cl_mem offsets, cl_mem indices, cl_mem weights, cl_mem duWeights, cl_mem dvWeights, int start, int end, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const; /// \brief Dispatch the CL compute kernel asynchronously. /// returns false if the kernel hasn't been compiled yet. /// /// @param src CL buffer of input primvar source data /// /// @param srcDesc vertex buffer descriptor for the srcBuffer /// /// @param dst CL buffer of output primvar destination data /// /// @param dstDesc vertex buffer descriptor for the dstBuffer /// /// @param du CL buffer of output derivative wrt u /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dv CL buffer of output derivative wrt v /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param duu CL buffer of output 2nd derivative wrt u /// /// @param duuDesc vertex buffer descriptor for the duuBuffer /// /// @param duv CL buffer of output 2nd derivative wrt u and v /// /// @param duvDesc vertex buffer descriptor for the duvBuffer /// /// @param dvv CL buffer of output 2nd derivative wrt v /// /// @param dvvDesc vertex buffer descriptor for the dvvBuffer /// /// @param sizes CL buffer of the sizes in the stencil table /// /// @param offsets CL buffer of the offsets in the stencil table /// /// @param indices CL buffer of the indices in the stencil table /// /// @param weights CL buffer of the weights in the stencil table /// /// @param duWeights CL buffer of the du weights in the stencil table /// /// @param dvWeights CL buffer of the dv weights in the stencil table /// /// @param duuWeights CL buffer of the duu weights in the stencil table /// /// @param duvWeights CL buffer of the duv weights in the stencil table /// /// @param dvvWeights CL buffer of the dvv weights in the stencil table /// /// @param start start index of stencil table /// /// @param end end index of stencil table /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// bool EvalStencils(cl_mem src, BufferDescriptor const &srcDesc, cl_mem dst, BufferDescriptor const &dstDesc, cl_mem du, BufferDescriptor const &duDesc, cl_mem dv, BufferDescriptor const &dvDesc, cl_mem duu, BufferDescriptor const &duuDesc, cl_mem duv, BufferDescriptor const &duvDesc, cl_mem dvv, BufferDescriptor const &dvvDesc, cl_mem sizes, cl_mem offsets, cl_mem indices, cl_mem weights, cl_mem duWeights, cl_mem dvWeights, cl_mem duuWeights, cl_mem duvWeights, cl_mem dvvWeights, int start, int end, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const; /// ---------------------------------------------------------------------- /// /// Limit evaluations with PatchTable /// /// ---------------------------------------------------------------------- /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalPatches( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalPatches(srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, BufferDescriptor(), BufferDescriptor(), deviceContext); if (instance) { bool r = instance->EvalPatches(srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct /// /// @param patchTable CLPatchTable or equivalent /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalPatches( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalPatches(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, duDesc, dvDesc, deviceContext); if (instance) { bool r = instance->EvalPatches(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param duuBuffer Output buffer 2nd derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duuDesc vertex buffer descriptor for the duuBuffer /// /// @param duvBuffer Output buffer 2nd derivative wrt u and v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duvDesc vertex buffer descriptor for the duvBuffer /// /// @param dvvBuffer Output buffer 2nd derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvvDesc vertex buffer descriptor for the dvvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct /// /// @param patchTable CLPatchTable or equivalent /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalPatches( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc, DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc, DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalPatches(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, duuBuffer, duuDesc, duvBuffer, duvDesc, dvvBuffer, dvvDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, deviceContext); if (instance) { bool r = instance->EvalPatches(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, duuBuffer, duuDesc, duvBuffer, duvDesc, dvvBuffer, dvvDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalPatches( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, 0, BufferDescriptor(), 0, BufferDescriptor(), numPatchCoords, patchCoords->BindCLBuffer(_clCommandQueue), patchTable->GetPatchArrayBuffer(), patchTable->GetPatchIndexBuffer(), patchTable->GetPatchParamBuffer(), numStartEvents, startEvents, endEvent); } /// \brief Generic limit eval function with derivatives. This function has /// a same signature as other device kernels have so that it can be /// called in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// /// @param patchTable CLPatchTable or equivalent /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalPatches( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, duBuffer->BindCLBuffer(_clCommandQueue), duDesc, dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc, numPatchCoords, patchCoords->BindCLBuffer(_clCommandQueue), patchTable->GetPatchArrayBuffer(), patchTable->GetPatchIndexBuffer(), patchTable->GetPatchParamBuffer(), numStartEvents, startEvents, endEvent); } /// \brief Generic limit eval function with derivatives. This function has /// a same signature as other device kernels have so that it can be /// called in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param duuBuffer Output buffer 2nd derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duuDesc vertex buffer descriptor for the duuBuffer /// /// @param duvBuffer Output buffer 2nd derivative wrt u and v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duvDesc vertex buffer descriptor for the duvBuffer /// /// @param dvvBuffer Output buffer 2nd derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvvDesc vertex buffer descriptor for the dvvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// /// @param patchTable CLPatchTable or equivalent /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalPatches( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc, DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc, DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, duBuffer->BindCLBuffer(_clCommandQueue), duDesc, dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc, duuBuffer->BindCLBuffer(_clCommandQueue), duuDesc, duvBuffer->BindCLBuffer(_clCommandQueue), duvDesc, dvvBuffer->BindCLBuffer(_clCommandQueue), dvvDesc, numPatchCoords, patchCoords->BindCLBuffer(_clCommandQueue), patchTable->GetPatchArrayBuffer(), patchTable->GetPatchIndexBuffer(), patchTable->GetPatchParamBuffer(), numStartEvents, startEvents, endEvent); } bool EvalPatches(cl_mem src, BufferDescriptor const &srcDesc, cl_mem dst, BufferDescriptor const &dstDesc, cl_mem du, BufferDescriptor const &duDesc, cl_mem dv, BufferDescriptor const &dvDesc, int numPatchCoords, cl_mem patchCoordsBuffer, cl_mem patchArrayBuffer, cl_mem patchIndexBuffer, cl_mem patchParamsBuffer, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const; bool EvalPatches(cl_mem src, BufferDescriptor const &srcDesc, cl_mem dst, BufferDescriptor const &dstDesc, cl_mem du, BufferDescriptor const &duDesc, cl_mem dv, BufferDescriptor const &dvDesc, cl_mem duu, BufferDescriptor const &duuDesc, cl_mem duv, BufferDescriptor const &duvDesc, cl_mem dvv, BufferDescriptor const &dvvDesc, int numPatchCoords, cl_mem patchCoordsBuffer, cl_mem patchArrayBuffer, cl_mem patchIndexBuffer, cl_mem patchParamsBuffer, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const; /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalPatchesVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalPatchesVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, BufferDescriptor(), BufferDescriptor(), deviceContext); if (instance) { bool r = instance->EvalPatchesVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalPatchesVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, 0, BufferDescriptor(), 0, BufferDescriptor(), numPatchCoords, patchCoords->BindCLBuffer(_clCommandQueue), patchTable->GetVaryingPatchArrayBuffer(), patchTable->GetVaryingPatchIndexBuffer(), patchTable->GetPatchParamBuffer(), numStartEvents, startEvents, endEvent); } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalPatchesVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalPatchesVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, duDesc, dvDesc, deviceContext); if (instance) { bool r = instance->EvalPatchesVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalPatchesVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, duBuffer->BindCLBuffer(_clCommandQueue), duDesc, dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc, numPatchCoords, patchCoords->BindCLBuffer(_clCommandQueue), patchTable->GetVaryingPatchArrayBuffer(), patchTable->GetVaryingPatchIndexBuffer(), patchTable->GetPatchParamBuffer(), numStartEvents, startEvents, endEvent); } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param duuBuffer Output buffer 2nd derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duuDesc vertex buffer descriptor for the duuBuffer /// /// @param duvBuffer Output buffer 2nd derivative wrt u and v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duvDesc vertex buffer descriptor for the duvBuffer /// /// @param dvvBuffer Output buffer 2nd derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvvDesc vertex buffer descriptor for the dvvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalPatchesVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc, DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc, DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalPatchesVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, duuBuffer, duuDesc, duvBuffer, duvDesc, dvvBuffer, dvvDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, deviceContext); if (instance) { bool r = instance->EvalPatchesVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, duuBuffer, duuDesc, duvBuffer, duvDesc, dvvBuffer, dvvDesc, numPatchCoords, patchCoords, patchTable, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param duuBuffer Output buffer 2nd derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duuDesc vertex buffer descriptor for the duuBuffer /// /// @param duvBuffer Output buffer 2nd derivative wrt u and v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duvDesc vertex buffer descriptor for the duvBuffer /// /// @param dvvBuffer Output buffer 2nd derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvvDesc vertex buffer descriptor for the dvvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalPatchesVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc, DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc, DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, duBuffer->BindCLBuffer(_clCommandQueue), duDesc, dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc, duuBuffer->BindCLBuffer(_clCommandQueue), duuDesc, duvBuffer->BindCLBuffer(_clCommandQueue), duvDesc, dvvBuffer->BindCLBuffer(_clCommandQueue), dvvDesc, numPatchCoords, patchCoords->BindCLBuffer(_clCommandQueue), patchTable->GetVaryingPatchArrayBuffer(), patchTable->GetVaryingPatchIndexBuffer(), patchTable->GetPatchParamBuffer(), numStartEvents, startEvents, endEvent); } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param fvarChannel face-varying channel /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalPatchesFaceVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalPatchesFaceVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable, fvarChannel, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, BufferDescriptor(), BufferDescriptor(), deviceContext); if (instance) { bool r = instance->EvalPatchesFaceVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable, fvarChannel, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param fvarChannel face-varying channel /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalPatchesFaceVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel = 0, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, 0, BufferDescriptor(), 0, BufferDescriptor(), numPatchCoords, patchCoords->BindCLBuffer(_clCommandQueue), patchTable->GetFVarPatchArrayBuffer(fvarChannel), patchTable->GetFVarPatchIndexBuffer(fvarChannel), patchTable->GetFVarPatchParamBuffer(fvarChannel), numStartEvents, startEvents, endEvent); } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param fvarChannel face-varying channel /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalPatchesFaceVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalPatchesFaceVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable, fvarChannel, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, duDesc, dvDesc, deviceContext); if (instance) { bool r = instance->EvalPatchesFaceVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable, fvarChannel, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param fvarChannel face-varying channel /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalPatchesFaceVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel = 0, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, duBuffer->BindCLBuffer(_clCommandQueue), duDesc, dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc, numPatchCoords, patchCoords->BindCLBuffer(_clCommandQueue), patchTable->GetFVarPatchArrayBuffer(fvarChannel), patchTable->GetFVarPatchIndexBuffer(fvarChannel), patchTable->GetFVarPatchParamBuffer(fvarChannel), numStartEvents, startEvents, endEvent); } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param duuBuffer Output buffer 2nd derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duuDesc vertex buffer descriptor for the duuBuffer /// /// @param duvBuffer Output buffer 2nd derivative wrt u and v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duvDesc vertex buffer descriptor for the duvBuffer /// /// @param dvvBuffer Output buffer 2nd derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvvDesc vertex buffer descriptor for the dvvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param fvarChannel face-varying channel /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext client providing context class which supports /// cL_context GetContext() /// cl_command_queue GetCommandQueue() /// methods. /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template static bool EvalPatchesFaceVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc, DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc, DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) { if (instance) { return instance->EvalPatchesFaceVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, duuBuffer, duuDesc, duvBuffer, duvDesc, dvvBuffer, dvvDesc, numPatchCoords, patchCoords, patchTable, fvarChannel, numStartEvents, startEvents, endEvent); } else { // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, deviceContext); if (instance) { bool r = instance->EvalPatchesFaceVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, duuBuffer, duuDesc, duvBuffer, duvDesc, dvvBuffer, dvvDesc, numPatchCoords, patchCoords, patchTable, fvarChannel, numStartEvents, startEvents, endEvent); delete instance; return r; } return false; } } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// must have BindCLBuffer() method returning a CL /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param duuBuffer Output buffer 2nd derivative wrt u /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duuDesc vertex buffer descriptor for the duuBuffer /// /// @param duvBuffer Output buffer 2nd derivative wrt u and v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param duvDesc vertex buffer descriptor for the duvBuffer /// /// @param dvvBuffer Output buffer 2nd derivative wrt v /// must have BindCLBuffer() method returning a CL /// buffer object of destination data /// /// @param dvvDesc vertex buffer descriptor for the dvvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindCLBuffer() method returning an /// array of PatchCoord struct. /// /// @param patchTable CLPatchTable or equivalent /// /// @param fvarChannel face-varying channel /// /// @param numStartEvents the number of events in the array pointed to by /// startEvents. /// /// @param startEvents points to an array of cl_event which will determine /// when it is safe for the OpenCL device to begin work /// or NULL if it can begin immediately. /// /// @param endEvent pointer to a cl_event which will receive a copy of /// the cl_event which indicates when all work for this /// call has completed. This cl_event has an incremented /// reference count and should be released via /// clReleaseEvent(). NULL if not required. /// template bool EvalPatchesFaceVarying( SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc, DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc, DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel = 0, unsigned int numStartEvents=0, const cl_event* startEvents=NULL, cl_event* endEvent=NULL) const { return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, duBuffer->BindCLBuffer(_clCommandQueue), duDesc, dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc, duuBuffer->BindCLBuffer(_clCommandQueue), duuDesc, duvBuffer->BindCLBuffer(_clCommandQueue), duvDesc, dvvBuffer->BindCLBuffer(_clCommandQueue), dvvDesc, numPatchCoords, patchCoords->BindCLBuffer(_clCommandQueue), patchTable->GetFVarPatchArrayBuffer(fvarChannel), patchTable->GetFVarPatchIndexBuffer(fvarChannel), patchTable->GetFVarPatchParamBuffer(fvarChannel), numStartEvents, startEvents, endEvent); } /// ---------------------------------------------------------------------- /// /// Other methods /// /// ---------------------------------------------------------------------- /// Configure OpenCL kernel. /// Returns false if it fails to compile the kernel. bool Compile(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc = BufferDescriptor(), BufferDescriptor const &dvDesc = BufferDescriptor(), BufferDescriptor const &duuDesc = BufferDescriptor(), BufferDescriptor const &duvDesc = BufferDescriptor(), BufferDescriptor const &dvvDesc = BufferDescriptor()); /// Wait the OpenCL kernels finish. template static void Synchronize(DEVICE_CONTEXT deviceContext) { Synchronize(deviceContext->GetCommandQueue()); } static void Synchronize(cl_command_queue queue); private: cl_context _clContext; cl_command_queue _clCommandQueue; cl_program _program; cl_kernel _stencilKernel; cl_kernel _stencilDerivKernel; cl_kernel _patchKernel; }; } // end namespace Osd } // end namespace OPENSUBDIV_VERSION using namespace OPENSUBDIV_VERSION; } // end namespace OpenSubdiv #endif // OPENSUBDIV3_OSD_CL_EVALUATOR_H