// // Copyright 2015 Pixar // // Licensed under the Apache License, Version 2.0 (the "Apache License") // with the following modification; you may not use this file except in // compliance with the Apache License and the following modification to it: // Section 6. Trademarks. is deleted and replaced with: // // 6. Trademarks. This License does not grant permission to use the trade // names, trademarks, service marks, or product names of the Licensor // and its affiliates, except as required to comply with Section 4(c) of // the License and to reproduce the content of the NOTICE file. // // You may obtain a copy of the Apache License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the Apache License with the above modification is // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the Apache License for the specific // language governing permissions and limitations under the Apache License. // #include "glLoader.h" #include "../osd/glComputeEvaluator.h" #include "../osd/glslPatchShaderSource.h" #include "../far/error.h" #include "../far/stencilTable.h" #include #include #include #include namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { namespace Osd { static const char *shaderSource = #include "../osd/glslComputeKernel.gen.h" ; template GLuint createSSBO(std::vector const & src) { if (src.empty()) { return 0; } GLuint devicePtr = 0; #if defined(GL_ARB_direct_state_access) if (OSD_OPENGL_HAS(ARB_direct_state_access)) { glCreateBuffers(1, &devicePtr); glNamedBufferData(devicePtr, src.size()*sizeof(T), &src.at(0), GL_STATIC_DRAW); } else #endif { GLint prev = 0; glGetIntegerv(GL_SHADER_STORAGE_BUFFER_BINDING, &prev); glGenBuffers(1, &devicePtr); glBindBuffer(GL_SHADER_STORAGE_BUFFER, devicePtr); glBufferData(GL_SHADER_STORAGE_BUFFER, src.size()*sizeof(T), &src.at(0), GL_STATIC_DRAW); glBindBuffer(GL_SHADER_STORAGE_BUFFER, prev); } return devicePtr; } GLStencilTableSSBO::GLStencilTableSSBO( Far::StencilTable const *stencilTable) { _numStencils = stencilTable->GetNumStencils(); if (_numStencils > 0) { _sizes = createSSBO(stencilTable->GetSizes()); _offsets = createSSBO(stencilTable->GetOffsets()); _indices = createSSBO(stencilTable->GetControlIndices()); _weights = createSSBO(stencilTable->GetWeights()); _duWeights = _dvWeights = 0; _duuWeights = _duvWeights = _dvvWeights = 0; } else { _sizes = _offsets = _indices = _weights = 0; _duWeights = _dvWeights = 0; _duuWeights = _duvWeights = _dvvWeights = 0; } } GLStencilTableSSBO::GLStencilTableSSBO( Far::LimitStencilTable const *limitStencilTable) { _numStencils = limitStencilTable->GetNumStencils(); if (_numStencils > 0) { _sizes = createSSBO(limitStencilTable->GetSizes()); _offsets = createSSBO(limitStencilTable->GetOffsets()); _indices = createSSBO(limitStencilTable->GetControlIndices()); _weights = createSSBO(limitStencilTable->GetWeights()); _duWeights = createSSBO(limitStencilTable->GetDuWeights()); _dvWeights = createSSBO(limitStencilTable->GetDvWeights()); _duuWeights = createSSBO(limitStencilTable->GetDuuWeights()); _duvWeights = createSSBO(limitStencilTable->GetDuvWeights()); _dvvWeights = createSSBO(limitStencilTable->GetDvvWeights()); } else { _sizes = _offsets = _indices = _weights = 0; _duWeights = _dvWeights = 0; _duuWeights = _duvWeights = _dvvWeights = 0; } } GLStencilTableSSBO::~GLStencilTableSSBO() { if (_sizes) glDeleteBuffers(1, &_sizes); if (_offsets) glDeleteBuffers(1, &_offsets); if (_indices) glDeleteBuffers(1, &_indices); if (_weights) glDeleteBuffers(1, &_weights); if (_duWeights) glDeleteBuffers(1, &_duWeights); if (_dvWeights) glDeleteBuffers(1, &_dvWeights); if (_duuWeights) glDeleteBuffers(1, &_duuWeights); if (_duvWeights) glDeleteBuffers(1, &_duvWeights); if (_dvvWeights) glDeleteBuffers(1, &_dvvWeights); } // --------------------------------------------------------------------------- GLComputeEvaluator::GLComputeEvaluator() : _workGroupSize(64), _patchArraysSSBO(0) { std::memset((void*) &_stencilKernel, 0, sizeof(_stencilKernel)); std::memset((void*) &_patchKernel, 0, sizeof(_patchKernel)); // Initialize internal OpenGL loader library if necessary OpenSubdiv::internal::GLLoader::libraryInitializeGL(); } GLComputeEvaluator::~GLComputeEvaluator() { if (_patchArraysSSBO) { glDeleteBuffers(1, &_patchArraysSSBO); } } static GLuint compileKernel(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const & duDesc, BufferDescriptor const & dvDesc, BufferDescriptor const & duuDesc, BufferDescriptor const & duvDesc, BufferDescriptor const & dvvDesc, const char *kernelDefine, int workGroupSize) { GLuint program = glCreateProgram(); GLuint shader = glCreateShader(GL_COMPUTE_SHADER); std::string patchBasisShaderSource = GLSLPatchShaderSource::GetPatchBasisShaderSource(); const char *patchBasisShaderSourceDefine = "#define OSD_PATCH_BASIS_GLSL\n"; std::ostringstream defines; defines << "#define LENGTH " << srcDesc.length << "\n" << "#define SRC_STRIDE " << srcDesc.stride << "\n" << "#define DST_STRIDE " << dstDesc.stride << "\n" << "#define WORK_GROUP_SIZE " << workGroupSize << "\n" << kernelDefine << "\n" << patchBasisShaderSourceDefine << "\n"; bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0); bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0); if (deriv1) { defines << "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n"; } if (deriv2) { defines << "#define OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES\n"; } std::string defineStr = defines.str(); const char *shaderSources[4] = {"#version 430\n", 0, 0, 0}; shaderSources[1] = defineStr.c_str(); shaderSources[2] = patchBasisShaderSource.c_str(); shaderSources[3] = shaderSource; glShaderSource(shader, 4, shaderSources, NULL); glCompileShader(shader); glAttachShader(program, shader); GLint linked = 0; glLinkProgram(program); glGetProgramiv(program, GL_LINK_STATUS, &linked); if (linked == GL_FALSE) { char buffer[1024]; glGetShaderInfoLog(shader, 1024, NULL, buffer); Far::Error(Far::FAR_RUNTIME_ERROR, buffer); glGetProgramInfoLog(program, 1024, NULL, buffer); Far::Error(Far::FAR_RUNTIME_ERROR, buffer); glDeleteProgram(program); return 0; } glDeleteShader(shader); return program; } bool GLComputeEvaluator::Compile(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, BufferDescriptor const &duuDesc, BufferDescriptor const &duvDesc, BufferDescriptor const &dvvDesc) { // create a stencil kernel if (!_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, _workGroupSize)) { return false; } // create a patch kernel if (!_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, _workGroupSize)) { return false; } // create a patch arrays buffer if (!_patchArraysSSBO) { glGenBuffers(1, &_patchArraysSSBO); } return true; } /* static */ void GLComputeEvaluator::Synchronize(void * /*kernel*/) { // XXX: this is currently just for the performance measuring purpose. // need to be reimplemented by fence and sync. glFinish(); } bool GLComputeEvaluator::EvalStencils( GLuint srcBuffer, BufferDescriptor const &srcDesc, GLuint dstBuffer, BufferDescriptor const &dstDesc, GLuint duBuffer, BufferDescriptor const &duDesc, GLuint dvBuffer, BufferDescriptor const &dvDesc, GLuint sizesBuffer, GLuint offsetsBuffer, GLuint indicesBuffer, GLuint weightsBuffer, GLuint duWeightsBuffer, GLuint dvWeightsBuffer, int start, int end) const { return EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, 0, BufferDescriptor(), 0, BufferDescriptor(), 0, BufferDescriptor(), sizesBuffer, offsetsBuffer, indicesBuffer, weightsBuffer, duWeightsBuffer, dvWeightsBuffer, 0, 0, 0, start, end); } bool GLComputeEvaluator::EvalStencils( GLuint srcBuffer, BufferDescriptor const &srcDesc, GLuint dstBuffer, BufferDescriptor const &dstDesc, GLuint duBuffer, BufferDescriptor const &duDesc, GLuint dvBuffer, BufferDescriptor const &dvDesc, GLuint duuBuffer, BufferDescriptor const &duuDesc, GLuint duvBuffer, BufferDescriptor const &duvDesc, GLuint dvvBuffer, BufferDescriptor const &dvvDesc, GLuint sizesBuffer, GLuint offsetsBuffer, GLuint indicesBuffer, GLuint weightsBuffer, GLuint duWeightsBuffer, GLuint dvWeightsBuffer, GLuint duuWeightsBuffer, GLuint duvWeightsBuffer, GLuint dvvWeightsBuffer, int start, int end) const { if (!_stencilKernel.program) return false; int count = end - start; if (count <= 0) { return true; } glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, sizesBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, offsetsBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, indicesBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, weightsBuffer); if (duWeightsBuffer) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 8, duWeightsBuffer); if (dvWeightsBuffer) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, dvWeightsBuffer); if (duuWeightsBuffer) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 13, duuWeightsBuffer); if (duvWeightsBuffer) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 14, duvWeightsBuffer); if (dvvWeightsBuffer) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 15, dvvWeightsBuffer); GLint activeProgram; glGetIntegerv(GL_CURRENT_PROGRAM, &activeProgram); glUseProgram(_stencilKernel.program); glUniform1i(_stencilKernel.uniformStart, start); glUniform1i(_stencilKernel.uniformEnd, end); glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset); glUniform1i(_stencilKernel.uniformDstOffset, dstDesc.offset); if (_stencilKernel.uniformDuDesc > 0) { glUniform3i(_stencilKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride); } if (_stencilKernel.uniformDvDesc > 0) { glUniform3i(_stencilKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride); } if (_stencilKernel.uniformDuuDesc > 0) { glUniform3i(_stencilKernel.uniformDuuDesc, duuDesc.offset, duuDesc.length, duuDesc.stride); } if (_stencilKernel.uniformDuvDesc > 0) { glUniform3i(_stencilKernel.uniformDuvDesc, duvDesc.offset, duvDesc.length, duvDesc.stride); } if (_stencilKernel.uniformDvvDesc > 0) { glUniform3i(_stencilKernel.uniformDvvDesc, dvvDesc.offset, dvvDesc.length, dvvDesc.stride); } glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1); glUseProgram(activeProgram); glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); for (int i = 0; i < 16; ++i) { glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, 0); } return true; } bool GLComputeEvaluator::EvalPatches( GLuint srcBuffer, BufferDescriptor const &srcDesc, GLuint dstBuffer, BufferDescriptor const &dstDesc, GLuint duBuffer, BufferDescriptor const &duDesc, GLuint dvBuffer, BufferDescriptor const &dvDesc, int numPatchCoords, GLuint patchCoordsBuffer, const PatchArrayVector &patchArrays, GLuint patchIndexBuffer, GLuint patchParamsBuffer) const { return EvalPatches(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, 0, BufferDescriptor(), 0, BufferDescriptor(), 0, BufferDescriptor(), numPatchCoords, patchCoordsBuffer, patchArrays, patchIndexBuffer, patchParamsBuffer); } bool GLComputeEvaluator::EvalPatches( GLuint srcBuffer, BufferDescriptor const &srcDesc, GLuint dstBuffer, BufferDescriptor const &dstDesc, GLuint duBuffer, BufferDescriptor const &duDesc, GLuint dvBuffer, BufferDescriptor const &dvDesc, GLuint duuBuffer, BufferDescriptor const &duuDesc, GLuint duvBuffer, BufferDescriptor const &duvDesc, GLuint dvvBuffer, BufferDescriptor const &dvvDesc, int numPatchCoords, GLuint patchCoordsBuffer, const PatchArrayVector &patchArrays, GLuint patchIndexBuffer, GLuint patchParamsBuffer) const { if (!_patchKernel.program) return false; glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchCoordsBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchIndexBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, patchParamsBuffer); GLint activeProgram; glGetIntegerv(GL_CURRENT_PROGRAM, &activeProgram); glUseProgram(_patchKernel.program); glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset); glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset); int patchArraySize = sizeof(PatchArray); glBindBuffer(GL_SHADER_STORAGE_BUFFER, _patchArraysSSBO); glBufferData(GL_SHADER_STORAGE_BUFFER, patchArrays.size()*patchArraySize, NULL, GL_STATIC_DRAW); for (int i=0; i<(int)patchArrays.size(); ++i) { glBufferSubData(GL_SHADER_STORAGE_BUFFER, i*patchArraySize, sizeof(PatchArray), &patchArrays[i]); } glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, _patchArraysSSBO); if (_patchKernel.uniformDuDesc > 0) { glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride); } if (_patchKernel.uniformDvDesc > 0) { glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride); } if (_patchKernel.uniformDuuDesc > 0) { glUniform3i(_patchKernel.uniformDuuDesc, duuDesc.offset, duuDesc.length, duuDesc.stride); } if (_patchKernel.uniformDuvDesc > 0) { glUniform3i(_patchKernel.uniformDuvDesc, duvDesc.offset, duvDesc.length, duvDesc.stride); } if (_patchKernel.uniformDvvDesc > 0) { glUniform3i(_patchKernel.uniformDvvDesc, dvvDesc.offset, dvvDesc.length, dvvDesc.stride); } glDispatchCompute((numPatchCoords + _workGroupSize - 1) / _workGroupSize, 1, 1); glUseProgram(activeProgram); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, 0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, 0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, 0); return true; } // --------------------------------------------------------------------------- GLComputeEvaluator::_StencilKernel::_StencilKernel() : program(0) { } GLComputeEvaluator::_StencilKernel::~_StencilKernel() { if (program) { glDeleteProgram(program); } } bool GLComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, BufferDescriptor const &duuDesc, BufferDescriptor const &duvDesc, BufferDescriptor const &dvvDesc, int workGroupSize) { // create stencil kernel if (program) { glDeleteProgram(program); } const char * kernelDefine = "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS\n"; program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, kernelDefine, workGroupSize); if (program == 0) return false; // cache uniform locations (TODO: use uniform block) uniformStart = glGetUniformLocation(program, "batchStart"); uniformEnd = glGetUniformLocation(program, "batchEnd"); uniformSrcOffset = glGetUniformLocation(program, "srcOffset"); uniformDstOffset = glGetUniformLocation(program, "dstOffset"); uniformDuDesc = glGetUniformLocation(program, "duDesc"); uniformDvDesc = glGetUniformLocation(program, "dvDesc"); uniformDuuDesc = glGetUniformLocation(program, "duuDesc"); uniformDuvDesc = glGetUniformLocation(program, "duvDesc"); uniformDvvDesc = glGetUniformLocation(program, "dvvDesc"); return true; } // --------------------------------------------------------------------------- GLComputeEvaluator::_PatchKernel::_PatchKernel() : program(0) { } GLComputeEvaluator::_PatchKernel::~_PatchKernel() { if (program) { glDeleteProgram(program); } } bool GLComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, BufferDescriptor const &duuDesc, BufferDescriptor const &duvDesc, BufferDescriptor const &dvvDesc, int workGroupSize) { // create stencil kernel if (program) { glDeleteProgram(program); } const char * kernelDefine = "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES\n"; program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, kernelDefine, workGroupSize); if (program == 0) return false; // cache uniform locations uniformSrcOffset = glGetUniformLocation(program, "srcOffset"); uniformDstOffset = glGetUniformLocation(program, "dstOffset"); uniformPatchArray = glGetUniformLocation(program, "patchArray"); uniformDuDesc = glGetUniformLocation(program, "duDesc"); uniformDvDesc = glGetUniformLocation(program, "dvDesc"); uniformDuuDesc = glGetUniformLocation(program, "duuDesc"); uniformDuvDesc = glGetUniformLocation(program, "duvDesc"); uniformDvvDesc = glGetUniformLocation(program, "dvvDesc"); return true; } } // end namespace Osd } // end namespace OPENSUBDIV_VERSION } // end namespace OpenSubdiv