// // Copyright 2016 Pixar // // Licensed under the Apache License, Version 2.0 (the "Apache License") // with the following modification; you may not use this file except in // compliance with the Apache License and the following modification to it: // Section 6. Trademarks. is deleted and replaced with: // // 6. Trademarks. This License does not grant permission to use the trade // names, trademarks, service marks, or product names of the Licensor // and its affiliates, except as required to comply with Section 4(c) of // the License and to reproduce the content of the NOTICE file. // // You may obtain a copy of the Apache License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the Apache License with the above modification is // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the Apache License for the specific // language governing permissions and limitations under the Apache License. // #include "pxr/imaging/glf/glew.h" #include "pxr/imaging/hdSt/glUtils.h" #include "pxr/imaging/hgi/hgi.h" #include "pxr/imaging/hgi/blitCmds.h" #include "pxr/imaging/hgi/blitCmdsOps.h" #include "pxr/imaging/glf/contextCaps.h" #include "pxr/base/gf/vec2d.h" #include "pxr/base/gf/vec2f.h" #include "pxr/base/gf/vec2i.h" #include "pxr/base/gf/vec3d.h" #include "pxr/base/gf/vec3f.h" #include "pxr/base/gf/vec3i.h" #include "pxr/base/gf/vec4d.h" #include "pxr/base/gf/vec4f.h" #include "pxr/base/gf/vec4i.h" #include "pxr/base/gf/matrix4f.h" #include "pxr/base/gf/matrix4d.h" #include "pxr/imaging/hd/perfLog.h" #include "pxr/imaging/hd/tokens.h" #include "pxr/base/vt/array.h" #include "pxr/base/tf/envSetting.h" #include "pxr/base/tf/iterator.h" PXR_NAMESPACE_OPEN_SCOPE // To enable GPU compute features, OpenSubdiv must be configured to support // GLSL compute kernel. // #if OPENSUBDIV_HAS_GLSL_COMPUTE // default to GPU TF_DEFINE_ENV_SETTING(HD_ENABLE_GPU_COMPUTE, true, "Enable GPU smooth, quadrangulation and refinement"); #else // default to CPU TF_DEFINE_ENV_SETTING(HD_ENABLE_GPU_COMPUTE, false, "Enable GPU smooth, quadrangulation and refinement"); #endif static void _InitializeGPUComputeEnabled(bool *gpuComputeEnabled) { // GPU Compute if (TfGetEnvSetting(HD_ENABLE_GPU_COMPUTE)) { #if OPENSUBDIV_HAS_GLSL_COMPUTE const GlfContextCaps &caps = GlfContextCaps::GetInstance(); if (caps.glslVersion >= 430 && caps.shaderStorageBufferEnabled) { *gpuComputeEnabled = true; } else { TF_WARN("HD_ENABLE_GPU_COMPUTE can't be enabled " "(OpenGL 4.3 required).\n"); } #else TF_WARN("HD_ENABLE_GPU_COMPUTE can't be enabled " "(OpenSubdiv hasn't been configured with GLSL compute).\n"); #endif } } bool HdStGLUtils::IsGpuComputeEnabled() { static bool gpuComputeEnabled = false; static std::once_flag gpuComputeEnabledFlag; std::call_once(gpuComputeEnabledFlag, [](){ _InitializeGPUComputeEnabled(&gpuComputeEnabled); }); return gpuComputeEnabled; } template VtValue _CreateVtArray(int numElements, int arraySize, int stride, std::vector const &data) { VtArray array(numElements*arraySize); if (numElements == 0) return VtValue(array); const unsigned char *src = &data[0]; unsigned char *dst = (unsigned char *)array.data(); TF_VERIFY(data.size() == stride*(numElements-1) + arraySize*sizeof(T)); if (stride == static_cast(arraySize*sizeof(T))) { memcpy(dst, src, numElements*arraySize*sizeof(T)); } else { // deinterleaving for (int i = 0; i < numElements; ++i) { memcpy(dst, src, arraySize*sizeof(T)); dst += arraySize*sizeof(T); src += stride; } } return VtValue(array); } VtValue HdStGLUtils::ReadBuffer(GLint vbo, HdTupleType tupleType, int vboOffset, int stride, int numElems) { if (glBufferSubData == NULL) return VtValue(); // HdTupleType represents scalar, vector, matrix, and array types. const int bytesPerElement = HdDataSizeOfTupleType(tupleType); const int arraySize = tupleType.count; // Stride is the byte distance between subsequent elements. // If stride was not provided (aka 0), we assume elements are // tightly packed and have no interleaved data. if (stride == 0) stride = bytesPerElement; TF_VERIFY(stride >= bytesPerElement); // Total VBO size is the sum of the strides required to cover // every element up to the last, which only requires bytesPerElement. // // +---------+---------+---------+ // | :SRC: | :SRC: | :SRC: | // +---------+---------+---------+ // <-------read range------> // | ^ | ^ | // | stride * (n -1) | | // bytesPerElement // const GLsizeiptr vboSize = stride * (numElems-1) + bytesPerElement; GlfContextCaps const &caps = GlfContextCaps::GetInstance(); // Read data from GL std::vector tmp(vboSize); if (vbo > 0) { if (caps.directStateAccessEnabled) { glGetNamedBufferSubData(vbo, vboOffset, vboSize, &tmp[0]); } else { glBindBuffer(GL_ARRAY_BUFFER, vbo); glGetBufferSubData(GL_ARRAY_BUFFER, vboOffset, vboSize, &tmp[0]); glBindBuffer(GL_ARRAY_BUFFER, 0); } } // Convert data to Vt switch (tupleType.type) { case HdTypeInt8: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeInt16: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeUInt16: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeUInt32: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeInt32: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeInt32Vec2: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeInt32Vec3: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeInt32Vec4: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeFloat: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeFloatVec2: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeFloatVec3: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeFloatVec4: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeFloatMat4: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeDouble: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeDoubleVec2: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeDoubleVec3: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeDoubleVec4: return _CreateVtArray(numElems, arraySize, stride, tmp); case HdTypeDoubleMat4: return _CreateVtArray(numElems, arraySize, stride, tmp); default: TF_CODING_ERROR("Unhandled data type %i", tupleType.type); } return VtValue(); } bool HdStGLUtils::GetShaderCompileStatus(GLuint shader, std::string * reason) { // glew has to be initialized if (!glGetShaderiv) return true; GLint status = 0; glGetShaderiv(shader, GL_COMPILE_STATUS, &status); if (reason) { GLint infoLength = 0; glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLength); if (infoLength > 0) { char *infoLog = new char[infoLength];; glGetShaderInfoLog(shader, infoLength, NULL, infoLog); reason->assign(infoLog, infoLength); delete[] infoLog; } } return (status == GL_TRUE); } bool HdStGLUtils::GetProgramLinkStatus(GLuint program, std::string * reason) { // glew has to be initialized if (!glGetProgramiv) return true; GLint status = 0; glGetProgramiv(program, GL_LINK_STATUS, &status); if (reason) { GLint infoLength = 0; glGetProgramiv(program, GL_INFO_LOG_LENGTH, &infoLength); if (infoLength > 0) { char *infoLog = new char[infoLength];; glGetProgramInfoLog(program, infoLength, NULL, infoLog); reason->assign(infoLog, infoLength); delete[] infoLog; } } return (status == GL_TRUE); } // --------------------------------------------------------------------------- void HdStGLBufferRelocator::AddRange(GLintptr readOffset, GLintptr writeOffset, GLsizeiptr copySize) { _CopyUnit unit(readOffset, writeOffset, copySize); if (_queue.empty() || (!_queue.back().Concat(unit))) { _queue.push_back(unit); } } void HdStGLBufferRelocator::Commit(Hgi* hgi) { HgiBufferGpuToGpuOp blitOp; blitOp.gpuSourceBuffer = _srcBuffer; blitOp.gpuDestinationBuffer = _dstBuffer; // Use blit work to record resource copy commands. HgiBlitCmdsUniquePtr blitCmds = hgi->CreateBlitCmds(); TF_FOR_ALL (it, _queue) { blitOp.sourceByteOffset = it->readOffset; blitOp.byteSize = it->copySize; blitOp.destinationByteOffset = it->writeOffset; blitCmds->CopyBufferGpuToGpu(blitOp); } hgi->SubmitCmds(blitCmds.get()); HD_PERF_COUNTER_ADD(HdPerfTokens->glCopyBufferSubData, (double)_queue.size()); _queue.clear(); } PXR_NAMESPACE_CLOSE_SCOPE