//
// Copyright 2016 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
//    names, trademarks, service marks, or product names of the Licensor
//    and its affiliates, except as required to comply with Section 4(c) of
//    the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "pxr/imaging/glf/glew.h"

#include "pxr/imaging/glf/textureRegistry.h"

#include "pxr/base/work/loops.h"

#include "pxr/imaging/hd/tokens.h"
#include "pxr/imaging/hdSt/copyComputation.h"
#include "pxr/imaging/hdSt/dispatchBuffer.h"
#include "pxr/imaging/hdSt/glslProgram.h"
#include "pxr/imaging/hdSt/interleavedMemoryManager.h"
#include "pxr/imaging/hdSt/persistentBuffer.h"
#include "pxr/imaging/hdSt/resourceRegistry.h"
#include "pxr/imaging/hdSt/textureResource.h"
#include "pxr/imaging/hdSt/vboMemoryManager.h"
#include "pxr/imaging/hdSt/vboSimpleMemoryManager.h"
#include "pxr/imaging/hdSt/shaderCode.h"
#include "pxr/imaging/hdSt/textureHandleRegistry.h"
#include "pxr/imaging/hdSt/textureObjectRegistry.h"

#include "pxr/base/tf/envSetting.h"

PXR_NAMESPACE_OPEN_SCOPE

TF_DEFINE_ENV_SETTING(HDST_ENABLE_RESOURCE_INSTANCING, true,
                  "Enable instance registry deduplication of resource data");

static void
_CopyChainedBuffers(HdBufferSourceSharedPtr const& src,
                    HdBufferArrayRangeSharedPtr const& range)
{
    if (src->HasChainedBuffer()) {
        HdBufferSourceSharedPtrVector chainedSrcs = src->GetChainedBuffers();
        // traverse the tree in a DFS fashion
        for (auto& c : chainedSrcs) {
            range->CopyData(c);
            _CopyChainedBuffers(c, range);
        }
    }
}

static bool
_IsEnabledResourceInstancing()
{
    static bool isResourceInstancingEnabled =
        TfGetEnvSetting(HDST_ENABLE_RESOURCE_INSTANCING);
    return isResourceInstancingEnabled;
}

template <typename ID, typename T>
HdInstance<T>
_Register(ID id, HdInstanceRegistry<T> &registry, TfToken const &perfToken)
{
    if (_IsEnabledResourceInstancing()) {
        HdInstance<T> instance = registry.GetInstance(id);

        if (instance.IsFirstInstance()) {
            HD_PERF_COUNTER_INCR(perfToken);
        }

        return instance;
    } else {
        // Return an instance that is not managed by the registry when
        // topology instancing is disabled.
        return HdInstance<T>(id);
    }
}
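// For reference: TF_DEFINE_ENV_SETTING reads an environment variable of the
// same name, so deduplication can be switched off at startup (illustrative
// shell syntax):
//
//   export HDST_ENABLE_RESOURCE_INSTANCING=0
//
// In that case _Register() above hands back unmanaged instances and the
// corresponding inst* perf counters are never incremented.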
HdStResourceRegistry::HdStResourceRegistry(Hgi * const hgi)
    : _hgi(hgi)
    , _numBufferSourcesToResolve(0)
    // default aggregation strategies for varying (vertex, varying) primvars
    , _nonUniformAggregationStrategy(
        std::make_unique<HdStVBOMemoryManager>(_hgi))
    , _nonUniformImmutableAggregationStrategy(
        std::make_unique<HdStVBOMemoryManager>(_hgi))
    // default aggregation strategy for uniform on UBO (for globals)
    , _uniformUboAggregationStrategy(
        std::make_unique<HdStInterleavedUBOMemoryManager>(_hgi))
    // default aggregation strategy for uniform on SSBO (for primvars)
    , _uniformSsboAggregationStrategy(
        std::make_unique<HdStInterleavedSSBOMemoryManager>(_hgi))
    // default aggregation strategy for single buffers (for nested instancer)
    , _singleAggregationStrategy(
        std::make_unique<HdStVBOSimpleMemoryManager>(_hgi))
    , _textureHandleRegistry(std::make_unique<HdSt_TextureHandleRegistry>(hgi))
{
}

HdStResourceRegistry::~HdStResourceRegistry() = default;

void HdStResourceRegistry::InvalidateShaderRegistry()
{
    _geometricShaderRegistry.Invalidate();
}

VtDictionary
HdStResourceRegistry::GetResourceAllocation() const
{
    VtDictionary result;

    size_t gpuMemoryUsed = 0;

    // buffer array allocation
    const size_t nonUniformSize =
        _nonUniformBufferArrayRegistry.GetResourceAllocation(
            _nonUniformAggregationStrategy.get(), result) +
        _nonUniformImmutableBufferArrayRegistry.GetResourceAllocation(
            _nonUniformImmutableAggregationStrategy.get(), result);
    const size_t uboSize =
        _uniformUboBufferArrayRegistry.GetResourceAllocation(
            _uniformUboAggregationStrategy.get(), result);
    const size_t ssboSize =
        _uniformSsboBufferArrayRegistry.GetResourceAllocation(
            _uniformSsboAggregationStrategy.get(), result);
    const size_t singleBufferSize =
        _singleBufferArrayRegistry.GetResourceAllocation(
            _singleAggregationStrategy.get(), result);

    result[HdPerfTokens->nonUniformSize] = VtValue(nonUniformSize);
    result[HdPerfTokens->uboSize] = VtValue(uboSize);
    result[HdPerfTokens->ssboSize] = VtValue(ssboSize);
    result[HdPerfTokens->singleBufferSize] = VtValue(singleBufferSize);
    gpuMemoryUsed += nonUniformSize + uboSize + ssboSize + singleBufferSize;

    result[HdPerfTokens->gpuMemoryUsed.GetString()] = gpuMemoryUsed;

    // Prompt derived registries to tally their resources.
    _TallyResourceAllocation(&result);

    gpuMemoryUsed =
        VtDictionaryGet<size_t>(result,
                                HdPerfTokens->gpuMemoryUsed.GetString(),
                                VtDefault = 0);

    HD_PERF_COUNTER_SET(HdPerfTokens->gpuMemoryUsed, gpuMemoryUsed);

    return result;
}
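// Usage sketch (hypothetical caller, for illustration only): clients can
// poll the returned dictionary for memory statistics, e.g.
//
//   VtDictionary stats = registry->GetResourceAllocation();
//   size_t gpuMem = VtDictionaryGet<size_t>(
//       stats, HdPerfTokens->gpuMemoryUsed.GetString(), VtDefault = 0);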
Hgi*
HdStResourceRegistry::GetHgi()
{
    return _hgi;
}

/// ------------------------------------------------------------------------
/// BAR allocation API
/// ------------------------------------------------------------------------
HdBufferArrayRangeSharedPtr
HdStResourceRegistry::AllocateNonUniformBufferArrayRange(
    TfToken const &role,
    HdBufferSpecVector const &bufferSpecs,
    HdBufferArrayUsageHint usageHint)
{
    return _AllocateBufferArrayRange(
        _nonUniformAggregationStrategy.get(),
        _nonUniformBufferArrayRegistry,
        role,
        bufferSpecs,
        usageHint);
}

HdBufferArrayRangeSharedPtr
HdStResourceRegistry::AllocateNonUniformImmutableBufferArrayRange(
    TfToken const &role,
    HdBufferSpecVector const &bufferSpecs,
    HdBufferArrayUsageHint usageHint)
{
    usageHint.bits.immutable = 1;

    return _AllocateBufferArrayRange(
        _nonUniformImmutableAggregationStrategy.get(),
        _nonUniformImmutableBufferArrayRegistry,
        role,
        bufferSpecs,
        usageHint);
}

HdBufferArrayRangeSharedPtr
HdStResourceRegistry::AllocateUniformBufferArrayRange(
    TfToken const &role,
    HdBufferSpecVector const &bufferSpecs,
    HdBufferArrayUsageHint usageHint)
{
    return _AllocateBufferArrayRange(
        _uniformUboAggregationStrategy.get(),
        _uniformUboBufferArrayRegistry,
        role,
        bufferSpecs,
        usageHint);
}

HdBufferArrayRangeSharedPtr
HdStResourceRegistry::AllocateShaderStorageBufferArrayRange(
    TfToken const &role,
    HdBufferSpecVector const &bufferSpecs,
    HdBufferArrayUsageHint usageHint)
{
    return _AllocateBufferArrayRange(
        _uniformSsboAggregationStrategy.get(),
        _uniformSsboBufferArrayRegistry,
        role,
        bufferSpecs,
        usageHint);
}

HdBufferArrayRangeSharedPtr
HdStResourceRegistry::AllocateSingleBufferArrayRange(
    TfToken const &role,
    HdBufferSpecVector const &bufferSpecs,
    HdBufferArrayUsageHint usageHint)
{
    return _AllocateBufferArrayRange(
        _singleAggregationStrategy.get(),
        _singleBufferArrayRegistry,
        role,
        bufferSpecs,
        usageHint);
}
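// Usage sketch (hypothetical caller, for illustration only): a prim
// typically gathers its buffer specs, requests an aggregated range, and
// then queues CPU data against it (see AddSource/AddSources below):
//
//   HdBufferSpecVector specs;
//   specs.emplace_back(HdTokens->points,
//                      HdTupleType{HdTypeFloatVec3, 1});
//   HdBufferArrayRangeSharedPtr range =
//       registry->AllocateNonUniformBufferArrayRange(
//           HdTokens->primvar, specs, HdBufferArrayUsageHint());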
/// ------------------------------------------------------------------------
/// BAR allocation/migration/update API
/// ------------------------------------------------------------------------
HdBufferArrayRangeSharedPtr
HdStResourceRegistry::UpdateNonUniformBufferArrayRange(
    TfToken const &role,
    HdBufferArrayRangeSharedPtr const& curRange,
    HdBufferSpecVector const &updatedOrAddedSpecs,
    HdBufferSpecVector const& removedSpecs,
    HdBufferArrayUsageHint usageHint)
{
    return _UpdateBufferArrayRange(
        _nonUniformAggregationStrategy.get(),
        _nonUniformBufferArrayRegistry,
        role,
        curRange, updatedOrAddedSpecs, removedSpecs, usageHint);
}

HdBufferArrayRangeSharedPtr
HdStResourceRegistry::UpdateNonUniformImmutableBufferArrayRange(
    TfToken const &role,
    HdBufferArrayRangeSharedPtr const& curRange,
    HdBufferSpecVector const &updatedOrAddedSpecs,
    HdBufferSpecVector const& removedSpecs,
    HdBufferArrayUsageHint usageHint)
{
    usageHint.bits.immutable = 1;

    return _UpdateBufferArrayRange(
        _nonUniformImmutableAggregationStrategy.get(),
        _nonUniformImmutableBufferArrayRegistry,
        role,
        curRange, updatedOrAddedSpecs, removedSpecs, usageHint);
}

HdBufferArrayRangeSharedPtr
HdStResourceRegistry::UpdateUniformBufferArrayRange(
    TfToken const &role,
    HdBufferArrayRangeSharedPtr const& curRange,
    HdBufferSpecVector const &updatedOrAddedSpecs,
    HdBufferSpecVector const& removedSpecs,
    HdBufferArrayUsageHint usageHint)
{
    return _UpdateBufferArrayRange(
        _uniformUboAggregationStrategy.get(),
        _uniformUboBufferArrayRegistry,
        role,
        curRange, updatedOrAddedSpecs, removedSpecs, usageHint);
}

HdBufferArrayRangeSharedPtr
HdStResourceRegistry::UpdateShaderStorageBufferArrayRange(
    TfToken const &role,
    HdBufferArrayRangeSharedPtr const& curRange,
    HdBufferSpecVector const &updatedOrAddedSpecs,
    HdBufferSpecVector const& removedSpecs,
    HdBufferArrayUsageHint usageHint)
{
    return _UpdateBufferArrayRange(
        _uniformSsboAggregationStrategy.get(),
        _uniformSsboBufferArrayRegistry,
        role,
        curRange, updatedOrAddedSpecs, removedSpecs, usageHint);
}

/// ------------------------------------------------------------------------
/// Resource update & computation queuing API
/// ------------------------------------------------------------------------
void
HdStResourceRegistry::AddSources(HdBufferArrayRangeSharedPtr const &range,
                                 HdBufferSourceSharedPtrVector &&sources)
{
    HD_TRACE_FUNCTION();
    HF_MALLOC_TAG_FUNCTION();

    if (ARCH_UNLIKELY(sources.empty())) {
        TF_RUNTIME_ERROR("sources list is empty");
        return;
    }

    // range has to be valid
    if (ARCH_UNLIKELY(!(range && range->IsValid()))) {
        TF_RUNTIME_ERROR("range is null or invalid");
        return;
    }

    // Check that each buffer is valid, and if not, erase it from the list.
    // We cannot use standard iterators here since erasing invalidates them;
    // however, the vector is unordered, so we can do a quick erase by
    // moving the last item over the erased one.
    size_t srcNum = 0;
    while (srcNum < sources.size()) {
        if (ARCH_LIKELY(sources[srcNum]->IsValid())) {
            if (ARCH_UNLIKELY(sources[srcNum]->HasPreChainedBuffer())) {
                AddSource(sources[srcNum]->GetPreChainedBuffer());
            }
            ++srcNum;
        } else {
            TF_RUNTIME_ERROR("Source buffer for %s is invalid",
                             sources[srcNum]->GetName().GetText());

            // Move the last item in the vector over this one. If this is
            // the last item, it will copy over itself and the pop_back
            // will remove it anyway.
            sources[srcNum] = sources.back();
            sources.pop_back();

            // Don't increment srcNum, as it now points to the moved item
            // or is off the end of the vector.
        }
    }

    // Check for the case where no sources remained valid.
    if (!sources.empty()) {
        _numBufferSourcesToResolve += sources.size();

        const _PendingSourceList::iterator it =
            _pendingSources.emplace_back(range, std::move(sources));

        TF_VERIFY(range.use_count() >= 2);
    }
}
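// Usage sketch (hypothetical names, for illustration only): CPU data is
// wrapped in buffer sources and queued against a range; the actual copy
// happens later, in _Commit():
//
//   HdBufferSourceSharedPtrVector sources;
//   sources.push_back(std::make_shared<HdVtBufferSource>(
//       HdTokens->points, VtValue(pointsArray)));
//   registry->AddSources(range, std::move(sources));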
void
HdStResourceRegistry::AddSource(HdBufferArrayRangeSharedPtr const &range,
                                HdBufferSourceSharedPtr const &source)
{
    HD_TRACE_FUNCTION();
    HF_MALLOC_TAG_FUNCTION();

    if (ARCH_UNLIKELY((!source) || (!range))) {
        TF_RUNTIME_ERROR("An input pointer is null");
        return;
    }

    // range has to be valid
    if (ARCH_UNLIKELY(!range->IsValid())) {
        TF_RUNTIME_ERROR("range is invalid");
        return;
    }

    // Buffer has to be valid
    if (ARCH_UNLIKELY(!source->IsValid())) {
        TF_RUNTIME_ERROR("source buffer for %s is invalid",
                         source->GetName().GetText());
        return;
    }

    if (ARCH_UNLIKELY(source->HasPreChainedBuffer())) {
        AddSource(source->GetPreChainedBuffer());
    }

    _pendingSources.emplace_back(range, source);
    ++_numBufferSourcesToResolve;  // Atomic
}

void
HdStResourceRegistry::AddSource(HdBufferSourceSharedPtr const &source)
{
    HD_TRACE_FUNCTION();
    HF_MALLOC_TAG_FUNCTION();

    if (ARCH_UNLIKELY(!source)) {
        TF_RUNTIME_ERROR("source pointer is null");
        return;
    }

    // Buffer has to be valid
    if (ARCH_UNLIKELY(!source->IsValid())) {
        TF_RUNTIME_ERROR("source buffer for %s is invalid",
                         source->GetName().GetText());
        return;
    }

    if (ARCH_UNLIKELY(source->HasPreChainedBuffer())) {
        AddSource(source->GetPreChainedBuffer());
    }

    _pendingSources.emplace_back(HdBufferArrayRangeSharedPtr(), source);
    ++_numBufferSourcesToResolve;  // Atomic
}

void
HdStResourceRegistry::AddComputation(HdBufferArrayRangeSharedPtr const &range,
                                     HdComputationSharedPtr const &computation)
{
    HD_TRACE_FUNCTION();
    HF_MALLOC_TAG_FUNCTION();

    // If the computation is a buffer source computation, it is appended to
    // pendingBufferSourceComputations, which is executed right after the
    // first buffer source transfers. Such computations produce buffer
    // sources as their results, so the registry also invokes another round
    // of transfers for those buffers. A computation that isn't marked as a
    // buffer source computation will be executed at the end.
    _pendingComputations.emplace_back(range, computation);
}
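// Usage sketch (hypothetical names, for illustration only): a GPU
// computation is queued against the destination range it writes to,
//
//   registry->AddComputation(normalsRange, smoothNormalsComputation);
//
// and runs during phase 6 of _Commit(), after CPU sources have resolved
// and buffer arrays have been reallocated.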
/// ------------------------------------------------------------------------
/// Dispatch & persistent buffer API
/// ------------------------------------------------------------------------
HdStDispatchBufferSharedPtr
HdStResourceRegistry::RegisterDispatchBuffer(
    TfToken const &role, int count, int commandNumUints)
{
    HdStDispatchBufferSharedPtr const result =
        std::make_shared<HdStDispatchBuffer>(
            _hgi, role, count, commandNumUints);

    _dispatchBufferRegistry.push_back(result);

    return result;
}

HdStPersistentBufferSharedPtr
HdStResourceRegistry::RegisterPersistentBuffer(
    TfToken const &role, size_t dataSize, void *data)
{
    HdStPersistentBufferSharedPtr const result =
        std::make_shared<HdStPersistentBuffer>(
            _hgi, role, dataSize, data);

    _persistentBufferRegistry.push_back(result);

    return result;
}

void
HdStResourceRegistry::GarbageCollectDispatchBuffers()
{
    HD_TRACE_FUNCTION();

    _dispatchBufferRegistry.erase(
        std::remove_if(
            _dispatchBufferRegistry.begin(), _dispatchBufferRegistry.end(),
            std::bind(&HdStDispatchBufferSharedPtr::unique,
                      std::placeholders::_1)),
        _dispatchBufferRegistry.end());
}

void
HdStResourceRegistry::GarbageCollectPersistentBuffers()
{
    HD_TRACE_FUNCTION();

    _persistentBufferRegistry.erase(
        std::remove_if(
            _persistentBufferRegistry.begin(),
            _persistentBufferRegistry.end(),
            std::bind(&HdStPersistentBufferSharedPtr::unique,
                      std::placeholders::_1)),
        _persistentBufferRegistry.end());
}

/// ------------------------------------------------------------------------
/// Instance Registries
/// ------------------------------------------------------------------------
HdInstance<HdSt_MeshTopologySharedPtr>
HdStResourceRegistry::RegisterMeshTopology(
        HdInstance<HdSt_MeshTopologySharedPtr>::ID id)
{
    return _Register(id, _meshTopologyRegistry,
                     HdPerfTokens->instMeshTopology);
}

HdInstance<HdSt_BasisCurvesTopologySharedPtr>
HdStResourceRegistry::RegisterBasisCurvesTopology(
        HdInstance<HdSt_BasisCurvesTopologySharedPtr>::ID id)
{
    return _Register(id, _basisCurvesTopologyRegistry,
                     HdPerfTokens->instBasisCurvesTopology);
}

HdInstance<Hd_VertexAdjacencySharedPtr>
HdStResourceRegistry::RegisterVertexAdjacency(
        HdInstance<Hd_VertexAdjacencySharedPtr>::ID id)
{
    return _Register(id, _vertexAdjacencyRegistry,
                     HdPerfTokens->instVertexAdjacency);
}

HdInstance<HdBufferArrayRangeSharedPtr>
HdStResourceRegistry::RegisterMeshIndexRange(
        HdInstance<HdBufferArrayRangeSharedPtr>::ID id, TfToken const &name)
{
    return _Register(id, _meshTopologyIndexRangeRegistry[name],
                     HdPerfTokens->instMeshTopologyRange);
}

HdInstance<HdBufferArrayRangeSharedPtr>
HdStResourceRegistry::RegisterBasisCurvesIndexRange(
        HdInstance<HdBufferArrayRangeSharedPtr>::ID id, TfToken const &name)
{
    return _Register(id, _basisCurvesTopologyIndexRangeRegistry[name],
                     HdPerfTokens->instBasisCurvesTopologyRange);
}

HdInstance<HdBufferArrayRangeSharedPtr>
HdStResourceRegistry::RegisterPrimvarRange(
        HdInstance<HdBufferArrayRangeSharedPtr>::ID id)
{
    return _Register(id, _primvarRangeRegistry,
                     HdPerfTokens->instPrimvarRange);
}

HdInstance<HdBufferArrayRangeSharedPtr>
HdStResourceRegistry::RegisterExtComputationDataRange(
        HdInstance<HdBufferArrayRangeSharedPtr>::ID id)
{
    return _Register(id, _extComputationDataRangeRegistry,
                     HdPerfTokens->instExtComputationDataRange);
}

HdInstance<HdStTextureResourceSharedPtr>
HdStResourceRegistry::RegisterTextureResource(TextureKey id)
{
    return _textureResourceRegistry.GetInstance(id);
}

HdInstance<HdStTextureResourceSharedPtr>
HdStResourceRegistry::FindTextureResource(TextureKey id, bool *found)
{
    return _textureResourceRegistry.FindInstance(id, found);
}

HdInstance<HdSt_GeometricShaderSharedPtr>
HdStResourceRegistry::RegisterGeometricShader(
        HdInstance<HdSt_GeometricShaderSharedPtr>::ID id)
{
    return _geometricShaderRegistry.GetInstance(id);
}

HdInstance<HdStGLSLProgramSharedPtr>
HdStResourceRegistry::RegisterGLSLProgram(
        HdInstance<HdStGLSLProgramSharedPtr>::ID id)
{
    return _glslProgramRegistry.GetInstance(id);
}

HdInstance<HdStTextureResourceHandleSharedPtr>
HdStResourceRegistry::RegisterTextureResourceHandle(
        HdInstance<HdStTextureResourceHandleSharedPtr>::ID id)
{
    return _textureResourceHandleRegistry.GetInstance(id);
}

HdInstance<HdStTextureResourceHandleSharedPtr>
HdStResourceRegistry::FindTextureResourceHandle(
        HdInstance<HdStTextureResourceHandleSharedPtr>::ID id, bool *found)
{
    return _textureResourceHandleRegistry.FindInstance(id, found);
}
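// Usage sketch (hypothetical names, for illustration only): callers hash
// the shareable data to an ID, then either reuse the registered value or
// fill it in on first use:
//
//   HdInstance<HdSt_MeshTopologySharedPtr> instance =
//       registry->RegisterMeshTopology(topologyId);
//   if (instance.IsFirstInstance()) {
//       instance.SetValue(newTopology);
//   }
//   HdSt_MeshTopologySharedPtr topology = instance.GetValue();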
std::ostream &operator <<(
    std::ostream &out,
    const HdStResourceRegistry& self)
{
    out << "HdStResourceRegistry " << &self << " :\n";
    out << self._nonUniformBufferArrayRegistry;
    out << self._nonUniformImmutableBufferArrayRegistry;
    out << self._uniformUboBufferArrayRegistry;
    out << self._uniformSsboBufferArrayRegistry;
    out << self._singleBufferArrayRegistry;
    return out;
}

void
HdStResourceRegistry::_CommitTextures()
{
    HdStShaderCode::ResourceContext ctx(this);

    const std::set<HdStShaderCodeSharedPtr> shaderCodes =
        _textureHandleRegistry->Commit();

    // Give associated HdStShaderCode objects a chance to add buffer
    // sources that rely on texture sampler handles (bindless) or
    // texture metadata (e.g., sampling transform for volume fields).
    for (HdStShaderCodeSharedPtr const & shaderCode : shaderCodes) {
        shaderCode->AddResourcesFromTextures(ctx);
    }
}
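// Overview of the commit sequence implemented below (summarized from the
// inline phase comments): textures are committed first, then (1) pending
// CPU buffer sources are resolved and their ranges resized, (2) GPU
// computation destinations are sized, (3) buffer arrays are reallocated,
// (4) resolved data is copied into the aggregated buffers, (5) consolidated
// updates are flushed, and (6) GPU computations execute in registration
// order.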
void
HdStResourceRegistry::_Commit()
{
    // Process textures first, before resolving buffer sources, since
    // some computation buffer sources need metadata from textures
    // (such as the grid transform for an OpenVDB file) or texture
    // handles (for bindless textures).
    _CommitTextures();

    // TODO: requests should be sorted by resource and range.
    {
        HD_TRACE_SCOPE("Resolve");
        // 1. resolve & resize phase:
        // for each pending source, resolve and check if it needs buffer
        // reallocation or not.

        size_t numBufferSourcesResolved = 0;
        int numThreads = 1; //omp_get_max_threads();
        int numIterations = 0;

        // iterate until all buffer sources have been resolved.
        while (numBufferSourcesResolved < _numBufferSourcesToResolve) {
            // XXX: Parallel for is currently much slower than a single
            // thread in all tested scenarios, disabling until we can
            // figure out what's going on here.
//#pragma omp parallel for
            for (int i = 0; i < numThreads; ++i) {
                // iterate over all pending sources
                for (_PendingSource const& req: _pendingSources) {
                    for (HdBufferSourceSharedPtr const& source: req.sources) {
                        // execute computation.
                        // call IsResolved first since Resolve is virtual and
                        // could be costly.
                        if (!source->IsResolved()) {
                            if (source->Resolve()) {
                                TF_VERIFY(source->IsResolved(), "Name = %s",
                                          source->GetName().GetText());

                                ++numBufferSourcesResolved;

                                // call resize if it's the first in sources.
                                if (req.range &&
                                    source == *req.sources.begin()) {
                                    req.range->Resize(
                                        source->GetNumElements());
                                }
                            }
                        }
                    }
                }
            }
            if (++numIterations > 100) {
                TF_WARN("Too many iterations in resolving buffer source. "
                        "It's likely due to an inconsistent dependency.");
                break;
            }
        }

        TF_VERIFY(numBufferSourcesResolved == _numBufferSourcesToResolve);
        HD_PERF_COUNTER_ADD(HdPerfTokens->bufferSourcesResolved,
                            numBufferSourcesResolved);
    }

    {
        HD_TRACE_SCOPE("GPU computation prep");
        // 2. GPU computation prep phase:
        // for each gpu computation, make sure its destination buffer is
        // allocated.
        //
        TF_FOR_ALL(compIt, _pendingComputations) {
            if (compIt->range) {
                // ask for the size of the destination buffer of the gpu
                // computation
                int numElements = compIt->computation->GetNumOutputElements();
                if (numElements > 0) {
                    // We call BufferArray->Reallocate() later so that
                    // the reallocation happens only once per BufferArray.
                    //
                    // if the range is already larger than the current one,
                    // leave it as it is (there is a possibility that the GPU
                    // computation generates less data than it used to).
                    int currentNumElements = compIt->range->GetNumElements();
                    if (currentNumElements < numElements) {
                        compIt->range->Resize(numElements);
                    }
                }
            }
        }
    }

    {
        HD_TRACE_SCOPE("Reallocate buffer arrays");
        // 3. reallocation phase:
        //
        _nonUniformBufferArrayRegistry.ReallocateAll(
            _nonUniformAggregationStrategy.get());
        _nonUniformImmutableBufferArrayRegistry.ReallocateAll(
            _nonUniformImmutableAggregationStrategy.get());
        _uniformUboBufferArrayRegistry.ReallocateAll(
            _uniformUboAggregationStrategy.get());
        _uniformSsboBufferArrayRegistry.ReallocateAll(
            _uniformSsboAggregationStrategy.get());
        _singleBufferArrayRegistry.ReallocateAll(
            _singleAggregationStrategy.get());
    }

    {
        HD_TRACE_SCOPE("Copy");
        // 4. copy phase:
        //
        TF_FOR_ALL(reqIt, _pendingSources) {
            // CPU computation may not have a range. (e.g. adjacency)
            if (!reqIt->range) continue;

            // CPU computation may result in an empty buffer source
            // (e.g. the GPU quadrangulation table could be empty for a
            // quad-only mesh)
            if (reqIt->range->GetNumElements() == 0) continue;

            // Note that for staticArray in interleavedVBO,
            // it's possible range->GetNumElements() != srcIt->GetNumElements().
            // (range->GetNumElements() should always be 1, but srcIt
            // (vtBufferSource) could have a VtArray with arraySize entries).
            TF_FOR_ALL(srcIt, reqIt->sources) {
                // execute copy
                reqIt->range->CopyData(*srcIt);

                // also copy any chained buffers
                _CopyChainedBuffers(*srcIt, reqIt->range);
            }

            if (TfDebug::IsEnabled(HD_BUFFER_ARRAY_RANGE_CLEANED)) {
                std::stringstream ss;
                ss << *reqIt->range;
                TF_DEBUG(HD_BUFFER_ARRAY_RANGE_CLEANED).Msg("CLEAN: %s\n",
                                                            ss.str().c_str());
            }
        }
    }

    {
        // HD_TRACE_SCOPE("Flush");
        // 5. flush phase:
        //
        // flush consolidated buffer updates
    }

    {
        HD_TRACE_SCOPE("GpuComputation Execute");
        // 6. execute GPU computations
        //
        // note: GPU computations have to be executed in the order that
        // they are registered.
        //   e.g. smooth normals -> quadrangulation.
        //
        TF_FOR_ALL(it, _pendingComputations) {
            it->computation->Execute(it->range, this);

            HD_PERF_COUNTER_INCR(HdPerfTokens->computationsCommited);
        }
    }

    // release sources
    WorkParallelForEach(_pendingSources.begin(), _pendingSources.end(),
                        [](_PendingSource &ps) {
                            ps.range.reset();
                            ps.sources.clear();
                        });

    _pendingSources.clear();
    _numBufferSourcesToResolve = 0;
    _pendingComputations.clear();
}
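// For context (an assumption based on the Hd base class, not enforced in
// this file): _Commit() is the override invoked through the public
// HdResourceRegistry::Commit() entry point, typically once per frame after
// prims have queued their sources and computations during sync.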
void
HdStResourceRegistry::_GarbageCollect()
{
    // The sequence in which we run garbage collection is significant.
    // We want to clean objects first which might be holding references
    // to other objects which will be subsequently cleaned up.

    GarbageCollectDispatchBuffers();
    GarbageCollectPersistentBuffers();

    {
        size_t count = _meshTopologyRegistry.GarbageCollect();
        HD_PERF_COUNTER_SET(HdPerfTokens->instMeshTopology, count);
    }

    {
        size_t count = _basisCurvesTopologyRegistry.GarbageCollect();
        HD_PERF_COUNTER_SET(HdPerfTokens->instBasisCurvesTopology, count);
    }

    {
        size_t count = _vertexAdjacencyRegistry.GarbageCollect();
        HD_PERF_COUNTER_SET(HdPerfTokens->instVertexAdjacency, count);
    }

    {
        size_t count = 0;
        for (auto & it: _meshTopologyIndexRangeRegistry) {
            count += it.second.GarbageCollect();
        }
        HD_PERF_COUNTER_SET(HdPerfTokens->instMeshTopologyRange, count);
    }

    {
        size_t count = 0;
        for (auto & it: _basisCurvesTopologyIndexRangeRegistry) {
            count += it.second.GarbageCollect();
        }
        HD_PERF_COUNTER_SET(HdPerfTokens->instBasisCurvesTopologyRange, count);
    }

    {
        size_t count = _primvarRangeRegistry.GarbageCollect();
        HD_PERF_COUNTER_SET(HdPerfTokens->instPrimvarRange, count);
    }

    {
        size_t count = _extComputationDataRangeRegistry.GarbageCollect();
        HD_PERF_COUNTER_SET(HdPerfTokens->instExtComputationDataRange, count);
    }

    // Cleanup shader registries
    _geometricShaderRegistry.GarbageCollect();
    _glslProgramRegistry.GarbageCollect();
    _textureResourceHandleRegistry.GarbageCollect();

    // Cleanup buffer arrays.
    // Buffer arrays retain weak_ptrs to their ranges; all unused ranges
    // should be expired at this point.
    _nonUniformBufferArrayRegistry.GarbageCollect();
    _nonUniformImmutableBufferArrayRegistry.GarbageCollect();
    _uniformUboBufferArrayRegistry.GarbageCollect();
    _uniformSsboBufferArrayRegistry.GarbageCollect();
    _singleBufferArrayRegistry.GarbageCollect();
}

void
HdStResourceRegistry::_GarbageCollectBprims()
{
    // Cleanup texture registries
    _textureResourceRegistry.GarbageCollect();
}

HdBufferArrayRangeSharedPtr
HdStResourceRegistry::_AllocateBufferArrayRange(
    HdAggregationStrategy *strategy,
    HdBufferArrayRegistry &bufferArrayRegistry,
    TfToken const &role,
    HdBufferSpecVector const &bufferSpecs,
    HdBufferArrayUsageHint usageHint)
{
    return bufferArrayRegistry.AllocateRange(
        strategy,
        role,
        bufferSpecs,
        usageHint);
}
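// Note: _AllocateBufferArrayRange above is a thin wrapper; the actual
// aggregation (placing ranges with compatible specs and usage into a
// shared buffer array) is performed by HdBufferArrayRegistry::
// AllocateRange() together with the supplied aggregation strategy.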
HdBufferArrayRangeSharedPtr
HdStResourceRegistry::_UpdateBufferArrayRange(
    HdAggregationStrategy *strategy,
    HdBufferArrayRegistry &bufferArrayRegistry,
    TfToken const &role,
    HdBufferArrayRangeSharedPtr const& curRange,
    HdBufferSpecVector const &updatedOrAddedSpecs,
    HdBufferSpecVector const& removedSpecs,
    HdBufferArrayUsageHint usageHint)
{
    HD_TRACE_FUNCTION();

    if (!curRange || !curRange->IsValid()) {
        if (!removedSpecs.empty()) {
            TF_CODING_ERROR("Non-empty removed specs during BAR allocation\n");
        }

        // Allocate a new BAR and return it.
        return _AllocateBufferArrayRange(strategy, bufferArrayRegistry, role,
                                         updatedOrAddedSpecs, usageHint);
    }

    HdBufferSpecVector curBufferSpecs;
    curRange->GetBufferSpecs(&curBufferSpecs);

    // Determine if the BAR needs reallocation + migration
    {
        bool haveBuffersToUpdate = !updatedOrAddedSpecs.empty();
        bool dataUpdateForImmutableBar = curRange->IsImmutable() &&
                                         haveBuffersToUpdate;
        bool usageHintChanged = curRange->GetUsageHint().value !=
                                usageHint.value;

        bool needsMigration =
            dataUpdateForImmutableBar ||
            usageHintChanged ||
            // buffer removal or addition
            !removedSpecs.empty() ||
            !HdBufferSpec::IsSubset(updatedOrAddedSpecs, curBufferSpecs);

        if (!needsMigration) {
            // The existing BAR can be used to queue any updates.
            return curRange;
        }
    }

    // Create a new BAR ...
    HdBufferSpecVector newBufferSpecs =
        HdBufferSpec::ComputeUnion(updatedOrAddedSpecs,
            HdBufferSpec::ComputeDifference(curBufferSpecs, removedSpecs));

    HdBufferArrayRangeSharedPtr newRange = _AllocateBufferArrayRange(
        strategy, bufferArrayRegistry, role, newBufferSpecs, usageHint);

    // ... and migrate relevant buffers that haven't changed.
    // (skip the dirty sources, since new data needs to be copied over)
    HdBufferSpecVector migrateSpecs = HdBufferSpec::ComputeDifference(
        newBufferSpecs, updatedOrAddedSpecs);
    for (const auto& spec : migrateSpecs) {
        AddComputation(/*dstRange*/newRange,
                       std::make_shared<HdStCopyComputationGPU>(
                           /*src=*/curRange, spec.name));
    }

    // Increment the version of the underlying bufferArray to notify
    // all batches pointing to the range that they need to be rebuilt.
    curRange->IncrementVersion();

    // XXX: The existing range may no longer be used. Currently, the caller
    // is expected to flag garbage collection to reclaim its resources.

    HD_PERF_COUNTER_INCR(HdPerfTokens->bufferArrayRangeMigrated);

    return newRange;
}
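// Worked example of the spec arithmetic above (hypothetical specs, for
// illustration): given curBufferSpecs = {points, normals},
// updatedOrAddedSpecs = {widths}, and removedSpecs = {normals}:
//
//   newBufferSpecs = union({widths}, {points, normals} - {normals})
//                  = {points, widths}
//   migrateSpecs   = {points, widths} - {widths} = {points}
//
// so only "points" is copied from the old BAR via HdStCopyComputationGPU,
// while "widths" waits for its queued source data.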
void
HdStResourceRegistry::_TallyResourceAllocation(VtDictionary *result) const
{
    size_t gpuMemoryUsed =
        VtDictionaryGet<size_t>(*result,
                                HdPerfTokens->gpuMemoryUsed.GetString(),
                                VtDefault = 0);

    // dispatch buffers
    for (auto const & buffer: _dispatchBufferRegistry) {
        if (!TF_VERIFY(buffer)) {
            continue;
        }

        std::string const & role = buffer->GetRole().GetString();
        size_t size = size_t(buffer->GetEntireResource()->GetSize());

        (*result)[role] = VtDictionaryGet<size_t>(*result, role,
                                                  VtDefault = 0) + size;
        gpuMemoryUsed += size;
    }

    // persistent buffers
    for (auto const & buffer: _persistentBufferRegistry) {
        if (!TF_VERIFY(buffer)) {
            continue;
        }

        std::string const & role = buffer->GetRole().GetString();
        size_t size = size_t(buffer->GetSize());

        (*result)[role] = VtDictionaryGet<size_t>(*result, role,
                                                  VtDefault = 0) + size;
        gpuMemoryUsed += size;
    }

    // glsl program & ubo allocation
    for (auto const & it: _glslProgramRegistry) {
        HdStGLSLProgramSharedPtr const & program = it.second.value;

        // In the event of a compile or link error, programs can be null
        if (!program) {
            continue;
        }

        HgiShaderProgramHandle const& prgHandle = program->GetProgram();
        size_t size = prgHandle ? prgHandle->GetByteSizeOfResource() : 0;

        // the role of the program and the global uniform buffer is always
        // the same.
        std::string const &role = program->GetRole().GetString();
        (*result)[role] = VtDictionaryGet<size_t>(*result, role,
                                                  VtDefault = 0) + size;
        gpuMemoryUsed += size;
    }

    // Texture resources
    {
        size_t textureResourceMemory = 0;

        for (auto const & it: _textureResourceRegistry) {
            HdStTextureResourceSharedPtr const & texResource =
                it.second.value;
            // In the event of an asset error, texture resources can be null
            if (!texResource) {
                continue;
            }

            textureResourceMemory += texResource->GetMemoryUsed();
        }

        (*result)[HdPerfTokens->textureResourceMemory] = VtValue(
            textureResourceMemory);
        gpuMemoryUsed += textureResourceMemory;
    }

    // Texture registry
    {
        GlfTextureRegistry &textureReg = GlfTextureRegistry::GetInstance();
        std::vector<VtDictionary> textureInfo = textureReg.GetTextureInfos();
        size_t textureMemory = 0;
        TF_FOR_ALL (textureIt, textureInfo) {
            VtDictionary &info = (*textureIt);
            textureMemory += info["memoryUsed"].Get<size_t>();
        }
        (*result)[HdPerfTokens->textureMemory] = VtValue(textureMemory);
    }

    (*result)[HdPerfTokens->gpuMemoryUsed.GetString()] = gpuMemoryUsed;
}

HdStTextureHandleSharedPtr
HdStResourceRegistry::AllocateTextureHandle(
        HdStTextureIdentifier const &textureId,
        const HdTextureType textureType,
        HdSamplerParameters const &samplerParams,
        const size_t memoryRequest,
        const bool createBindlessHandle,
        HdStShaderCodePtr const &shaderCode)
{
    return _textureHandleRegistry->AllocateTextureHandle(
        textureId, textureType,
        samplerParams, memoryRequest, createBindlessHandle,
        shaderCode);
}

HdStTextureObjectSharedPtr
HdStResourceRegistry::AllocateTextureObject(
        HdStTextureIdentifier const &textureId,
        const HdTextureType textureType)
{
    HdSt_TextureObjectRegistry * const reg =
        _textureHandleRegistry->GetTextureObjectRegistry();

    return reg->AllocateTextureObject(
        textureId, textureType);
}

PXR_NAMESPACE_CLOSE_SCOPE