// Copyright 2015 The Gemmlowp Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // instrumentation.h: contains the definitions needed to // instrument code for profiling: // ScopedProfilingLabel, RegisterCurrentThreadForProfiling. // // profiler.h is only needed to drive the profiler: // StartProfiling, FinishProfiling. // // See the usage example in profiler.h. #ifndef GEMMLOWP_PROFILING_INSTRUMENTATION_H_ #define GEMMLOWP_PROFILING_INSTRUMENTATION_H_ #include #ifndef GEMMLOWP_USE_STLPORT #include #else #include namespace std { using ::int16_t; using ::int32_t; using ::int8_t; using ::size_t; using ::uint16_t; using ::uint32_t; using ::uint8_t; using ::uintptr_t; } // namespace std #endif #include #include #include #ifdef GEMMLOWP_PROFILING #include #include #endif #include "./pthread_everywhere.h" namespace gemmlowp { inline void ReleaseBuildAssertion(bool condition, const char* msg) { if (!condition) { fprintf(stderr, "gemmlowp error: %s\n", msg); abort(); } } class Mutex { public: Mutex(const Mutex&) = delete; Mutex& operator=(const Mutex&) = delete; Mutex() { pthread_mutex_init(&m, NULL); } ~Mutex() { pthread_mutex_destroy(&m); } void Lock() { pthread_mutex_lock(&m); } void Unlock() { pthread_mutex_unlock(&m); } private: pthread_mutex_t m; }; class GlobalMutexes { public: static Mutex* Profiler() { static Mutex m; return &m; } static Mutex* EightBitIntGemm() { static Mutex m; return &m; } }; // A very simple RAII helper to lock and unlock a Mutex struct ScopedLock { ScopedLock(Mutex* m) : _m(m) { _m->Lock(); } ~ScopedLock() { _m->Unlock(); } private: Mutex* _m; }; // Profiling definitions. Two paths: when profiling is enabled, // and when profiling is disabled. #ifdef GEMMLOWP_PROFILING // This code path is when profiling is enabled. // A pseudo-call-stack. Contrary to a real call-stack, this only // contains pointers to literal strings that were manually entered // in the instrumented code (see ScopedProfilingLabel). struct ProfilingStack { static const std::size_t kMaxSize = 14; typedef const char* LabelsArrayType[kMaxSize]; LabelsArrayType labels; std::size_t size; Mutex* lock; ProfilingStack() { memset(this, 0, sizeof(ProfilingStack)); } ~ProfilingStack() { delete lock; } void Push(const char* label) { ScopedLock sl(lock); ReleaseBuildAssertion(size < kMaxSize, "ProfilingStack overflow"); labels[size] = label; size++; } void Pop() { ScopedLock sl(lock); ReleaseBuildAssertion(size > 0, "ProfilingStack underflow"); size--; } void UpdateTop(const char* new_label) { ScopedLock sl(lock); assert(size); labels[size - 1] = new_label; } ProfilingStack& operator=(const ProfilingStack& other) { memcpy(this, &other, sizeof(ProfilingStack)); return *this; } bool operator==(const ProfilingStack& other) const { return !memcmp(this, &other, sizeof(ProfilingStack)); } }; static_assert( !(sizeof(ProfilingStack) & (sizeof(ProfilingStack) - 1)), "ProfilingStack should have power-of-two size to fit in cache lines"); struct ThreadInfo; // The global set of threads being profiled. inline std::set& ThreadsUnderProfiling() { static std::set v; return v; } struct ThreadInfo { pthread_key_t key; // used only to get a callback at thread exit. ProfilingStack stack; ThreadInfo() { pthread_key_create(&key, ThreadExitCallback); pthread_setspecific(key, this); stack.lock = new Mutex(); } static void ThreadExitCallback(void* ptr) { ScopedLock sl(GlobalMutexes::Profiler()); ThreadInfo* self = static_cast(ptr); ThreadsUnderProfiling().erase(self); } }; inline ThreadInfo& ThreadLocalThreadInfo() { static pthread_key_t key; static auto DeleteThreadInfo = [](void* threadInfoPtr) { ThreadInfo* threadInfo = static_cast(threadInfoPtr); if (threadInfo) { delete threadInfo; } }; // key_result is unused. The purpose of this 'static' local object is // to have its initializer (the pthread_key_create call) performed exactly // once, in a way that is guaranteed (since C++11) to be reentrant. static const int key_result = pthread_key_create(&key, DeleteThreadInfo); (void)key_result; ThreadInfo* threadInfo = static_cast(pthread_getspecific(key)); if (!threadInfo) { threadInfo = new ThreadInfo(); pthread_setspecific(key, threadInfo); } return *threadInfo; } // ScopedProfilingLabel is how one instruments code for profiling // with this profiler. Construct local ScopedProfilingLabel variables, // passing a literal string describing the local code. Profile // samples will then be annotated with this label, while it is in scope // (whence the name --- also known as RAII). // See the example in profiler.h. class ScopedProfilingLabel { ProfilingStack* profiling_stack_; public: explicit ScopedProfilingLabel(const char* label) : profiling_stack_(&ThreadLocalThreadInfo().stack) { profiling_stack_->Push(label); } ~ScopedProfilingLabel() { profiling_stack_->Pop(); } void Update(const char* new_label) { profiling_stack_->UpdateTop(new_label); } }; // To be called once on each thread to be profiled. inline void RegisterCurrentThreadForProfiling() { ScopedLock sl(GlobalMutexes::Profiler()); ThreadsUnderProfiling().insert(&ThreadLocalThreadInfo()); } #else // not GEMMLOWP_PROFILING // This code path is when profiling is disabled. // This empty definition of ScopedProfilingLabel ensures that // it has zero runtime overhead when profiling is disabled. struct ScopedProfilingLabel { explicit ScopedProfilingLabel(const char*) {} void Update(const char*) {} }; inline void RegisterCurrentThreadForProfiling() {} #endif } // end namespace gemmlowp #endif // GEMMLOWP_PROFILING_INSTRUMENTATION_H_