/*==========================================================================; * * Copyright (C) Microsoft Corporation. All Rights Reserved. * * File: PIXEventsCommon.h * Content: PIX include file * Don't include this file directly - use pix3.h * ****************************************************************************/ #pragma once #ifndef _PIXEventsCommon_H_ #define _PIXEventsCommon_H_ #if defined(_AMD64_) || defined(_X86_) #include #endif // _AMD64_ || _X86_ enum PIXEventType { PIXEvent_EndEvent = 0x000, PIXEvent_BeginEvent_VarArgs = 0x001, PIXEvent_BeginEvent_NoArgs = 0x002, PIXEvent_SetMarker_VarArgs = 0x007, PIXEvent_SetMarker_NoArgs = 0x008, PIXEvent_EndEvent_OnContext = 0x010, PIXEvent_BeginEvent_OnContext_VarArgs = 0x011, PIXEvent_BeginEvent_OnContext_NoArgs = 0x012, PIXEvent_SetMarker_OnContext_VarArgs = 0x017, PIXEvent_SetMarker_OnContext_NoArgs = 0x018, }; static const UINT64 PIXEventsReservedRecordSpaceQwords = 64; //this is used to make sure SSE string copy always will end 16-byte write in the current block //this way only a check if destination < limit can be performed, instead of destination < limit - 1 //since both these are UINT64* and SSE writes in 16 byte chunks, 8 bytes are kept in reserve //so even if SSE overwrites 8 extra bytes, those will still belong to the correct block //on next iteration check destination will be greater than limit //this is used as well for fixed size UMD events and PIXEndEvent since these require less space //than other variable length user events and do not need big reserved space static const UINT64 PIXEventsReservedTailSpaceQwords = 2; static const UINT64 PIXEventsSafeFastCopySpaceQwords = PIXEventsReservedRecordSpaceQwords - PIXEventsReservedTailSpaceQwords; static const UINT64 PIXEventsGraphicsRecordSpaceQwords = 64; //Bits 7-19 (13 bits) static const UINT64 PIXEventsBlockEndMarker = 0x00000000000FFF80; //Bits 10-19 (10 bits) static const UINT64 PIXEventsTypeReadMask = 0x00000000000FFC00; static const UINT64 PIXEventsTypeWriteMask = 0x00000000000003FF; static const UINT64 PIXEventsTypeBitShift = 10; //Bits 20-63 (44 bits) static const UINT64 PIXEventsTimestampReadMask = 0xFFFFFFFFFFF00000; static const UINT64 PIXEventsTimestampWriteMask = 0x00000FFFFFFFFFFF; static const UINT64 PIXEventsTimestampBitShift = 20; inline UINT64 PIXEncodeEventInfo(UINT64 timestamp, PIXEventType eventType) { return ((timestamp & PIXEventsTimestampWriteMask) << PIXEventsTimestampBitShift) | (((UINT64)eventType & PIXEventsTypeWriteMask) << PIXEventsTypeBitShift); } //Bits 60-63 (4) static const UINT64 PIXEventsStringAlignmentWriteMask = 0x000000000000000F; static const UINT64 PIXEventsStringAlignmentReadMask = 0xF000000000000000; static const UINT64 PIXEventsStringAlignmentBitShift = 60; //Bits 55-59 (5) static const UINT64 PIXEventsStringCopyChunkSizeWriteMask = 0x000000000000001F; static const UINT64 PIXEventsStringCopyChunkSizeReadMask = 0x0F80000000000000; static const UINT64 PIXEventsStringCopyChunkSizeBitShift = 55; //Bit 54 static const UINT64 PIXEventsStringIsANSIWriteMask = 0x0000000000000001; static const UINT64 PIXEventsStringIsANSIReadMask = 0x0040000000000000; static const UINT64 PIXEventsStringIsANSIBitShift = 54; //Bit 53 static const UINT64 PIXEventsStringIsShortcutWriteMask = 0x0000000000000001; static const UINT64 PIXEventsStringIsShortcutReadMask = 0x0020000000000000; static const UINT64 PIXEventsStringIsShortcutBitShift = 53; inline UINT64 PIXEncodeStringInfo(UINT64 alignment, UINT64 copyChunkSize, BOOL isANSI, BOOL isShortcut) { return ((alignment & PIXEventsStringAlignmentWriteMask) << PIXEventsStringAlignmentBitShift) | ((copyChunkSize & PIXEventsStringCopyChunkSizeWriteMask) << PIXEventsStringCopyChunkSizeBitShift) | (((UINT64)isANSI & PIXEventsStringIsANSIWriteMask) << PIXEventsStringIsANSIBitShift) | (((UINT64)isShortcut & PIXEventsStringIsShortcutWriteMask) << PIXEventsStringIsShortcutBitShift); } template inline bool PIXIsPointerAligned(T* pointer) { return !(((UINT64)pointer) & (alignment - 1)); } template inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, T argument) { if (destination < limit) { *((T*)destination) = argument; ++destination; } } //floats must be cast to double during writing the data to be properly printed later when reading the data //this is needed because when float is passed to varargs function it's cast to double template<> inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, float argument) { if (destination < limit) { *((double*)destination) = (double)(argument); ++destination; } } //char has to be cast to a longer signed integer type //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier template<> inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, char argument) { if (destination < limit) { *((INT64*)destination) = (INT64)(argument); ++destination; } } //unsigned char has to be cast to a longer unsigned integer type //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier template<> inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, unsigned char argument) { if (destination < limit) { *destination = (UINT64)(argument); ++destination; } } //bool has to be cast to an integer since it's not explicitly supported by string format routines //there's no format specifier for bool type, but it should work with integer format specifiers template<> inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, bool argument) { if (destination < limit) { *destination = (UINT64)(argument); ++destination; } } inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument) { *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE); while (destination < limit) { UINT64 c = argument[0]; if (!c) { *destination++ = 0; return; } UINT64 x = c; c = argument[1]; if (!c) { *destination++ = x; return; } x |= c << 8; c = argument[2]; if (!c) { *destination++ = x; return; } x |= c << 16; c = argument[3]; if (!c) { *destination++ = x; return; } x |= c << 24; c = argument[4]; if (!c) { *destination++ = x; return; } x |= c << 32; c = argument[5]; if (!c) { *destination++ = x; return; } x |= c << 40; c = argument[6]; if (!c) { *destination++ = x; return; } x |= c << 48; c = argument[7]; if (!c) { *destination++ = x; return; } x |= c << 56; *destination++ = x; argument += 8; } } inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument) { if (PIXIsPointerAligned<8>(argument)) { *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE); UINT64* source = (UINT64*)argument; while (destination < limit) { UINT64 qword = *source++; *destination++ = qword; //check if any of the characters is a terminating zero if (!((qword & 0xFF00000000000000) && (qword & 0xFF000000000000) && (qword & 0xFF0000000000) && (qword & 0xFF00000000) && (qword & 0xFF000000) && (qword & 0xFF0000) && (qword & 0xFF00) && (qword & 0xFF))) { break; } } } else { PIXCopyEventArgumentSlowest(destination, limit, argument); } } template<> inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument) { if (destination < limit) { if (argument != nullptr) { #if defined(_AMD64_) || defined(_X86_) if (PIXIsPointerAligned<16>(argument)) { *destination++ = PIXEncodeStringInfo(0, 16, TRUE, FALSE); __m128i zero = _mm_setzero_si128(); if (PIXIsPointerAligned<16>(destination)) { while (destination < limit) { __m128i mem = _mm_load_si128((__m128i*)argument); _mm_store_si128((__m128i*)destination, mem); //check if any of the characters is a terminating zero __m128i res = _mm_cmpeq_epi8(mem, zero); destination += 2; if (_mm_movemask_epi8(res)) break; argument += 16; } } else { while (destination < limit) { __m128i mem = _mm_load_si128((__m128i*)argument); _mm_storeu_si128((__m128i*)destination, mem); //check if any of the characters is a terminating zero __m128i res = _mm_cmpeq_epi8(mem, zero); destination += 2; if (_mm_movemask_epi8(res)) break; argument += 16; } } } else #endif // _AMD64_ || _X86_ { PIXCopyEventArgumentSlow(destination, limit, argument); } } else { *destination++ = 0ull; } } } template<> inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PSTR argument) { PIXCopyEventArgument(destination, limit, (PCSTR)argument); } inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument) { *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE); while (destination < limit) { UINT64 c = argument[0]; if (!c) { *destination++ = 0; return; } UINT64 x = c; c = argument[1]; if (!c) { *destination++ = x; return; } x |= c << 16; c = argument[2]; if (!c) { *destination++ = x; return; } x |= c << 32; c = argument[3]; if (!c) { *destination++ = x; return; } x |= c << 48; *destination++ = x; argument += 4; } } inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument) { if (PIXIsPointerAligned<8>(argument)) { *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE); UINT64* source = (UINT64*)argument; while (destination < limit) { UINT64 qword = *source++; *destination++ = qword; //check if any of the characters is a terminating zero //TODO: check if reversed condition is faster if (!((qword & 0xFFFF000000000000) && (qword & 0xFFFF00000000) && (qword & 0xFFFF0000) && (qword & 0xFFFF))) { break; } } } else { PIXCopyEventArgumentSlowest(destination, limit, argument); } } template<> inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument) { if (destination < limit) { if (argument != nullptr) { #if defined(_AMD64_) || defined(_X86_) if (PIXIsPointerAligned<16>(argument)) { *destination++ = PIXEncodeStringInfo(0, 16, FALSE, FALSE); __m128i zero = _mm_setzero_si128(); if (PIXIsPointerAligned<16>(destination)) { while (destination < limit) { __m128i mem = _mm_load_si128((__m128i*)argument); _mm_store_si128((__m128i*)destination, mem); //check if any of the characters is a terminating zero __m128i res = _mm_cmpeq_epi16(mem, zero); destination += 2; if (_mm_movemask_epi8(res)) break; argument += 8; } } else { while (destination < limit) { __m128i mem = _mm_load_si128((__m128i*)argument); _mm_storeu_si128((__m128i*)destination, mem); //check if any of the characters is a terminating zero __m128i res = _mm_cmpeq_epi16(mem, zero); destination += 2; if (_mm_movemask_epi8(res)) break; argument += 8; } } } else #endif // _AMD64_ || _X86_ { PIXCopyEventArgumentSlow(destination, limit, argument); } } else { *destination++ = 0ull; } } } template<> inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PWSTR argument) { PIXCopyEventArgument(destination, limit, (PCWSTR)argument); }; #if defined(__d3d12_x_h__) || defined(__d3d12_h__) inline void PIXSetMarkerOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size) { commandList->SetMarker(D3D12_EVENT_METADATA, data, size); } inline void PIXSetMarkerOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size) { commandQueue->SetMarker(D3D12_EVENT_METADATA, data, size); } inline void PIXBeginEventOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size) { commandList->BeginEvent(D3D12_EVENT_METADATA, data, size); } inline void PIXBeginEventOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size) { commandQueue->BeginEvent(D3D12_EVENT_METADATA, data, size); } inline void PIXEndEventOnContext(_In_ ID3D12GraphicsCommandList* commandList) { commandList->EndEvent(); } inline void PIXEndEventOnContext(_In_ ID3D12CommandQueue* commandQueue) { commandQueue->EndEvent(); } #endif //__d3d12_x_h__ template struct PIXInferScopedEventType { typedef T Type; }; template struct PIXInferScopedEventType { typedef T Type; }; template struct PIXInferScopedEventType { typedef T Type; }; template struct PIXInferScopedEventType { typedef T Type; }; template<> struct PIXInferScopedEventType { typedef void Type; }; template<> struct PIXInferScopedEventType { typedef void Type; }; template<> struct PIXInferScopedEventType { typedef void Type; }; template<> struct PIXInferScopedEventType { typedef void Type; }; template<> struct PIXInferScopedEventType { typedef void Type; }; template<> struct PIXInferScopedEventType { typedef void Type; }; template<> struct PIXInferScopedEventType { typedef void Type; }; template<> struct PIXInferScopedEventType { typedef void Type; }; #endif //_PIXEventsCommon_H_