// basisu_transcoder_internal.h - Universal texture format transcoder library. // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #ifdef _MSC_VER #pragma warning (disable: 4127) // conditional expression is constant #endif #define BASISD_LIB_VERSION 116 #define BASISD_VERSION_STRING "01.16" #ifdef _DEBUG #define BASISD_BUILD_DEBUG #else #define BASISD_BUILD_RELEASE #endif #include "basisu.h" #define BASISD_znew (z = 36969 * (z & 65535) + (z >> 16)) namespace basisu { extern bool g_debug_printf; } namespace basist { // Low-level formats directly supported by the transcoder (other supported texture formats are combinations of these low-level block formats). // You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices. enum class block_format { cETC1, // ETC1S RGB cETC2_RGBA, // full ETC2 EAC RGBA8 block cBC1, // DXT1 RGB cBC3, // BC4 block followed by a four color BC1 block cBC4, // DXT5A (alpha block only) cBC5, // two BC4 blocks cPVRTC1_4_RGB, // opaque-only PVRTC1 4bpp cPVRTC1_4_RGBA, // PVRTC1 4bpp RGBA cBC7, // Full BC7 block, any mode cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block) cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.) cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format) cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC // data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking. cATC_RGB, cATC_RGBA_INTERPOLATED_ALPHA, cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size cPVRTC2_4_RGB, cPVRTC2_4_RGBA, cETC2_EAC_R11, cETC2_EAC_RG11, cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits) cRGB32, // Writes RGB components to 32bpp output pixels cRGBA32, // Writes RGB255 components to 32bpp output pixels cA32, // Writes alpha component to 32bpp output pixels cRGB565, cBGR565, cRGBA4444_COLOR, cRGBA4444_ALPHA, cRGBA4444_COLOR_OPAQUE, cRGBA4444, cUASTC_4x4, cTotalBlockFormats }; const int COLOR5_PAL0_PREV_HI = 9, COLOR5_PAL0_DELTA_LO = -9, COLOR5_PAL0_DELTA_HI = 31; const int COLOR5_PAL1_PREV_HI = 21, COLOR5_PAL1_DELTA_LO = -21, COLOR5_PAL1_DELTA_HI = 21; const int COLOR5_PAL2_PREV_HI = 31, COLOR5_PAL2_DELTA_LO = -31, COLOR5_PAL2_DELTA_HI = 9; const int COLOR5_PAL_MIN_DELTA_B_RUNLEN = 3, COLOR5_PAL_DELTA_5_RUNLEN_VLC_BITS = 3; const uint32_t ENDPOINT_PRED_TOTAL_SYMBOLS = (4 * 4 * 4 * 4) + 1; const uint32_t ENDPOINT_PRED_REPEAT_LAST_SYMBOL = ENDPOINT_PRED_TOTAL_SYMBOLS - 1; const uint32_t ENDPOINT_PRED_MIN_REPEAT_COUNT = 3; const uint32_t ENDPOINT_PRED_COUNT_VLC_BITS = 4; const uint32_t NUM_ENDPOINT_PREDS = 3;// BASISU_ARRAY_SIZE(g_endpoint_preds); const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1; const uint32_t NO_ENDPOINT_PRED_INDEX = 3;//NUM_ENDPOINT_PREDS; const uint32_t MAX_SELECTOR_HISTORY_BUF_SIZE = 64; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); uint16_t crc16(const void *r, size_t size, uint16_t crc); class huffman_decoding_table { friend class bitwise_decoder; public: huffman_decoding_table() { } void clear() { basisu::clear_vector(m_code_sizes); basisu::clear_vector(m_lookup); basisu::clear_vector(m_tree); } bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits) { if (!total_syms) { clear(); return true; } m_code_sizes.resize(total_syms); memcpy(&m_code_sizes[0], pCode_sizes, total_syms); const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; m_lookup.resize(0); m_lookup.resize(huffman_fast_lookup_size); m_tree.resize(0); m_tree.resize(total_syms * 2); uint32_t syms_using_codesize[basisu::cHuffmanMaxSupportedInternalCodeSize + 1]; basisu::clear_obj(syms_using_codesize); for (uint32_t i = 0; i < total_syms; i++) { if (pCode_sizes[i] > basisu::cHuffmanMaxSupportedInternalCodeSize) return false; syms_using_codesize[pCode_sizes[i]]++; } uint32_t next_code[basisu::cHuffmanMaxSupportedInternalCodeSize + 1]; next_code[0] = next_code[1] = 0; uint32_t used_syms = 0, total = 0; for (uint32_t i = 1; i < basisu::cHuffmanMaxSupportedInternalCodeSize; i++) { used_syms += syms_using_codesize[i]; next_code[i + 1] = (total = ((total + syms_using_codesize[i]) << 1)); } if (((1U << basisu::cHuffmanMaxSupportedInternalCodeSize) != total) && (used_syms != 1U)) return false; for (int tree_next = -1, sym_index = 0; sym_index < (int)total_syms; ++sym_index) { uint32_t rev_code = 0, l, cur_code, code_size = pCode_sizes[sym_index]; if (!code_size) continue; cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); if (code_size <= fast_lookup_bits) { uint32_t k = (code_size << 16) | sym_index; while (rev_code < huffman_fast_lookup_size) { if (m_lookup[rev_code] != 0) { // Supplied codesizes can't create a valid prefix code. return false; } m_lookup[rev_code] = k; rev_code += (1 << code_size); } continue; } int tree_cur; if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)])) { const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1); if (m_lookup[idx] != 0) { // Supplied codesizes can't create a valid prefix code. return false; } m_lookup[idx] = tree_next; tree_cur = tree_next; tree_next -= 2; } if (tree_cur >= 0) { // Supplied codesizes can't create a valid prefix code. return false; } rev_code >>= (fast_lookup_bits - 1); for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--) { tree_cur -= ((rev_code >>= 1) & 1); const int idx = -tree_cur - 1; if (idx < 0) return false; else if (idx >= (int)m_tree.size()) m_tree.resize(idx + 1); if (!m_tree[idx]) { m_tree[idx] = (int16_t)tree_next; tree_cur = tree_next; tree_next -= 2; } else { tree_cur = m_tree[idx]; if (tree_cur >= 0) { // Supplied codesizes can't create a valid prefix code. return false; } } } tree_cur -= ((rev_code >>= 1) & 1); const int idx = -tree_cur - 1; if (idx < 0) return false; else if (idx >= (int)m_tree.size()) m_tree.resize(idx + 1); if (m_tree[idx] != 0) { // Supplied codesizes can't create a valid prefix code. return false; } m_tree[idx] = (int16_t)sym_index; } return true; } const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; } const basisu::int_vec &get_lookup() const { return m_lookup; } const basisu::int16_vec &get_tree() const { return m_tree; } bool is_valid() const { return m_code_sizes.size() > 0; } private: basisu::uint8_vec m_code_sizes; basisu::int_vec m_lookup; basisu::int16_vec m_tree; }; class bitwise_decoder { public: bitwise_decoder() : m_buf_size(0), m_pBuf(nullptr), m_pBuf_start(nullptr), m_pBuf_end(nullptr), m_bit_buf(0), m_bit_buf_size(0) { } void clear() { m_buf_size = 0; m_pBuf = nullptr; m_pBuf_start = nullptr; m_pBuf_end = nullptr; m_bit_buf = 0; m_bit_buf_size = 0; } bool init(const uint8_t *pBuf, uint32_t buf_size) { if ((!pBuf) && (buf_size)) return false; m_buf_size = buf_size; m_pBuf = pBuf; m_pBuf_start = pBuf; m_pBuf_end = pBuf + buf_size; m_bit_buf = 0; m_bit_buf_size = 0; return true; } void stop() { } inline uint32_t peek_bits(uint32_t num_bits) { if (!num_bits) return 0; assert(num_bits <= 25); while (m_bit_buf_size < num_bits) { uint32_t c = 0; if (m_pBuf < m_pBuf_end) c = *m_pBuf++; m_bit_buf |= (c << m_bit_buf_size); m_bit_buf_size += 8; assert(m_bit_buf_size <= 32); } return m_bit_buf & ((1 << num_bits) - 1); } void remove_bits(uint32_t num_bits) { assert(m_bit_buf_size >= num_bits); m_bit_buf >>= num_bits; m_bit_buf_size -= num_bits; } uint32_t get_bits(uint32_t num_bits) { if (num_bits > 25) { assert(num_bits <= 32); const uint32_t bits0 = peek_bits(25); m_bit_buf >>= 25; m_bit_buf_size -= 25; num_bits -= 25; const uint32_t bits = peek_bits(num_bits); m_bit_buf >>= num_bits; m_bit_buf_size -= num_bits; return bits0 | (bits << 25); } const uint32_t bits = peek_bits(num_bits); m_bit_buf >>= num_bits; m_bit_buf_size -= num_bits; return bits; } uint32_t decode_truncated_binary(uint32_t n) { assert(n >= 2); const uint32_t k = basisu::floor_log2i(n); const uint32_t u = (1 << (k + 1)) - n; uint32_t result = get_bits(k); if (result >= u) result = ((result << 1) | get_bits(1)) - u; return result; } uint32_t decode_rice(uint32_t m) { assert(m); uint32_t q = 0; for (;;) { uint32_t k = peek_bits(16); uint32_t l = 0; while (k & 1) { l++; k >>= 1; } q += l; remove_bits(l); if (l < 16) break; } return (q << m) + (get_bits(m + 1) >> 1); } inline uint32_t decode_vlc(uint32_t chunk_bits) { assert(chunk_bits); const uint32_t chunk_size = 1 << chunk_bits; const uint32_t chunk_mask = chunk_size - 1; uint32_t v = 0; uint32_t ofs = 0; for ( ; ; ) { uint32_t s = get_bits(chunk_bits + 1); v |= ((s & chunk_mask) << ofs); ofs += chunk_bits; if ((s & chunk_size) == 0) break; if (ofs >= 32) { assert(0); break; } } return v; } inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits) { assert(ct.m_code_sizes.size()); const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; while (m_bit_buf_size < 16) { uint32_t c = 0; if (m_pBuf < m_pBuf_end) c = *m_pBuf++; m_bit_buf |= (c << m_bit_buf_size); m_bit_buf_size += 8; assert(m_bit_buf_size <= 32); } int code_len; int sym; if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0) { code_len = sym >> 16; sym &= 0xFFFF; } else { code_len = fast_lookup_bits; do { sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1 } while (sym < 0); } m_bit_buf >>= code_len; m_bit_buf_size -= code_len; return sym; } bool read_huffman_table(huffman_decoding_table &ct) { ct.clear(); const uint32_t total_used_syms = get_bits(basisu::cHuffmanMaxSymsLog2); if (!total_used_syms) return true; if (total_used_syms > basisu::cHuffmanMaxSyms) return false; uint8_t code_length_code_sizes[basisu::cHuffmanTotalCodelengthCodes]; basisu::clear_obj(code_length_code_sizes); const uint32_t num_codelength_codes = get_bits(5); if ((num_codelength_codes < 1) || (num_codelength_codes > basisu::cHuffmanTotalCodelengthCodes)) return false; for (uint32_t i = 0; i < num_codelength_codes; i++) code_length_code_sizes[basisu::g_huffman_sorted_codelength_codes[i]] = static_cast(get_bits(3)); huffman_decoding_table code_length_table; if (!code_length_table.init(basisu::cHuffmanTotalCodelengthCodes, code_length_code_sizes)) return false; if (!code_length_table.is_valid()) return false; basisu::uint8_vec code_sizes(total_used_syms); uint32_t cur = 0; while (cur < total_used_syms) { int c = decode_huffman(code_length_table); if (c <= 16) code_sizes[cur++] = static_cast(c); else if (c == basisu::cHuffmanSmallZeroRunCode) cur += get_bits(basisu::cHuffmanSmallZeroRunExtraBits) + basisu::cHuffmanSmallZeroRunSizeMin; else if (c == basisu::cHuffmanBigZeroRunCode) cur += get_bits(basisu::cHuffmanBigZeroRunExtraBits) + basisu::cHuffmanBigZeroRunSizeMin; else { if (!cur) return false; uint32_t l; if (c == basisu::cHuffmanSmallRepeatCode) l = get_bits(basisu::cHuffmanSmallRepeatExtraBits) + basisu::cHuffmanSmallRepeatSizeMin; else l = get_bits(basisu::cHuffmanBigRepeatExtraBits) + basisu::cHuffmanBigRepeatSizeMin; const uint8_t prev = code_sizes[cur - 1]; if (prev == 0) return false; do { if (cur >= total_used_syms) return false; code_sizes[cur++] = prev; } while (--l > 0); } } if (cur != total_used_syms) return false; return ct.init(total_used_syms, &code_sizes[0]); } private: uint32_t m_buf_size; const uint8_t *m_pBuf; const uint8_t *m_pBuf_start; const uint8_t *m_pBuf_end; uint32_t m_bit_buf; uint32_t m_bit_buf_size; }; inline uint32_t basisd_rand(uint32_t seed) { if (!seed) seed++; uint32_t z = seed; BASISD_znew; return z; } // Returns random number in [0,limit). Max limit is 0xFFFF. inline uint32_t basisd_urand(uint32_t& seed, uint32_t limit) { seed = basisd_rand(seed); return (((seed ^ (seed >> 16)) & 0xFFFF) * limit) >> 16; } class approx_move_to_front { public: approx_move_to_front(uint32_t n) { init(n); } void init(uint32_t n) { m_values.resize(n); m_rover = n / 2; } const basisu::int_vec& get_values() const { return m_values; } basisu::int_vec& get_values() { return m_values; } uint32_t size() const { return (uint32_t)m_values.size(); } const int& operator[] (uint32_t index) const { return m_values[index]; } int operator[] (uint32_t index) { return m_values[index]; } void add(int new_value) { m_values[m_rover++] = new_value; if (m_rover == m_values.size()) m_rover = (uint32_t)m_values.size() / 2; } void use(uint32_t index) { if (index) { //std::swap(m_values[index / 2], m_values[index]); int x = m_values[index / 2]; int y = m_values[index]; m_values[index / 2] = y; m_values[index] = x; } } // returns -1 if not found int find(int value) const { for (uint32_t i = 0; i < m_values.size(); i++) if (m_values[i] == value) return i; return -1; } void reset() { const uint32_t n = (uint32_t)m_values.size(); m_values.clear(); init(n); } private: basisu::int_vec m_values; uint32_t m_rover; }; struct decoder_etc_block; inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } enum eNoClamp { cNoClamp = 0 }; struct color32 { union { struct { uint8_t r; uint8_t g; uint8_t b; uint8_t a; }; uint8_t c[4]; uint32_t m; }; color32() { } color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); } void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); c[3] = static_cast(va); } void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); } void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); } uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; } uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; } bool operator== (const color32&rhs) const { return m == rhs.m; } static color32 comp_min(const color32& a, const color32& b) { return color32(cNoClamp, basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } static color32 comp_max(const color32& a, const color32& b) { return color32(cNoClamp, basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } }; struct endpoint { color32 m_color5; uint8_t m_inten5; bool operator== (const endpoint& rhs) const { return (m_color5.r == rhs.m_color5.r) && (m_color5.g == rhs.m_color5.g) && (m_color5.b == rhs.m_color5.b) && (m_inten5 == rhs.m_inten5); } bool operator!= (const endpoint& rhs) const { return !(*this == rhs); } }; struct selector { // Plain selectors (2-bits per value) uint8_t m_selectors[4]; // ETC1 selectors uint8_t m_bytes[4]; uint8_t m_lo_selector, m_hi_selector; uint8_t m_num_unique_selectors; bool operator== (const selector& rhs) const { return (m_selectors[0] == rhs.m_selectors[0]) && (m_selectors[1] == rhs.m_selectors[1]) && (m_selectors[2] == rhs.m_selectors[2]) && (m_selectors[3] == rhs.m_selectors[3]); } bool operator!= (const selector& rhs) const { return !(*this == rhs); } void init_flags() { uint32_t hist[4] = { 0, 0, 0, 0 }; for (uint32_t y = 0; y < 4; y++) { for (uint32_t x = 0; x < 4; x++) { uint32_t s = get_selector(x, y); hist[s]++; } } m_lo_selector = 3; m_hi_selector = 0; m_num_unique_selectors = 0; for (uint32_t i = 0; i < 4; i++) { if (hist[i]) { m_num_unique_selectors++; if (i < m_lo_selector) m_lo_selector = static_cast(i); if (i > m_hi_selector) m_hi_selector = static_cast(i); } } } // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4) && (y < 4)); return (m_selectors[y] >> (x * 2)) & 3; } void set_selector(uint32_t x, uint32_t y, uint32_t val) { static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 }; assert((x | y | val) < 4); m_selectors[y] &= ~(3 << (x * 2)); m_selectors[y] |= (val << (x * 2)); const uint32_t etc1_bit_index = x * 4 + y; uint8_t *p = &m_bytes[3 - (etc1_bit_index >> 3)]; const uint32_t byte_bit_ofs = etc1_bit_index & 7; const uint32_t mask = 1 << byte_bit_ofs; const uint32_t etc1_val = s_selector_index_to_etc1[val]; const uint32_t lsb = etc1_val & 1; const uint32_t msb = etc1_val >> 1; p[0] &= ~mask; p[0] |= (lsb << byte_bit_ofs); p[-2] &= ~mask; p[-2] |= (msb << byte_bit_ofs); } }; bool basis_block_format_is_uncompressed(block_format tex_type); } // namespace basist