/** Copyright (C) powturbo 2013-2023 SPDX-License-Identifier: GPL v2 License This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - homepage : https://sites.google.com/site/powturbo/ - github : https://github.com/powturbo - twitter : https://twitter.com/powturbo - email : powturbo [_AT_] gmail [_DOT_] com **/ // "Integer Compression" variable simple #ifndef USIZE #ifdef __SSE2__ #include #elif defined(__ARM_NEON) #include #include "include_/sse_neon.h" #endif #include #include #include #include "include_/vlcbyte.h" #include "include_/conf.h" #pragma warning( disable : 4005) #pragma warning( disable : 4090) #pragma warning( disable : 4068) #ifdef __ARM_NEON #define PREFETCH(_ip_,_rw_) #else #define PREFETCH(_ip_,_rw_) __builtin_prefetch(_ip_,_rw_) #endif size_t vsbound8( size_t n) { return n*(1+1); } size_t vsbound16(size_t n) { return n*(2+1); } size_t vsbound32(size_t n) { return n*(4+1); } size_t vsbound64(size_t n) { return n*(8+1); } #ifndef SV_LIM32 #define USE_RLE // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 #define SV_LIM8 unsigned char s_lim8[] = { 0, 28, 28, 28, 28, 36, 36, 36, 36, 0 }; #define SV_ITM8 unsigned s_itm8[] = { 0, 28, 14, 9, 7, 7, 6, 5, 4, -1 } #define SV_LIM16 unsigned char s_lim16[] = { 0, 28, 28, 28, 28, 36, 36, 36, 36, 36, 60, 60, 60, 60, 60, 60, 60, 0 }; #define SV_ITM16 unsigned s_itm16[] = { 0, 28, 14, 9, 7, 7, 6, 5, 4, 4, 6, 5, 5, 4, 4, 4, 3, -1 } #define SV_LIM32 unsigned char s_lim32[] = { 0, 28, 28, 28, 28, 36, 36, 36, 36, 36, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 0 }; #define SV_ITM32 unsigned s_itm32[] = { 0, 28, 14, 9, 7, 7, 6, 5, 4, 4, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, -1 } #define SV_LIM64 unsigned char s_lim64[] = { 0, 28, 28, 28, 28, 36, 36, 36, 36, 36, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,\ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 64, 64, 64, 64, 64, 64, 64, 64, 0 }; #define SV_ITM64 unsigned s_itm64[] = { 0, 28, 14, 9, 7, 7, 6, 5, 4, 4, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1,\ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1 } static SV_LIM8; static SV_ITM8; static SV_LIM16; static SV_ITM16; static SV_LIM32; static SV_ITM32; static SV_ITM64; static SV_LIM64; #define EFE(__x,__i,__start) ((__x[__i] - __start)-(__i)*EF_INC) #endif #define VSENC vsenc #define VSDEC vsdec #define USIZE 8 #include "vsimple.c" #undef USIZE #define USIZE 16 #include "vsimple.c" #undef USIZE #define USIZE 32 #include "vsimple.c" #undef USIZE #define USIZE 64 #include "vsimple.c" #undef USIZE #else #define uint_t T3(uint, USIZE, _t) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunsequenced" unsigned char *T2(VSENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out) { unsigned xm,m,r,x; uint_t *e = in+n,*ip,*sp; unsigned char *op = out,*op_ = out+n*(USIZE/8); for(ip = in; ip < e; ) { PREFETCH(ip+256, 0); sp = ip; #ifdef USE_RLE if(ip+4 < e && *ip == *(ip+1)) { uint_t *q = ip+1; while(q+1 < e && *(q+1) == *ip) q++; r = q - ip; if(r*T2(bsr, USIZE)(*ip) > 16 || (!*ip && r>4)) { m = (*ip)?(USIZE<=32?33:65):0; goto a; } } else #endif r = 0; for(m = x = T2(bsr, USIZE)(*ip);(r+1)*(xm = x > m?x:m) <= T2(s_lim, USIZE)[xm] && ip+r= 0xf) { *op++ = 0xf0; if(n <= 0x100) *op++ = r; else vbxput32(op, r); } else *op++ = r<<4; break; case 1: ctou32(op) = 1 | (unsigned)ip[ 0] << 4 | (unsigned)ip[ 1] << 5 | (unsigned)ip[ 2] << 6 | (unsigned)ip[ 3] << 7 | (unsigned)ip[ 4] << 8 | (unsigned)ip[ 5] << 9 | (unsigned)ip[ 6] << 10 | (unsigned)ip[ 7] << 11 | (unsigned)ip[ 8] << 12 | (unsigned)ip[ 9] << 13 | (unsigned)ip[10] << 14 | (unsigned)ip[11] << 15 | (unsigned)ip[12] << 16 | (unsigned)ip[13] << 17 | (unsigned)ip[14] << 18 | (unsigned)ip[15] << 19 | (unsigned)ip[16] << 20 | (unsigned)ip[17] << 21 | (unsigned)ip[18] << 22 | (unsigned)ip[19] << 23 | (unsigned)ip[20] << 24 | (unsigned)ip[21] << 25 | (unsigned)ip[22] << 26 | (unsigned)ip[23] << 27 | (unsigned)ip[24] << 28 | (unsigned)ip[25] << 29 | (unsigned)ip[26] << 30 | (unsigned)ip[27] << 31; ip += 28; op += 4; break; case 2: ctou32(op) = 2 | (unsigned)ip[ 0] << 4 | (unsigned)ip[ 1] << 6 | (unsigned)ip[ 2] << 8 | (unsigned)ip[ 3] << 10 | (unsigned)ip[ 4] << 12 | (unsigned)ip[ 5] << 14 | (unsigned)ip[ 6] << 16 | (unsigned)ip[ 7] << 18 | (unsigned)ip[ 8] << 20 | (unsigned)ip[ 9] << 22 | (unsigned)ip[10] << 24 | (unsigned)ip[11] << 26 | (unsigned)ip[12] << 28 | (unsigned)ip[13] << 30; ip += 14; op += 4; break; case 3: ctou32(op) = 3 | (unsigned)ip[ 0] << 4 | (unsigned)ip[ 1] << 7 | (unsigned)ip[ 2] << 10 | (unsigned)ip[ 3] << 13 | (unsigned)ip[ 4] << 16 | (unsigned)ip[ 5] << 19 | (unsigned)ip[ 6] << 22 | (unsigned)ip[ 7] << 25 | (unsigned)ip[ 8] << 28; ip += 9; op += 4; break; case 4: ctou64(op) = 4 | (unsigned)ip[ 0] << 4 | (unsigned)ip[ 1] << 8 | (unsigned)ip[ 2] << 12 | (unsigned)ip[ 3] << 16 | (unsigned)ip[ 4] << 20 | (unsigned)ip[ 5] << 24 | (unsigned)ip[ 6] << 28; ip += 7; op += 4; break; case 5: ctou64(op) = 5 | (unsigned)ip[ 0] << 4 | (unsigned)ip[ 1] << 9 | (unsigned)ip[ 2] << 14 | (unsigned)ip[ 3] << 19 | (unsigned)ip[ 4] << 24 | (uint64_t)ip[ 5] << 29 | (uint64_t)ip[ 6] << 34; ip += 7; op += 5; break; case 6: ctou64(op) = 6 | (unsigned)ip[ 0] << 4 | (unsigned)ip[ 1] << 10 | (unsigned)ip[ 2] << 16 | (unsigned)ip[ 3] << 22 | (uint64_t)ip[ 4] << 28 | (uint64_t)ip[ 5] << 34; ip += 6; op += 5; break; case 7: ctou64(op) = 7 | (unsigned)ip[ 0] << 5 | (unsigned)ip[ 1] << 12 | (unsigned)ip[ 2] << 19 | (uint64_t)ip[ 3] << 26 | (uint64_t)ip[ 4] << 33; ip += 5; op += 5; break; case 8: case 9: ctou64(op) = 9 | (unsigned)ip[ 0] << 4 | (unsigned)ip[ 1] << 13 | (unsigned)ip[ 2] << 22 | (uint64_t)ip[ 3] << 31; ip += 4; op += 5; break; case 10: ctou64(op) = 10 | (unsigned)ip[ 0] << 4 | (unsigned)ip[ 1] << 14 | (uint64_t)ip[ 2] << 24 | (uint64_t)ip[ 3] << 34 | (uint64_t)ip[ 4] << 44 | (uint64_t)ip[ 5] << 54; ip += 6; op += 8; break; case 11: case 12: ctou64(op) = 12 | (unsigned)ip[ 0] << 4 | (unsigned)ip[ 1] << 16 | (uint64_t)ip[ 2] << 28 | (uint64_t)ip[ 3] << 40 | (uint64_t)ip[ 4] << 52; ip += 5; op += 8; break; case 13: case 14: case 15: ctou64(op) = 15 | (unsigned)ip[ 0] << 4 | (uint64_t)ip[ 1] << 19 | (uint64_t)ip[ 2] << 34 | (uint64_t)ip[ 3] << 49; ip += 4; op += 8; break; case 16: case 17: case 18: case 19: case 20: ctou64(op) = 11 | (unsigned)ip[ 0] << 4 | (uint64_t)ip[ 1] << 24 | (uint64_t)ip[ 2] << 44; ip += 3; op += 8; break; case 21: case 22: case 23: case 24: case 25: case 26: case 27: case 28: case 29: case 30: ctou64(op) = 13 | (uint64_t)ip[ 0] << 4 | (uint64_t)ip[ 1] << 34; ip += 2; op += 8; break; case 31: case 32: #if USIZE == 64 case 33: case 34: case 35: case 36: #endif ctou64(op) = 14 | (uint64_t)ip[ 0] << 4; ip++; op += 5; break; #if USIZE == 64 default: xm = (m+7)/8; *op++ = 0x17 | (xm-1) << 5; ctou64(op) = (uint64_t)ip[ 0]; ip++; op += xm; break; #endif #ifdef USE_RLE case USIZE<=32?33:65: ip += r; if(--r >= 0xf) { *op++ = 0xf0|8; if(n <= 0x100) *op++ = r; else vbxput32(op, r); } else *op++ = r<<4|8; T2(vbxput, USIZE)(op, ip[0]); break; #endif } if(op > op_) { *out++ = 0; memcpy(out, in, n*(USIZE/8)); return out+n*(USIZE/8); } } return op; } #pragma clang diagnostic pop #define OP(__x) op[__x] // *op++ // #define OPI(__x) op+=__x// // unsigned char *T2(VSDEC, USIZE)(unsigned char *__restrict ip, size_t n, uint_t *__restrict op) { uint_t *op_ = op+n; while(op < op_) { uint64_t w = *(uint64_t *)ip; PREFETCH(ip+256, 0); switch(w & 0xf) { case 0: { uint_t *q = op; unsigned r = (w>>4)&0xf; if(!r) { memcpy(op,ip+1, n*(USIZE/8)); return ip+n*(USIZE/8); } #if defined(__SSE2__) || defined(__ARM_NEON) __m128i zv = _mm_setzero_si128(); #endif ip++; if(unlikely(r == 0xf)) { if(n <= 0x100) r = (w>>8)&0xff, ip++; else { vbxget32(ip, r); } } r -= 1; op += r+1; while(q < op) { #if defined(__SSE2__) || defined(__ARM_NEON) _mm_storeu_si128((__m128i *)q,zv); q = (uint_t *)((unsigned char *)q+16); _mm_storeu_si128((__m128i *)q,zv); q = (uint_t *)((unsigned char *)q+16); #else q[0]=q[1]=q[2]=q[3]=q[4]=q[5]=q[6]=q[7]=0; q+=8; #endif } //while(r-->=0) *op++=0; } break; case 1: OP( 0) = (w >> 4) & 1; OP( 1) = (w >> 5) & 1; OP( 2) = (w >> 6) & 1; OP( 3) = (w >> 7) & 1; OP( 4) = (w >> 8) & 1; OP( 5) = (w >> 9) & 1; OP( 6) = (w >> 10) & 1; OP( 7) = (w >> 11) & 1; OP( 8) = (w >> 12) & 1; OP( 9) = (w >> 13) & 1; OP(10) = (w >> 14) & 1; OP(11) = (w >> 15) & 1; OP(12) = (w >> 16) & 1; OP(13) = (w >> 17) & 1; OP(14) = (w >> 18) & 1; OP(15) = (w >> 19) & 1; OP(16) = (w >> 20) & 1; OP(17) = (w >> 21) & 1; OP(18) = (w >> 22) & 1; OP(19) = (w >> 23) & 1; OP(20) = (w >> 24) & 1; OP(21) = (w >> 25) & 1; OP(22) = (w >> 26) & 1; OP(23) = (w >> 27) & 1; OP(24) = (w >> 28) & 1; OP(25) = (w >> 29) & 1; OP(26) = (w >> 30) & 1; OP(27) = (w >> 31) & 1; OPI( 28); ip+=4; break; case 2: OP( 0) = (w >> 4) & 3; OP( 1) = (w >> 6) & 3; OP( 2) = (w >> 8) & 3; OP( 3) = (w >> 10) & 3; OP( 4) = (w >> 12) & 3; OP( 5) = (w >> 14) & 3; OP( 6) = (w >> 16) & 3; OP( 7) = (w >> 18) & 3; OP( 8) = (w >> 20) & 3; OP( 9) = (w >> 22) & 3; OP(10) = (w >> 24) & 3; OP(11) = (w >> 26) & 3; OP(12) = (w >> 28) & 3; OP(13) = (w >> 30) & 3; OPI( 14); ip+=4; break; case 3: OP( 0) = (w >> 4) & 7; OP( 1) = (w >> 7) & 7; OP( 2) = (w >> 10) & 7; OP( 3) = (w >> 13) & 7; OP( 4) = (w >> 16) & 7; OP( 5) = (w >> 19) & 7; OP( 6) = (w >> 22) & 7; OP( 7) = (w >> 25) & 7; OP( 8) = (w >> 28) & 7; OPI( 9); ip+=4; break; case 4: OP( 0) = (w >> 4) & 0xf; OP( 1) = (w >> 8) & 0xf; OP( 2) = (w >> 12) & 0xf; OP( 3) = (w >> 16) & 0xf; OP( 4) = (w >> 20) & 0xf; OP( 5) = (w >> 24) & 0xf; OP( 6) = (w >> 28) & 0xf; OPI( 7); ip+=4; break; case 5: OP( 0) = (w >> 4) & 0x1f; OP( 1) = (w >> 9) & 0x1f; OP( 2) = (w >> 14) & 0x1f; OP( 3) = (w >> 19) & 0x1f; OP( 4) = (w >> 24) & 0x1f; OP( 5) = (w >> 29) & 0x1f; OP( 6) = (w >> 34) & 0x1f; OPI( 7); ip+=5; break; case 6: OP(0) = (w >> 4) & 0x3f; OP(1) = (w >> 10) & 0x3f; OP(2) = (w >> 16) & 0x3f; OP(3) = (w >> 22) & 0x3f; OP(4) = (w >> 28) & 0x3f; OP(5) = (w >> 34) & 0x3f; OPI( 6); ip+=5; break; case 7: #if USIZE == 64 if(unlikely((w>>4) & 1)) { unsigned b = ((*ip++) >> 5)+1; *op = *(unsigned long long *)ip; if(unlikely(b!=8)) *op &= (1ull<<(b*8))-1; op++; ip += b; break; } #endif OP(0) = (w >> 5) & 0x7f; OP(1) = (w >> 12) & 0x7f; OP(2) = (w >> 19) & 0x7f; OP(3) = (w >> 26) & 0x7f; OP(4) = (w >> 33) & 0x7f; OPI( 5); ip+=5; break; #ifdef USE_RLE case 8: { uint_t *q=op,u; int r = (w>>4)&0xf; ip++; if(unlikely(r == 0xf)) { if(n <= 0x100) r = (w>>8)&0xff, ip++; else { vbxget32(ip, r); } } op += r+1; T2(vbxget, USIZE)(ip,u); #if (defined(__SSE2__) || defined(__ARM_NEON)) && USIZE == 32 { __m128i v = _mm_set1_epi32(u); while(q < op) { _mm_storeu_si128((__m128i *)q,v); q += 4; _mm_storeu_si128((__m128i *)q,v); q += 4; } } #else while(q < op) { q[0]=q[1]=q[2]=q[3]=q[4]=q[5]=q[6]=q[7]=u; q+=8; } //while(r-->=0) *op++=u; #endif } break; #endif case 9: OP(0) = (w >> 4) & 0x1ff; OP(1) = (w >> 13) & 0x1ff; OP(2) = (w >> 22) & 0x1ff; OP(3) = (w >> 31) & 0x1ff; OPI( 4); ip+=5; break; case 10: OP(0) = (w >> 4) & 0x3ff; OP(1) = (w >> 14) & 0x3ff; OP(2) = (w >> 24) & 0x3ff; OP(3) = (w >> 34) & 0x3ff; OP(4) = (w >> 44) & 0x3ff; OP(5) = (w >> 54) & 0x3ff; OPI( 6); ip+=8; break; case 12: OP(0) = (w >> 4) & 0xfffu; OP(1) = (w >> 16) & 0xfffu; OP(2) = (w >> 28) & 0xfffu; OP(3) = (w >> 40) & 0xfffu; OP(4) = (w >> 52) & 0xfffu; OPI( 5); ip+=8; break; case 15: OP(0) = (w >> 4) & 0x7fffu; OP(1) = (w >> 19) & 0x7fffu; OP(2) = (w >> 34) & 0x7fffu; OP(3) = (w >> 49) & 0x7fffu; OPI( 4); ip+=8; break; case 11: OP(0) = (w >> 4) & 0xfffffu; // 20 OP(1) = (w >> 24) & 0xfffffu; OP(2) = (w >> 44) & 0xfffffu; OPI( 3); ip+=8; break; case 13: OP(0) = (w >> 4) & 0x3fffffffu; OP(1) = (w >> 34) & 0x3fffffffu; OPI( 2); ip+=8; break; case 14: #if USIZE <= 32 OP(0) = (w >> 4) & 0xfffffffffull; OPI( 1); ip+=5; #else OP(0) = (w >> 4) & 0xfffffffffull; OPI( 1); ip+=5; #endif break; } } return ip; } #endif