// This file is not GPL. It may be used for educational purposes only. #include "stdafx.h" #include "compr_kraken.h" #include "compress.h" #include "compr_util.h" #include "compr_entropy.h" #include #include #include "match_hasher.h" #include "compr_match_finder.h" struct KrakenRecentOffs { KrakenRecentOffs() { offs[4] = offs[5] = offs[6] = 8; } int offs[8]; }; struct KrakEncLz { uint8 *lits_start; uint8 *lits; uint8 *sub_lits_start; uint8 *sub_lits; uint8 *tokens_start; uint8 *tokens; uint8 *u8_offs_start; uint8 *u8_offs; uint32 *u32_offs_start; uint32 *u32_offs; uint8 *lrl8_start; uint8 *lrl8; uint32 *len32_start; uint32 *len32; int src_len; const uint8 *src_ptr; int recent0; int encode_flags; }; static bool KrakIsMatchLongEnough(uint ml, uint offs) { switch (ml) { case 0: case 1: case 2: return false; case 3: return offs < 0x4000; case 4: return offs < 0x20000; case 5: return offs < 0x100000; case 6: case 7: return offs < 0x400000; default: return true; } } enum IsRecent { kOFFSET = 0, kRECENT = 1, }; struct Krak { enum { kMinBytesPerRound = 256, kMaxBytesPerRound = 4096, kRecentOffsetCount = 3, }; struct Token { int recent_offset0; int litlen; int matchlen; int offset; }; struct TokenArray { Token *data; int size, capacity; }; struct State { int best_bit_count; int recent_offs[3]; int matchlen; int litlen; int quick_recent_matchlen_litlen; int prev_state; void Initialize() { best_bit_count = 0; recent_offs[0] = 8; recent_offs[1] = 8; recent_offs[2] = 8; matchlen = 0; litlen = 0; prev_state = 0; quick_recent_matchlen_litlen = 0; } }; struct CostModel { int chunk_type; int sub_or_copy_mask; uint lit_cost[256]; uint token_cost[256]; int offs_encode_type; uint offs_cost[256]; uint offs_lo_cost[256]; uint matchlen_cost[256]; }; struct Stats { HistoU8 lit_raw; HistoU8 lit_sub; HistoU8 token; HistoU8 matchlen; int offs_encode_type; HistoU8 offs; HistoU8 offs_lo; static void RescaleOne(HistoU8 *h) { for (size_t i = 0; i != 256; i++) h->count[i] = (h->count[i] >> 4) + 1; } static void Rescale(Stats *s) { RescaleOne(&s->lit_raw); RescaleOne(&s->lit_sub); RescaleOne(&s->offs); if (s->offs_encode_type > 1) RescaleOne(&s->offs_lo); RescaleOne(&s->token); RescaleOne(&s->matchlen); } static void RescaleAddOne(HistoU8 *h, const HistoU8 *t) { for (size_t i = 0; i != 256; i++) h->count[i] = ((h->count[i] + t->count[i]) >> 5) + 1; } static void RescaleAdd(Stats *s, const Stats *t, bool chunk_type_same) { if (chunk_type_same) { RescaleAddOne(&s->lit_raw, &t->lit_raw); RescaleAddOne(&s->lit_sub, &t->lit_sub); } else { RescaleOne(&s->lit_raw); RescaleOne(&s->lit_sub); } RescaleAddOne(&s->token, &t->token); RescaleAddOne(&s->matchlen, &t->matchlen); if (s->offs_encode_type == t->offs_encode_type) { RescaleAddOne(&s->offs, &t->offs); if (s->offs_encode_type > 1) RescaleAddOne(&s->offs_lo, &t->offs_lo); } else { s->offs = t->offs; s->offs_lo = t->offs_lo; s->offs_encode_type = t->offs_encode_type; RescaleOne(&s->offs); if (s->offs_encode_type > 1) RescaleOne(&s->offs_lo); } } static void ReduceIfHigh(Stats *s) { // empty } static void Update(Stats *h, const uint8 *src, int pos, const Token *tokens, int num_token) { enum { kIncrement = 2 }; for (int i = 0; i < num_token; i++) { const Token *t = &tokens[i]; int litlen = t->litlen; int recent = t->recent_offset0; for (int j = 0; j < litlen; j++) { uint b = src[pos + j]; h->lit_raw.count[b] += kIncrement; h->lit_sub.count[(uint8)(b - src[pos + j - recent])] += kIncrement; } pos += litlen + t->matchlen; int length_field = litlen; if (litlen >= 3) { h->matchlen.count[std::min(litlen - 3, 255)] += kIncrement; length_field = 3; } if (t->matchlen < 2) { assert(t->matchlen >= 2); continue; } uint offset = t->offset; int recent_field; if (t->offset <= 0) { recent_field = -(int)offset; } else { recent_field = 3; if (h->offs_encode_type == 0) { if (offset >= 8388360) { uint t = BSR(offset - 8322816) | 0xF0; h->offs.count[t] += kIncrement; } else { uint t = ((offset - 8) & 0xF) + 16 * (BSR(offset - 8 + 256) - 8); h->offs.count[t] += kIncrement; } } else if (h->offs_encode_type == 1) { uint t = BSR(offset + 8) - 3; uint u = 8 * t | (((offset + 8) >> t) ^ 8); h->offs.count[u] += kIncrement; } else { uint ohi = offset / h->offs_encode_type, olo = offset % h->offs_encode_type; uint t = BSR(ohi + 8) - 3; uint u = 8 * t | (((ohi + 8) >> t) ^ 8); h->offs.count[u] += kIncrement; h->offs_lo.count[olo] += kIncrement; } } int matchlen_field = t->matchlen - 2; if (t->matchlen - 17 >= 0) { h->matchlen.count[std::min(t->matchlen - 17, 255)] += kIncrement; matchlen_field = 15; } int token_value = (matchlen_field << 2) + (recent_field << 6) + (length_field); h->token.count[token_value] += kIncrement; } } }; static void MakeCostModel(const Stats *h, CostModel *cm) { ConvertHistoToCost(h->offs, cm->offs_cost, 36); if (h->offs_encode_type > 1) ConvertHistoToCost(h->offs_lo, cm->offs_lo_cost, 0); ConvertHistoToCost(h->token, cm->token_cost, 18); ConvertHistoToCost(h->matchlen, cm->matchlen_cost, 12); if (cm->chunk_type == 1) ConvertHistoToCost(h->lit_raw, cm->lit_cost, 0); else ConvertHistoToCost(h->lit_sub, cm->lit_cost, 0); } static __forceinline uint BitsForLitlen(CostModel &cost_model, int cur_litlen) { if (cur_litlen < 3) return 0; if (cur_litlen - 3 >= 255) { int v = BSR(((unsigned int)(cur_litlen - 3 - 255) >> 6) + 1); return cost_model.matchlen_cost[255] + 32 * (2 * v + 7); } else { return cost_model.matchlen_cost[cur_litlen - 3]; } } static __forceinline uint BitsForLit(const uint8 *src, int pos, int recent, const CostModel &cm, int litidx) { const uint8 *p = src + pos; return cm.lit_cost[(uint8)(p[0] - (p[-recent] & cm.sub_or_copy_mask))]; } static __forceinline uint BitsForLits(const uint8 *src, int pos, int num, int recent, const CostModel &cm, int litidx) { const uint8 *p = src + pos; uint sum = 0; for (int i = 0; i < num; i++, p++) sum += cm.lit_cost[(uint8)(p[0] - (p[-recent] & cm.sub_or_copy_mask))]; return sum; } static __forceinline bool CheckMatchValidLength(uint ml, uint offs) { return (offs < 0x100000) || ((offs < 0x200000) ? (ml >= 5) : (offs < 0x400000) ? (ml >= 6) : (ml >= 8)); } static __forceinline int GetMatchlengthQ(const uint8 *src, int offset, const uint8 *src_end, uint32 u32_at_cur) { return ::GetMatchlengthQ(src, offset, src_end, u32_at_cur); } static __forceinline int GetMatchlengthMin2(const uint8 *src_ptr_cur, int offset, const uint8 *src_ptr_safe_end) { return ::GetMatchlengthMin2(src_ptr_cur, offset, src_ptr_safe_end); } static __forceinline int BitsForToken(const CostModel &cost_model, int cur_matchlen, int cmd_offset, int recent_field, int length_field) { if (cur_matchlen - 17 >= 0) { int bits_for_matchlen; if (cur_matchlen - 17 >= 255) { int bsr = BSR(((unsigned int)(cur_matchlen - 17 - 255) >> 6) + 1); bits_for_matchlen = cost_model.matchlen_cost[255] + 32 * (2 * bsr + 7); } else { bits_for_matchlen = cost_model.matchlen_cost[cur_matchlen - 17]; } return cost_model.token_cost[(15 << 2) + (recent_field << 6) + length_field] + bits_for_matchlen; } else { return cost_model.token_cost[((cur_matchlen - 2) << 2) + (recent_field << 6) + length_field]; } } static uint BitsForOffset(const CostModel &cost_model, uint offs) { if (cost_model.offs_encode_type == 0) { if (offs >= 8388360) { unsigned t = BSR(offs - 8322816) | 0xF0; unsigned u = t - 224; return cost_model.offs_cost[t] + 32 * u + 12; } else { unsigned t = ((offs - 8) & 0xF) + 16 * (BSR(offs - 8 + 256) - 8); unsigned u = (t >> 4) + 4; return cost_model.offs_cost[t] + 32 * u; } } else if (cost_model.offs_encode_type == 1) { unsigned t = BSR(offs + 8) - 3; unsigned u = 8 * t | (((offs + 8) >> t) ^ 8); return cost_model.offs_cost[u] + 32 * (u >> 3); } else { unsigned ohi = offs / cost_model.offs_encode_type, olo = offs % cost_model.offs_encode_type; unsigned t = BSR(ohi + 8) - 3; unsigned u = 8 * t | (((ohi + 8) >> t) ^ 8); return cost_model.offs_cost[u] + 32 * (u >> 3) + cost_model.offs_lo_cost[olo]; } } static __forceinline int GetRecentOffsetIndex(const int *arr, int offset) { return (offset == arr[0]) ? 0 : (offset == arr[1]) ? 1 : (offset == arr[2]) ? 2 : -1; } }; static float GetTime_KrakenWriteBits(int platforms, int src_len, int tokens, int len8) { return CombineCostComponents(platforms, 200.0f + src_len * 0.405f + tokens * 15.213f + len8 * 4.017f, 200.0f + src_len * 0.419f + tokens * 19.861f + len8 * 10.898f, 200.0f + src_len * 0.647f + tokens * 24.886f + len8 * 9.685f, 200.0f + src_len * 0.305f + tokens * 13.591f + len8 * 4.394f); } static double GetTime_KrakenLengths(int platforms, int len8, int len32) { return CombineCostComponents(platforms, 42.933f + len8 * 0.478f + len32 * 21.527f, 36.646f + len8 * 0.746f + len32 * 32.345f, 115.731f + len8 * 0.815f + len32 * 36.682f, 48.796f + len8 * 0.453f + len32 * 20.770f); } static int Kraken_EncodeLzArrays(float *cost_ptr, int *chunk_type_ptr, Krak::Stats *stats, uint8 *dst, uint8 *dst_end, LzCoder *lzcoder, LzTemp *lztemp, KrakEncLz *kl, int start_pos) { uint8 *dst_org = dst; if (stats) memset(stats, 0, sizeof(*stats)); const uint8 *src_ptr = kl->src_ptr; int src_len = kl->src_len; int initial_bytes = start_pos == 0 ? 8 : 0; for (int i = 0; i < initial_bytes; i++) *dst++ = src_ptr[i]; int level = lzcoder->compression_level; int flag_ignore_u32_length = 0; int extra_size = 0; assert((lzcoder->encode_flags & 1) == 0); int num_lits = kl->lits - kl->lits_start; float lit_cost = kInvalidCost; float memcpy_cost = num_lits + 3; if (num_lits < 32 || level <= -4) { *chunk_type_ptr = 1; int n = EncodeArrayU8_Memcpy(dst, dst_end, kl->lits_start, num_lits); if (n < 0) return src_len; dst += n; lit_cost = memcpy_cost; } else { HistoU8 lits_histo, litsub_histo; bool has_litsub = (kl->sub_lits != kl->sub_lits_start); CountBytesHistoU8(kl->lits_start, num_lits, &lits_histo); if (has_litsub) CountBytesHistoU8(kl->sub_lits_start, num_lits, &litsub_histo); if (stats) { stats->lit_raw = lits_histo; if (has_litsub) stats->lit_sub = litsub_histo; } int lit_n = -1; bool skip_normal_lit = false; if (has_litsub) { float litsub_extra_cost = CombineCostComponents1(lzcoder->platforms, num_lits, 0.14399999f, 0.292f, 0.322f, 0.129f) * lzcoder->speed_tradeoff; bool skip_litsub = (level < 6) && (GetHistoCostApprox(lits_histo, num_lits) * 0.125f <= GetHistoCostApprox(litsub_histo, num_lits) * 0.125f + litsub_extra_cost); if (!skip_litsub) { *chunk_type_ptr = 0; float litsub_cost = kInvalidCost; lit_n = EncodeArrayU8WithHisto(dst, dst_end, kl->sub_lits_start, num_lits, litsub_histo, lzcoder->entropy_opts, lzcoder->speed_tradeoff, lzcoder->platforms, &litsub_cost, level); litsub_cost += litsub_extra_cost; if (lit_n > 0 && lit_n < num_lits && litsub_cost <= memcpy_cost) { lit_cost = litsub_cost; if (level < 6) skip_normal_lit = true; } } } if (!skip_normal_lit) { int n = EncodeArrayU8WithHisto(dst, dst_end, kl->lits_start, num_lits, lits_histo, lzcoder->entropy_opts, lzcoder->speed_tradeoff, lzcoder->platforms, &lit_cost, level); if (n > 0) { lit_n = n; *chunk_type_ptr = 1; } } if (lit_n < 0) return src_len; dst += lit_n; } float token_cost = kInvalidCost; int n = EncodeArrayU8(dst, dst_end, kl->tokens_start, kl->tokens - kl->tokens_start, lzcoder->entropy_opts, lzcoder->speed_tradeoff, lzcoder->platforms, &token_cost, level, stats ? &stats->token : NULL); if (n < 0) return src_len; dst += n; int offs_encode_type = 0; float offs_cost = kInvalidCost; n = EncodeLzOffsets(dst, dst_end, kl->u8_offs_start, kl->u32_offs_start, kl->u8_offs - kl->u8_offs_start, lzcoder->entropy_opts, lzcoder->speed_tradeoff, lzcoder->platforms, &offs_cost, 8, (lzcoder->encode_flags & 4), &offs_encode_type, level, stats ? &stats->offs : NULL, stats ? &stats->offs_lo : NULL); if (n < 0) return src_len; dst += n; if (stats) stats->offs_encode_type = offs_encode_type; float lrl8_cost = kInvalidCost; n = EncodeArrayU8(dst, dst_end, kl->lrl8_start, kl->lrl8 - kl->lrl8_start, lzcoder->entropy_opts, lzcoder->speed_tradeoff, lzcoder->platforms, &lrl8_cost, level, stats ? &stats->matchlen : NULL); if (n < 0) return src_len; dst += n; int lrl_num = kl->lrl8 - kl->lrl8_start; int offs_num = kl->u8_offs - kl->u8_offs_start; int tok_num = kl->tokens - kl->tokens_start; int required = std::max(lrl_num, offs_num) + lrl_num + num_lits + 4 * lrl_num + 6 * offs_num + tok_num + 16; required = std::max(std::max(required, 2 * num_lits), num_lits + 2 * tok_num) + 0xd000; int scratch_available = std::min(3 * src_len + 32 + 0xd000, 0x6C000); if (required > scratch_available) { assert(required <= scratch_available); return src_len; } n = WriteLzOffsetBits(dst, dst_end, kl->u8_offs_start, kl->u32_offs_start, offs_num, offs_encode_type, kl->len32_start, kl->len32 - kl->len32_start, flag_ignore_u32_length, extra_size); if (n < 0) return src_len; dst += n; if (dst - dst_org >= src_len) return src_len; float bits_cost = GetTime_KrakenWriteBits(lzcoder->platforms, src_len, tok_num, lrl_num) * lzcoder->speed_tradeoff + (initial_bytes + flag_ignore_u32_length + n); float cost = token_cost + lit_cost + offs_cost + lrl8_cost + bits_cost; *cost_ptr = GetTime_KrakenLengths(lzcoder->platforms, lrl_num, kl->len32 - kl->len32_start) * lzcoder->speed_tradeoff + cost; return dst - dst_org; } static void KrakEncLz_Init(KrakEncLz *kl, LzTemp *lztemp, int src_len, const uint8 *src_base, int encode_flags) { kl->src_ptr = src_base; kl->src_len = src_len; size_t nlits = src_len + 8; size_t ntokens = src_len / 2 + 8; size_t nu8offs = src_len / 3; size_t nu32offs = src_len / 3; size_t nlen8 = src_len / 5; size_t nlen32 = src_len / 256; uint8 *p = (uint8*)lztemp->scratch0.Allocate(nlits * 2 + ntokens + nu8offs + nu32offs * 4 + nlen8 + nlen32 * 4 + 256); kl->lits_start = kl->lits = p, p += nlits; kl->sub_lits_start = kl->sub_lits = p, p += nlits; kl->tokens_start = kl->tokens = p, p += ntokens; kl->u8_offs = kl->u8_offs_start = p, p += nu8offs; p = AlignPointer(p, 4); kl->u32_offs = kl->u32_offs_start = (uint32*)p, p += nu32offs * 4; kl->lrl8 = kl->lrl8_start = p, p += nlen8; p = AlignPointer(p, 4); kl->len32 = kl->len32_start = (uint32*)p; kl->recent0 = 8; kl->encode_flags = encode_flags; } static inline int KrakEnk_WriteMatchLen(KrakEncLz &kl, int ml) { int ml_token = ml - 2; if (ml_token >= 15) { ml_token = 15; if (ml >= 255 + 17) { *kl.lrl8++ = 255; *kl.len32++ = ml - 255 - 17; } else { *kl.lrl8++ = ml - 17; } } return ml_token << 2; } template static void KrakEnc_WriteLitsLong(KrakEncLz &kl, const uint8 *p, size_t len) { if (DoSubtract) { SubtractBytesUnsafe(kl.sub_lits, p, len, -kl.recent0); kl.sub_lits += len; } uint8 *d = kl.lits, *d_end = d + len; do { *(uint32*)d = *(uint32*)p; d += 4, p += 4; } while (d < d_end); kl.lits = d_end; if (len >= 258) { *kl.lrl8++ = 255; *kl.len32++ = len - 258; } else { *kl.lrl8++ = (uint8)(len - 3); } } template static inline int KrakEnc_WriteLits(KrakEncLz &kl, const uint8 *p, size_t len) { if (len == 0) return 0; if (len > 8) { KrakEnc_WriteLitsLong(kl, p, len); return 3; } uint8 *lrl8 = kl.lrl8; *lrl8 = (uint8)(len - 3); kl.lrl8 = (len >= 3) ? lrl8 + 1 : lrl8; uint8 *ll = kl.lits; *(uint64*)ll = *(uint64*)p; kl.lits = ll + len; if (DoSubtract) { uint8 *sl = kl.sub_lits; _mm_storel_epi64((__m128i *)sl, _mm_sub_epi8(_mm_loadl_epi64((const __m128i *)p), _mm_loadl_epi64((const __m128i *)&p[-kl.recent0]))); kl.sub_lits = sl + len; } return std::min(len, 3); } static inline void KrakEnc_WriteFarOffs(KrakEncLz &kl, uint32 offs) { int bsr = BSR(offs - 8322816); *kl.u8_offs++ = bsr | 0xf0; *kl.u32_offs++ = offs; } static inline void KrakEnc_WriteNearOffs(KrakEncLz &kl, uint32 offs) { int bsr = BSR(offs + 248); *kl.u8_offs++ = ((offs - 8) & 0xF) | 16 * (bsr - 8); *kl.u32_offs++ = offs; } static inline void KrakEnc_WriteOffs(KrakEncLz &kl, uint32 offs) { if (offs >= 8388360) KrakEnc_WriteFarOffs(kl, offs); else KrakEnc_WriteNearOffs(kl, offs); } template static inline void KrakEnc_AddToken(KrakEncLz &kl, KrakenRecentOffs &recent, const uint8 *litstart, size_t litlen, int matchlen, int offs_or_recent) { int token = KrakEnc_WriteLits(kl, litstart, litlen); token += KrakEnk_WriteMatchLen(kl, matchlen); if (offs_or_recent > 0) { token += 3 << 6; if (DoRecent) { recent.offs[6] = recent.offs[5]; recent.offs[5] = recent.offs[4]; recent.offs[4] = offs_or_recent; } kl.recent0 = offs_or_recent; KrakEnc_WriteOffs(kl, offs_or_recent); } else { if (DoRecent) { size_t idx = -offs_or_recent; assert(idx >= 0 && idx <= 2); token += idx << 6; int v = recent.offs[idx + 4]; recent.offs[idx + 4] = recent.offs[idx + 3]; recent.offs[idx + 3] = recent.offs[idx + 2]; recent.offs[4] = v; kl.recent0 = v; } } *kl.tokens++ = token; } template static inline void KrakEnc_AddFinal(KrakEncLz &kl, const uint8 *p, const uint8 *pend) { size_t len = pend - p; if (len) { memcpy(kl.lits, p, len); kl.lits += len; if (DoSubtract) { SubtractBytes(kl.sub_lits, p, len, -kl.recent0); kl.sub_lits += len; } } } template int KrakenCompressVeryfast(LzCoder *coder, LzTemp *lztemp, MatchLenStorage *mls_unused, const uint8 *src, int src_size, uint8 *dst, uint8 *dst_end, int start_pos, int *chunk_type_ptr, float *cost_ptr) { enum { SkipFactor = (Level >= 1) ? 5 : (Level >= -2) ? 4 : 3, DoRecent = Level >= 1, DoSubtract = Level >= -1, }; *chunk_type_ptr = -1; if (src_size <= 128) return -1; uint dict_size = coder->opts->dictionarySize > 0 && coder->opts->dictionarySize <= 0x40000000 ? coder->opts->dictionarySize : 0x40000000; KrakEncLz kl; KrakEncLz_Init(&kl, lztemp, src_size, src, coder->encode_flags); int initial_copy_bytes = (start_pos == 0) ? 8 : 0; const uint8 *src_end_minus8 = src + src_size - 8; const uint8 *src_end_minus16 = src + src_size - 16; int skip = 1 << SkipFactor; const uint8 *src_cur = src + initial_copy_bytes; const uint8 *src_end = src + src_size; const uint8 *litstart = src_cur; Hasher *hasher = (Hasher*)coder->hasher; KrakenRecentOffs recent; int last_offs = -8; const uint8 *src_base = hasher->src_base_; uint64 hashmult = hasher->hashmult_; typename Hasher::ElemType *hash_ptr = hasher->hash_ptr_, *hash; int hashshift = 64 - hasher->hash_bits_; const uint8 *src_cur_next; while ((skip >> SkipFactor) < src_end_minus16 - src_cur) { src_cur_next = src_cur + (skip >> SkipFactor); if (Level >= 0) { hash = &hash_ptr[(size_t)(*(uint64*)src_cur_next * hashmult >> hashshift)]; _mm_prefetch((char*)hash, _MM_HINT_T0); } hash = &hash_ptr[(size_t)(*(uint64*)src_cur * hashmult >> hashshift)]; uint32 u32_at_cur = *(uint32*)src_cur; uint32 hashval = *hash; *hash = src_cur - src_base; int offs, offs_or_recent; const uint8 *matchstart; if (Level >= 1 && (uint16)u32_at_cur == *(uint16*)&src_cur[-recent.offs[5]]) { offs = -recent.offs[5]; offs_or_recent = -1; matchstart = src_cur; src_cur += 2; } else if (Level >= 1 && (uint16)u32_at_cur == *(uint16*)&src_cur[-recent.offs[6]]) { offs = -recent.offs[6]; offs_or_recent = -2; matchstart = src_cur; src_cur += 2; } else if (!((u32_at_cur ^ *(uint32*)&src_cur[last_offs]) & 0xffffff00)) { src_cur += 1; offs = last_offs; offs_or_recent = 0; matchstart = src_cur; hash_ptr[(size_t)(*(uint64*)src_cur * hashmult >> hashshift)] = src_cur - src_base; src_cur += 3; } else { offs_or_recent = (typename Hasher::ElemType)(src_cur - src_base - hashval); if (u32_at_cur != *(uint32*)&src_cur[-offs_or_recent]) goto no_match; if ((uint)(offs_or_recent - 8) < (uint)(dict_size - 8)) { offs = -offs_or_recent; } else if (u32_at_cur == *(uint32*)(src_cur - 8)) { offs = -8; offs_or_recent = 8; } else { no_match: if (Level >= -2) skip++; else skip = std::min(skip + ((src_cur - litstart) >> 1), 296); src_cur = src_cur_next; continue; } // Reduce literal length and increase match length for bytes preceding the match. if (Level >= -2) { while (src_cur > litstart && (src_base - src_cur) < offs && src_cur[-1] == src_cur[offs - 1]) src_cur--; } matchstart = src_cur; src_cur += 4; } // compute length of it for (; src_cur < src_end - 8; src_cur += 4) { uint32 v = *(uint32*)src_cur ^ *(uint32*)(src_cur + offs); if (v != 0) { src_cur += (BSF(v) >> 3); break; } } src_cur = std::min(src_cur, src_end - 8); assert(matchstart >= litstart); int ml = src_cur - matchstart; KrakEnc_AddToken(kl, recent, litstart, matchstart - litstart, ml, offs_or_recent); litstart = src_cur; last_offs = offs; if (src_cur >= src_end - 16) break; if (Level >= 0) { for (int i = 1; i < ml; i *= 2) hash_ptr[(size_t)(*(uint64*)(i + matchstart) * hashmult >> hashshift)] = i + matchstart - src_base; } skip = 1 << SkipFactor; } KrakEnc_AddFinal(kl, litstart, src_end); return Kraken_EncodeLzArrays(cost_ptr, chunk_type_ptr, NULL, dst, dst_end, coder, lztemp, &kl, start_pos); } static __forceinline void KrakCheckRecentMatch(const uint8 *src, const uint8 *src_end, uint32 u32, KrakenRecentOffs &recent, int idx, int &best_ml, int &best_off) { int ml = GetMatchlengthQ(src, recent.offs[4 + idx], src_end, u32); if (ml > best_ml) best_ml = ml, best_off = idx; } template static __forceinline LengthAndOffset KrakenFast_GetMatch(const uint8 *cur_ptr, const uint8 *src_end_safe, const uint8 *lit_start, KrakenRecentOffs &recent, Hasher *hasher, const uint8 *next_cur_ptr, int dict_size, int min_match_length) { typename Hasher::HashPos hp = hasher->GetHashPos(cur_ptr); uint32 u32_at_src = *(uint32*)cur_ptr; hasher->SetHashPosPrefetch(next_cur_ptr); int recent_ml = 0, recent_off = -1; KrakCheckRecentMatch(cur_ptr, src_end_safe, u32_at_src, recent, 0, recent_ml, recent_off); KrakCheckRecentMatch(cur_ptr, src_end_safe, u32_at_src, recent, 1, recent_ml, recent_off); KrakCheckRecentMatch(cur_ptr, src_end_safe, u32_at_src, recent, 2, recent_ml, recent_off); int best_offs = 0, best_ml = 0; // If we found a recent offset at least 4 bytes long then use it. if (recent_ml >= 4) { hasher->Insert(hp); best_offs = -recent_off, best_ml = recent_ml; } else { // Make decompression faster by skipping short recent matches if (cur_ptr - lit_start >= 56) { if (recent_ml <= 2) recent_ml = 0; min_match_length = std::max(min_match_length, 5); } int max_ml = 0; uint32 *cur_hash_ptr = hp.ptr1; for (;;) { for (size_t hashidx = 0; hashidx != Hasher::NumHash; hashidx++) { if ((cur_hash_ptr[hashidx] & 0xfc000000) == (hp.hi & 0xfc000000)) { int cur_offs = (hp.pos - cur_hash_ptr[hashidx]) & 0x3ffffff; if (cur_offs < dict_size) { cur_offs = std::max(cur_offs, 8); if (*(uint32*)(cur_ptr - cur_offs) == u32_at_src && (max_ml < 4 || (cur_ptr + max_ml < src_end_safe && *(cur_ptr + max_ml) == *(cur_ptr + max_ml - cur_offs)))) { int cur_ml = 4 + CountMatchingBytes(cur_ptr + 4, src_end_safe, cur_offs); if (cur_ml > max_ml && cur_ml >= min_match_length) { max_ml = cur_ml; if (KrakIsMatchLongEnough(cur_ml, cur_offs) && IsMatchBetter(cur_ml, cur_offs, best_ml, best_offs)) best_offs = cur_offs, best_ml = cur_ml; } } } } } if (!Hasher::DualHash || cur_hash_ptr == hp.ptr2) break; cur_hash_ptr = hp.ptr2; } hasher->Insert(hp); if (!IsBetterThanRecent(recent_ml, best_ml, best_offs)) best_offs = -recent_off, best_ml = recent_ml; } LengthAndOffset match = { best_ml, best_offs }; return match; } template int KrakenCompressFast(LzCoder *lzcoder, LzTemp *lztemp, const uint8 *src, int src_len, uint8 *dst, uint8 *dst_end, int start_pos, int *chunk_type_ptr, float *cost_ptr) { const uint8 *src_end = src + src_len; KrakEncLz kl; KrakenRecentOffs recent; *chunk_type_ptr = -1; if (src_len <= 128) return src_len; int dict_size = lzcoder->opts->dictionarySize; dict_size = (dict_size <= 0) ? 0x40000000 : std::min(dict_size, 0x40000000); int min_match_length = std::max(lzcoder->opts->min_match_length, 4); int initial_copy_bytes = (start_pos == 0) ? 8 : 0; KrakEncLz_Init(&kl, lztemp, src_len, src, lzcoder->encode_flags); int recent0 = 8; const uint8 *cur_ptr = src + initial_copy_bytes, *litstart = cur_ptr; typedef MatchHasher Hasher; Hasher *hasher = (Hasher *)lzcoder->hasher; hasher->SetHashPos(cur_ptr); uint skip = 256; LengthAndOffset m; for(;;) { if ((NumHash == 1) && *(uint32*)(cur_ptr + 1) == *(uint32*)(cur_ptr + 1 - recent0)) { int len = CountMatchingBytes(cur_ptr + 5, src_end - 8, recent0) + 4; typename Hasher::HashPos hp = hasher->GetHashPos(cur_ptr); hasher->SetHashPosPrefetch(cur_ptr + 2); hasher->Insert(hp); cur_ptr += 1; m.offset = 0; m.length = len; } else { uint increment = (NumHash == 1) ? skip >> 8 : 1; const uint8 *next_ptr = cur_ptr + increment; if (src_end - 16 - cur_ptr <= increment) break; m = KrakenFast_GetMatch(cur_ptr, src_end - 8, litstart, recent, hasher, next_ptr, dict_size, min_match_length); if (m.length < 2) { cur_ptr = next_ptr; skip++; continue; } if (NumLazy >= 1) { while (cur_ptr + 1 < src_end - 16) { LengthAndOffset m1 = KrakenFast_GetMatch(cur_ptr + 1, src_end - 8, litstart, recent, hasher, cur_ptr + 2, dict_size, min_match_length); if (m1.length >= 2 && GetLazyScore(m1, m) > 0) { cur_ptr += 1; // lazy1 is better m = m1; } else { if (NumLazy < 2 || cur_ptr + 2 >= src_end - 16 || m.length == 2) break; LengthAndOffset m2 = KrakenFast_GetMatch(cur_ptr + 2, src_end - 8, litstart, recent, hasher, cur_ptr + 3, dict_size, min_match_length); if (m2.length >= 2 && GetLazyScore(m2, m) > 3) { cur_ptr += 2; // lazy2 is better m = m2; } else { break; } } } } } assert(m.offset >= -2); ptrdiff_t actual_offs = (m.offset <= 0) ? recent.offs[-m.offset + 4] : m.offset; // Reduce literal length and increase match length for bytes preceding the match. while (cur_ptr > litstart && cur_ptr - hasher->src_base_ > actual_offs && cur_ptr[-1] == cur_ptr[-actual_offs - 1]) cur_ptr--, m.length++; if (m.offset == 0 && cur_ptr == litstart) m.offset = -1; KrakEnc_AddToken(kl, recent, litstart, cur_ptr - litstart, m.length, m.offset); cur_ptr += m.length; litstart = cur_ptr; if (cur_ptr >= src_end - 16) break; recent0 = kl.recent0; hasher->InsertRange(cur_ptr - m.length, m.length); skip = 256; } KrakEnc_AddFinal(kl, litstart, src_end); return Kraken_EncodeLzArrays(cost_ptr, chunk_type_ptr, NULL, dst, dst_end, lzcoder, lztemp, &kl, start_pos); } static LengthAndOffset KrakenGetLzMatch(const LengthAndOffset *matches, KrakenRecentOffs *recents, const uint8 *src, const uint8 *src_end, int min_match_len, int arg_14, const uint8 *window_base, int max_match_offset) { uint32 m32 = *(uint32*)src; int best_ml = 0, best_offs = 0, ml; LengthAndOffset result; if ((ml = Krak::GetMatchlengthQ(src, recents->offs[4], src_end, m32)) > best_ml) best_ml = ml, best_offs = 0; if ((ml = Krak::GetMatchlengthQ(src, recents->offs[5], src_end, m32)) > best_ml) best_ml = ml, best_offs = -1; if ((ml = Krak::GetMatchlengthQ(src, recents->offs[6], src_end, m32)) > best_ml) best_ml = ml, best_offs = -2; if (best_ml < 4) { if (arg_14 >= 56) { if (best_ml <= 2) best_ml = 0; min_match_len++; } uint best_m_ml = 0, best_m_offs = 0; for (size_t i = 0; i != 4; i++) { uint ml = matches[i].length; if (ml < min_match_len) break; if (ml > src_end - src && (ml = src_end - src) < min_match_len) break; uint offs = matches[i].offset; if (offs >= max_match_offset) continue; // Since decoder copies in 64-bit chunks we cannot support copies // that are longer than the offset, if offset is less than 8. if (offs < 8) { // Expand the offset to at least 8 bytes long uint tt = offs; do offs += tt; while (offs < 8); // Check if it's a valid offset and still match if (offs > src - window_base) continue; ml = GetMatchlengthQMin3(src, offs, src_end, m32); if (ml < min_match_len) continue; } if (KrakIsMatchLongEnough(ml, offs) && IsMatchBetter(ml, offs, best_m_ml, best_m_offs)) { best_m_offs = offs; best_m_ml = ml; } } if (IsBetterThanRecent(best_ml, best_m_ml, best_m_offs)) { best_ml = best_m_ml; best_offs = best_m_offs; } } result.length = best_ml; result.offset = best_offs; return result; } static int RunKrakenMatcherToGetStats(float *cost_ptr, int *chunk_type_ptr, Krak::Stats *stats, uint8 *dst_ptr, uint8 *dst_end, int min_match_len, LzCoder *lzcoder, LengthAndOffset *lao, const uint8 *src_ptr, int src_len, int start_pos, const uint8 *window_base, LzTemp *lztemp) { memset(stats, 0, sizeof(*stats)); KrakenRecentOffs recent; KrakEncLz kl; KrakEncLz_Init(&kl, lztemp, src_len, src_ptr, lzcoder->encode_flags); int initial_copy_bytes = (start_pos == 0) ? 8 : 0; int pos = initial_copy_bytes, last_pos = initial_copy_bytes; int dict_size = lzcoder->opts->dictionarySize > 0 && lzcoder->opts->dictionarySize <= 0x40000000 ? lzcoder->opts->dictionarySize : 0x40000000; while (pos < src_len - 16) { LengthAndOffset m0 = KrakenGetLzMatch(&lao[4 * pos], &recent, &src_ptr[pos], &src_ptr[src_len - 8], min_match_len, pos - last_pos, window_base, dict_size); if (m0.length == 0) { pos++; continue; } while (pos + 1 < src_len - 16) { LengthAndOffset m1 = KrakenGetLzMatch(&lao[4 * (pos + 1)], &recent, &src_ptr[pos + 1], &src_ptr[src_len - 8], min_match_len, pos + 1 - last_pos, window_base, dict_size); if (m1.length && GetLazyScore(m1, m0) > 0) { pos++; // lazy1 is better m0 = m1; } else { if (pos + 2 >= src_len - 16) break; LengthAndOffset m2 = KrakenGetLzMatch(&lao[4 * (pos + 2)], &recent, &src_ptr[pos + 2], &src_ptr[src_len - 8], min_match_len, pos + 2 - last_pos, window_base, dict_size); if (m2.length && GetLazyScore(m2, m0) > 3) { pos += 2; // lazy2 is better m0 = m2; } else { break; } } } if (pos - last_pos == 0 && m0.offset == 0 && recent.offs[4] == recent.offs[5]) m0.offset = -1; KrakEnc_AddToken(kl, recent, src_ptr + last_pos, pos - last_pos, m0.length, m0.offset); pos += m0.length; last_pos = pos; } KrakEnc_AddFinal(kl, src_ptr + last_pos, src_ptr + src_len); int opts_org = lzcoder->entropy_opts; int level_org = lzcoder->compression_level; lzcoder->entropy_opts &= ~kEntropyOpt_MultiArray; lzcoder->compression_level = 4; int result = Kraken_EncodeLzArrays(cost_ptr, chunk_type_ptr, stats, dst_ptr, dst_end, lzcoder, lztemp, &kl, start_pos); lzcoder->compression_level = level_org; lzcoder->entropy_opts = opts_org; return result; } static int Krak_EncodeTokens(LzTemp *lztemp, float *cost_ptr, int *chunk_type_ptr, const uint8 *src, int src_len, uint8 *dst, uint8 *dst_end, int start_pos, LzCoder *lzcoder, Krak::TokenArray *tokarray, Krak::Stats *stat) { *cost_ptr = kInvalidCost; const uint8 *src_end = src + src_len; int tok_count = tokarray->size; if (tok_count == 0) return src_len; KrakenRecentOffs recent; KrakEncLz kl; KrakEncLz_Init(&kl, lztemp, src_len, src, lzcoder->encode_flags); src += (start_pos == 0) ? 8 : 0; for (int i = 0; i < tok_count; i++) { Krak::Token *tok = tokarray->data + i; KrakEnc_AddToken(kl, recent, src, tok->litlen, tok->matchlen, tok->offset); src += tok->litlen + tok->matchlen; } KrakEnc_AddFinal(kl, src, src_end); return Kraken_EncodeLzArrays(cost_ptr, chunk_type_ptr, stat, dst, dst_end, lzcoder, lztemp, &kl, start_pos); } template static __forceinline bool UpdateState(int state_idx, int bits, int lrl, int ml, int recent, int prev_state, int qrm, Krak::State *S) { Krak::State *st = &S[state_idx]; if (bits < st->best_bit_count) { st->best_bit_count = bits; st->litlen = lrl; st->matchlen = ml; int *recent_offs_ptr = S[prev_state].recent_offs; if (IsRecent) { assert(recent >= 0 && recent <= 2); if (recent == 0) { st->recent_offs[0] = recent_offs_ptr[0]; st->recent_offs[1] = recent_offs_ptr[1]; st->recent_offs[2] = recent_offs_ptr[2]; } else if (recent == 1) { st->recent_offs[0] = recent_offs_ptr[1]; st->recent_offs[1] = recent_offs_ptr[0]; st->recent_offs[2] = recent_offs_ptr[2]; } else { st->recent_offs[0] = recent_offs_ptr[2]; st->recent_offs[1] = recent_offs_ptr[0]; st->recent_offs[2] = recent_offs_ptr[1]; } } else { st->recent_offs[0] = recent; st->recent_offs[1] = recent_offs_ptr[0]; st->recent_offs[2] = recent_offs_ptr[1]; } st->quick_recent_matchlen_litlen = qrm; st->prev_state = prev_state; return true; } return false; } template static __forceinline void UpdateStatesZ(int pos, int bits, int lrl, int ml, int recent, int prev_state, Krak::State *S, const uint8 *src_ptr, int offs, int Z, Krak::CostModel &cm, int *litindexes) { int after_match = pos + ml; UpdateState(after_match * Z, bits, lrl, ml, recent, prev_state, 0, S); for (int jj = 1; jj < Z; jj++) { bits += Krak::BitsForLit(src_ptr, after_match + jj - 1, offs, cm, jj - 1); if (UpdateState((after_match + jj) * Z + jj, bits, lrl, ml, recent, prev_state, 0, S) && jj == Z - 1) litindexes[after_match + jj] = jj; } } int KrakenOptimal(LzCoder *lzcoder, LzTemp *lztemp, const MatchLenStorage *mls, const uint8 *src_ptr, int src_size, uint8 *dst_ptr, uint8 *dst_end, int start_pos, int *chunk_type_ptr, float *cost_ptr) { *chunk_type_ptr = 0; if (src_size <= 128) return -1; int Z = (lzcoder->compression_level >= 8) ? 2 : 1; int four_or_eight = (lzcoder->compression_level >= 6) ? 8 : 4; int dict_size = lzcoder->opts->dictionarySize > 0 && lzcoder->opts->dictionarySize <= 0x40000000 ? lzcoder->opts->dictionarySize : 0x40000000; int initial_copy_bytes = (start_pos == 0) ? 8 : 0; const uint8 *src_end_safe = src_ptr + src_size - 8; int min_match_length = std::max(lzcoder->opts->min_match_length, 4); int length_long_enough_thres = 1 << std::min(8, lzcoder->compression_level); LengthAndOffset *lao = (LengthAndOffset *)lztemp->allmatch_scratch.Allocate(sizeof(LengthAndOffset) * 4 * src_size); const uint8 *window_base = mls->window_base; ExtractLaoFromMls(mls, src_ptr - window_base, src_size, lao, 4); Krak::TokenArray lz_token_array; lz_token_array.capacity = src_size / 2 + 8; lz_token_array.size = 0; lz_token_array.data = (Krak::Token*)lztemp->lztoken2_scratch.Allocate(sizeof(Krak::Token) * lz_token_array.capacity); int tokens_capacity = 4096 + 8; Krak::Token *tokens_begin = (Krak::Token*)lztemp->lztoken_scratch.Allocate(sizeof(Krak::Token) * tokens_capacity); Krak::State *S = (Krak::State*)lztemp->states.Allocate(sizeof(Krak::State) * Z * (src_size + 1)); uint8 *tmp_dst = (uint8*)S, *tmp_dst_end = tmp_dst + lztemp->states.size; int chunk_type = 0, tmp_chunk_type; Krak::CostModel cost_model; std::vector litindexes_arr; int *litindexes = NULL; Krak::Stats stats, tmp_stats; float cost = kInvalidCost; int n_first = RunKrakenMatcherToGetStats(&cost, chunk_type_ptr, &stats, dst_ptr, dst_end, min_match_length, lzcoder, lao, src_ptr, src_size, start_pos, window_base, lztemp); if (n_first >= src_size) return -1; float best_cost = cost; int best_length = n_first; if ((lzcoder->compression_level >= 7 || lzcoder->codec_id == 12) && lzcoder->opts->min_match_length <= 3) { cost = kInvalidCost; int tmp_chunk_type; int n = RunKrakenMatcherToGetStats(&cost, &tmp_chunk_type, &tmp_stats, tmp_dst, tmp_dst_end, 3, lzcoder, lao, src_ptr, src_size, start_pos, window_base, lztemp); if (cost < best_cost && n < src_size) { *chunk_type_ptr = tmp_chunk_type; memcpy(dst_ptr, tmp_dst, n); best_cost = cost; best_length = n; min_match_length = 3; stats = tmp_stats; } } if (lzcoder->opts->min_match_length < 8) { cost = kInvalidCost; int tmp_chunk_type; int n = RunKrakenMatcherToGetStats(&cost, &tmp_chunk_type, &tmp_stats, tmp_dst, tmp_dst_end, 8, lzcoder, lao, src_ptr, src_size, start_pos, window_base, lztemp); if (cost < best_cost && n < src_size) { *chunk_type_ptr = tmp_chunk_type; memcpy(dst_ptr, tmp_dst, n); best_cost = cost; best_length = n; stats = tmp_stats; } } if (lzcoder->compression_level >= 7 || lzcoder->codec_id == 12) min_match_length = std::max(lzcoder->opts->min_match_length, 3); if (Z > 1) { litindexes_arr.clear(); litindexes_arr.resize(src_size + 1); litindexes = litindexes_arr.data(); } int outer_loop_index = 0; for (;;) { cost_model.chunk_type = *chunk_type_ptr; cost_model.sub_or_copy_mask = (*chunk_type_ptr != 1) ? -1 : 0; if (lzcoder->last_chunk_type < 0) { Krak::Stats::Rescale(&stats); } else { Krak::Stats *h2 = (Krak::Stats*)lzcoder->lvsymstats_scratch.ptr; Krak::Stats::RescaleAdd(&stats, h2, lzcoder->last_chunk_type == *chunk_type_ptr); } cost_model.offs_encode_type = stats.offs_encode_type; Krak::MakeCostModel(&stats, &cost_model); for (int i = 0; i <= Z * src_size; i++) S[i].best_bit_count = INT_MAX; int final_lz_offset = -1; int last_recent0 = 0; lz_token_array.size = 0; int chunk_start = initial_copy_bytes; // Initial state S[Z * chunk_start].Initialize(); while (chunk_start < src_size - 16) { int lit_bits_since_prev = 0; int prev_offset = chunk_start; int chunk_end = chunk_start + Krak::kMaxBytesPerRound; if (chunk_end >= src_size - 32) chunk_end = src_size - 16; int max_offset = chunk_start + Krak::kMinBytesPerRound; if (max_offset >= src_size - 32) max_offset = src_size - 16; int bits_for_encoding_offset_8 = Krak::BitsForOffset(cost_model, 8); if (Z > 1) { for (int i = 1; i < Z; i++) for (int j = 0; j < Z; j++) S[Z * (chunk_start + i) + j].best_bit_count = INT_MAX; for (int j = 1; j < Z; j++) S[Z * chunk_start + j].best_bit_count = INT_MAX; if (max_offset - chunk_start > Z) { for (int i = 1; i < Z; i++) { S[(chunk_start + i) * Z + i] = S[(chunk_start + i - 1) * Z + i - 1]; S[(chunk_start + i) * Z + i].best_bit_count += Krak::BitsForLit(src_ptr, chunk_start + i - 1, S[chunk_start * Z].recent_offs[0], cost_model, i - 1); } litindexes[chunk_start + Z - 1] = Z - 1; } else { // prevent loop from running chunk_start = src_size - 16; } } for (int pos = chunk_start; max_offset <= chunk_end; pos++) { if (pos == src_size - 16) { max_offset = pos; break; } const uint8 *src_cur = &src_ptr[pos]; uint32 u32_at_cur = *(uint32*)src_cur; if (Z == 1) { // Accumulate lit bits since previous best position, but as soon as there exists a better // path to the state, reset the accumulation. if (pos != prev_offset) { lit_bits_since_prev += Krak::BitsForLit(src_ptr, pos - 1, S[prev_offset].recent_offs[0], cost_model, pos - prev_offset - 1); int curbits = S[pos].best_bit_count; if (curbits != INT_MAX) { int prevbits = S[prev_offset].best_bit_count + lit_bits_since_prev; if (curbits < prevbits + Krak::BitsForLitlen(cost_model, pos - prev_offset)) { prev_offset = pos; lit_bits_since_prev = 0; if (pos >= max_offset) { max_offset = pos; break; } } } } } else if (Z > 1) { if (pos >= max_offset) { int tmp_cur_offset = 0, best_bits = 0x7FFFFFFF; for (int i = 0; i < Z; i++) { if (S[Z * pos + i].best_bit_count < best_bits) { best_bits = S[Z * pos + i].best_bit_count; tmp_cur_offset = pos - ((i != Z - 1) ? i : litindexes[pos]); } } if (tmp_cur_offset >= max_offset) { max_offset = tmp_cur_offset; break; } } // Update the alternate state with the lit cost. Krak::State *cur = &S[Z * pos + Z - 1]; if (cur->best_bit_count != 0x7FFFFFFF) { int bits = cur->best_bit_count + Krak::BitsForLit(src_ptr, pos, cur->recent_offs[0], cost_model, litindexes[pos]); if (bits < cur[Z].best_bit_count) { cur[Z] = *cur; cur[Z].best_bit_count = bits; litindexes[pos + 1] = litindexes[pos] + 1; } } } // Extract the possible matches from the match table LengthAndOffset match[8]; int match_found_offset_bits[8]; int num_match = 0; for (int lao_index = 0; lao_index < 4; lao_index++) { uint lao_ml = lao[4 * pos + lao_index].length; uint lao_offs = lao[4 * pos + lao_index].offset; if (lao_ml < min_match_length) break; lao_ml = std::min(lao_ml, src_end_safe - src_cur); if (lao_offs >= dict_size) continue; if ((uint)lao_offs < 8) { // Expand the offset to at least 8 bytes long int tt = lao_offs; do lao_offs += tt; while ((uint)lao_offs < 8); // Check if it's a valid offset and still match if (lao_offs > src_cur - window_base) continue; lao_ml = GetMatchlengthQMin4(src_cur, lao_offs, src_end_safe, u32_at_cur); if (lao_ml < min_match_length) continue; } if (Krak::CheckMatchValidLength(lao_ml, lao_offs)) { match[num_match].length = lao_ml; match[num_match].offset = lao_offs; match_found_offset_bits[num_match++] = Krak::BitsForOffset(cost_model, lao_offs); } } // Also always check offset 8 int length = GetMatchlengthQMin3(src_cur, 8, src_end_safe, u32_at_cur); if (length >= min_match_length) { match[num_match].length = length; match[num_match].offset = 8; match_found_offset_bits[num_match++] = bits_for_encoding_offset_8; } int best_length_so_far = 0; int lits_since_prev = pos - prev_offset; int best_bits_y = 0x7FFFFFFF; // For each few literal lengths right before the current position, check if it improves things // by encoding the literals explicitly. for (int lazy = 0; lazy <= four_or_eight; lazy++) { int lrl, total_bits, prev_state; if (Z == 1) { lrl = (lazy == four_or_eight && lits_since_prev > four_or_eight) ? lits_since_prev : lazy; if (pos - lrl < chunk_start) break; prev_state = pos - lrl; total_bits = S[prev_state].best_bit_count; if (total_bits == INT_MAX) continue; total_bits += (lrl == lits_since_prev) ? lit_bits_since_prev : Krak::BitsForLits(src_ptr, pos - lrl, lrl, S[prev_state].recent_offs[0], cost_model, 0); } else { if (lazy < Z) { prev_state = Z * pos + lazy; total_bits = S[prev_state].best_bit_count; if (total_bits == INT_MAX) continue; lrl = (lazy == Z - 1) ? litindexes[pos] : lazy; } else { lrl = (lazy - 1); if (pos - lrl < chunk_start) break; prev_state = Z * (pos - lrl); total_bits = S[prev_state].best_bit_count; if (total_bits == INT_MAX) continue; total_bits += Krak::BitsForLits(src_ptr, pos - lrl, lrl, S[prev_state].recent_offs[0], cost_model, 0); } } int length_field = lrl; if (lrl >= 3) { length_field = 3; total_bits += Krak::BitsForLitlen(cost_model, lrl); } int recent_best_length = 0; // For each recent offset for (int ridx = 0; ridx < Krak::kRecentOffsetCount; ridx++) { int offs = S[prev_state].recent_offs[ridx]; int ml = Krak::GetMatchlengthQ(src_cur, offs, src_end_safe, u32_at_cur); if (ml <= recent_best_length) continue; recent_best_length = ml; max_offset = std::max(max_offset, pos + ml); int full_bits = total_bits + Krak::BitsForToken(cost_model, ml, pos - lrl, ridx, length_field); UpdateStatesZ(pos, full_bits, lrl, ml, ridx, prev_state, S, src_ptr, offs, Z, cost_model, litindexes); if (ml > 2 && ml < length_long_enough_thres) { for (int tml = 2; tml < ml; tml++) UpdateStatesZ(pos, total_bits + Krak::BitsForToken(cost_model, tml, pos - lrl, ridx, length_field), lrl, tml, ridx, prev_state, S, src_ptr, offs, Z, cost_model, litindexes); } // check if we have another recent0 match after 1-2 lits if (pos + ml + 4 < src_size - 16) { for (int num_lazy = 1; num_lazy <= 2; num_lazy++) { int tml = Krak::GetMatchlengthMin2(src_cur + ml + num_lazy, offs, src_end_safe); if (tml) { int cost = full_bits + Krak::BitsForLits(src_ptr, pos + ml, num_lazy, offs, cost_model, 0) + Krak::BitsForToken(cost_model, tml, pos + ml, 0, num_lazy); max_offset = std::max(max_offset, pos + ml + tml + num_lazy); UpdateState((pos + ml + tml + num_lazy) * Z, cost, lrl, ml, ridx, prev_state, num_lazy | (tml << 8), S); break; } } } } // For each recent offset - END best_length_so_far = std::max(best_length_so_far, recent_best_length); if (best_length_so_far >= length_long_enough_thres) break; if (total_bits < best_bits_y) { best_bits_y = total_bits; // For each match entry BEGIN for (int matchidx = 0; matchidx < num_match; ++matchidx) { int ml = match[matchidx].length; int offs = match[matchidx].offset; if (ml <= recent_best_length) break; int after_match = pos + ml; best_length_so_far = std::max(best_length_so_far, ml); max_offset = std::max(max_offset, after_match); int bits_with_offlen = total_bits + match_found_offset_bits[matchidx]; int full_bits = bits_with_offlen + Krak::BitsForToken(cost_model, ml, pos - lrl, Krak::kRecentOffsetCount, length_field); UpdateStatesZ(pos, full_bits, lrl, ml, offs, prev_state, S, src_ptr, offs, Z, cost_model, litindexes); if (ml > min_match_length && ml < length_long_enough_thres) { for (int tml = min_match_length; tml < ml; tml++) UpdateStatesZ(pos, bits_with_offlen + Krak::BitsForToken(cost_model, tml, pos - lrl, Krak::kRecentOffsetCount, length_field), lrl, tml, offs, prev_state, S, src_ptr, offs, Z, cost_model, litindexes); } // check if we have another recent0 match after 1-2 lits if (after_match + 4 < src_size - 16) { for (int num_lazy = 1; num_lazy <= 2; num_lazy++) { int tml = Krak::GetMatchlengthMin2(src_cur + ml + num_lazy, offs, src_end_safe); if (tml) { int cost = full_bits + Krak::BitsForLits(src_ptr, after_match, num_lazy, offs, cost_model, 0) + Krak::BitsForToken(cost_model, tml, after_match, 0, num_lazy); max_offset = std::max(max_offset, after_match + tml + num_lazy); UpdateState((after_match + tml + num_lazy) * Z, cost, lrl, ml, offs, prev_state, num_lazy | (tml << 8), S); break; } } } } // End of match Loop } } // Length is long enough to directly skip over some input if (best_length_so_far >= length_long_enough_thres) { int current_end = best_length_so_far + pos; if (max_offset == current_end) { max_offset = prev_offset = current_end; break; } if (Z == 1) { lit_bits_since_prev = 0; prev_offset = current_end; } else { int recent_offs = S[current_end * Z].recent_offs[0]; for (int i = 1; i < Z; i++) { S[(current_end + i) * Z + i] = S[(current_end + i - 1) * Z + i - 1]; S[(current_end + i) * Z + i].best_bit_count += Krak::BitsForLit(src_ptr, current_end + i - 1, recent_offs, cost_model, i - 1); } litindexes[current_end + Z - 1] = Z - 1; } pos = current_end - 1; } } // for (max_offset <= chunk_end) int last_state_index = Z * max_offset; bool reached_end = (max_offset >= src_size - 18); if (reached_end) { int best_bits = INT_MAX; if (Z == 1) { for (int final_offs = std::max(chunk_start, prev_offset - 8); final_offs < src_size; final_offs++) { int bits = S[final_offs].best_bit_count; if (bits != INT_MAX) { bits += Krak::BitsForLits(src_ptr, final_offs, src_size - final_offs, S[final_offs].recent_offs[0], cost_model, 0); if (bits < best_bits) { best_bits = bits; final_lz_offset = final_offs; last_state_index = final_offs; } } } } else { for (int final_offs = std::max(chunk_start, max_offset - 8); final_offs < src_size; final_offs++) { for (int idx = 0; idx < Z; idx++) { int bits = S[Z * final_offs + idx].best_bit_count; if (bits != INT_MAX) { int litidx = ((idx == Z - 1) ? litindexes[final_offs] : idx); int offs = final_offs - litidx; if (offs >= chunk_start) { bits += Krak::BitsForLits(src_ptr, final_offs, src_size - final_offs, S[Z * final_offs + idx].recent_offs[0], cost_model, litidx); if (bits < best_bits) { best_bits = bits; final_lz_offset = offs; last_state_index = Z * final_offs + idx; } } } } } } max_offset = final_lz_offset; last_recent0 = S[last_state_index].recent_offs[0]; } int outoffs = max_offset; int num_tokens = 0; Krak::State *state_cur = &S[last_state_index], *state_prev; while (outoffs != chunk_start) { uint32 qrm = state_cur->quick_recent_matchlen_litlen; if (qrm != 0) { outoffs = outoffs - (qrm >> 8) - (uint8)qrm; assert(num_tokens < tokens_capacity); tokens_begin[num_tokens].recent_offset0 = state_cur->recent_offs[0]; tokens_begin[num_tokens].offset = 0; tokens_begin[num_tokens].matchlen = qrm >> 8; tokens_begin[num_tokens].litlen = (uint8)qrm; num_tokens++; } outoffs = outoffs - state_cur->litlen - state_cur->matchlen; state_prev = &S[state_cur->prev_state]; int recent0 = state_cur->recent_offs[0]; int recent_index = Krak::GetRecentOffsetIndex(state_prev->recent_offs, recent0); assert(num_tokens < tokens_capacity); tokens_begin[num_tokens].recent_offset0 = state_prev->recent_offs[0]; tokens_begin[num_tokens].litlen = state_cur->litlen; tokens_begin[num_tokens].matchlen = state_cur->matchlen; tokens_begin[num_tokens].offset = (recent_index >= 0) ? -recent_index : recent0; num_tokens++; state_cur = state_prev; } std::reverse(tokens_begin, &tokens_begin[num_tokens]); memcpy(lz_token_array.data + lz_token_array.size, tokens_begin, sizeof(Krak::Token) * num_tokens); lz_token_array.size += num_tokens; if (reached_end) break; Krak::Stats::ReduceIfHigh(&stats); Krak::Stats::Update(&stats, src_ptr, chunk_start, tokens_begin, num_tokens); Krak::MakeCostModel(&stats, &cost_model); chunk_start = max_offset; } float cost = kInvalidCost; int n = Krak_EncodeTokens(lztemp, &cost, &tmp_chunk_type, src_ptr, src_size, tmp_dst, tmp_dst_end, start_pos, lzcoder, &lz_token_array, &stats); if (cost >= best_cost) break; *chunk_type_ptr = tmp_chunk_type; best_cost = cost; best_length = n; memcpy(dst_ptr, tmp_dst, n); if (lzcoder->compression_level < 8 || outer_loop_index || cost_model.chunk_type == tmp_chunk_type) { Krak::Stats *ks = (Krak::Stats *)lzcoder->lvsymstats_scratch.Allocate(sizeof(Krak::Stats)); *ks = stats; lzcoder->last_chunk_type = tmp_chunk_type; break; } lzcoder->last_chunk_type = -1; outer_loop_index = 1; } *cost_ptr = best_cost; return best_length; } int KrakenDoCompress(LzCoder *coder, LzTemp *lztemp, MatchLenStorage *mls, const uint8 *src, int src_size, uint8 *dst, uint8 *dst_end, int start_pos, int *chunk_type_ptr, float *cost_ptr) { if (coder->compression_level == 1) return KrakenCompressVeryfast<1, FastMatchHasher>(coder, lztemp, mls, src, src_size, dst, dst_end, start_pos, chunk_type_ptr, cost_ptr); if (coder->compression_level == -1) return KrakenCompressVeryfast<-1, FastMatchHasher>(coder, lztemp, mls, src, src_size, dst, dst_end, start_pos, chunk_type_ptr, cost_ptr); if (coder->compression_level == -2) return KrakenCompressVeryfast<-2, FastMatchHasher>(coder, lztemp, mls, src, src_size, dst, dst_end, start_pos, chunk_type_ptr, cost_ptr); if (coder->compression_level == -3) return KrakenCompressVeryfast<-3, FastMatchHasher>(coder, lztemp, mls, src, src_size, dst, dst_end, start_pos, chunk_type_ptr, cost_ptr); if (coder->compression_level == 2) return KrakenCompressFast<2, false, 0>(coder, lztemp, src, src_size, dst, dst_end, start_pos, chunk_type_ptr, cost_ptr); if (coder->compression_level == 3) return KrakenCompressFast<4, false, 1>(coder, lztemp, src, src_size, dst, dst_end, start_pos, chunk_type_ptr, cost_ptr); if (coder->compression_level == 4) return KrakenCompressFast<4, true, 2>(coder, lztemp, src, src_size, dst, dst_end, start_pos, chunk_type_ptr, cost_ptr); if (coder->compression_level >= 5) return KrakenOptimal(coder, lztemp, mls, src, src_size, dst, dst_end, start_pos, chunk_type_ptr, cost_ptr); assert(0); return -1; } void SetupEncoder_Kraken(LzCoder *coder, int src_len, int level, const CompressOptions *copts, const uint8 *src_base, const uint8 *src_start) { assert(src_base && src_start); int hash_bits = GetHashBits(src_len, std::max(level, 2), copts, 16, 20, 17, 24); coder->codec_id = kCompressorKraken; coder->quantum_blocksize = 0x20000; coder->check_plain_huffman = (level >= 3); coder->platforms = 0; coder->compression_level = level; coder->opts = copts; coder->speed_tradeoff = (copts->spaceSpeedTradeoffBytes * 0.00390625f) * 0.0099999998f; coder->max_matches_to_consider = 4; coder->limit_local_dictsize = (level >= 6); coder->compressor_file_id = 6; coder->encode_flags = 0; coder->entropy_opts = 0xff & ~kEntropyOpt_MultiArrayAdvanced; if (level >= 7) coder->entropy_opts |= kEntropyOpt_MultiArrayAdvanced; if (level >= 5) coder->encode_flags = 4; int min_match_len = 4; if (src_len > 0x10000 && level >= -2 && level <= 3 && IsProbablyText(src_start, src_len)) min_match_len = 6; if (level >= 0 && level <= 1) { if (copts->hashBits <= 0) hash_bits = std::min(hash_bits, 19); CreateLzHasher, 0x1000000>(coder, src_base, src_start, hash_bits, min_match_len); coder->entropy_opts &= ~(kEntropyOpt_tANS | kEntropyOpt_MultiArray | kEntropyOpt_RLE); } else if (level == -1) { if (copts->hashBits <= 0) hash_bits = std::min(hash_bits, 16); CreateLzHasher, 0x1000000>(coder, src_base, src_start, hash_bits, min_match_len); coder->entropy_opts &= ~(kEntropyOpt_tANS | kEntropyOpt_MultiArray | kEntropyOpt_RLE); } else if (level == -2) { if (copts->hashBits <= 0) hash_bits = std::min(hash_bits, 14); CreateLzHasher, 0x1000000>(coder, src_base, src_start, hash_bits, min_match_len); coder->entropy_opts &= ~(kEntropyOpt_tANS | kEntropyOpt_MultiArray | kEntropyOpt_RLE); } else if (level == -3) { if (copts->hashBits <= 0) hash_bits = std::min(hash_bits, 12); CreateLzHasher, 0x1000000>(coder, src_base, src_start, hash_bits, min_match_len); coder->entropy_opts &= ~(kEntropyOpt_tANS | kEntropyOpt_MultiArray | kEntropyOpt_RLE); } else if (level == 2) { CreateLzHasher< MatchHasher<2, false> >(coder, src_base, src_start, hash_bits, min_match_len); coder->entropy_opts &= ~(kEntropyOpt_tANS | kEntropyOpt_MultiArray); } else if (level == 3) { CreateLzHasher< MatchHasher<4, false> >(coder, src_base, src_start, hash_bits, min_match_len); coder->entropy_opts &= ~(kEntropyOpt_tANS | kEntropyOpt_MultiArray); } else if (level == 4) { CreateLzHasher< MatchHasher<4, true> >(coder, src_base, src_start, hash_bits, 0); coder->entropy_opts &= ~(kEntropyOpt_tANS | kEntropyOpt_MultiArrayAdvanced); } }