// File: lzham_lzcomp_internal.h // See Copyright Notice and license at the end of include/lzham.h #pragma once #include "lzham_match_accel.h" #include "lzham_symbol_codec.h" #include "lzham_lzbase.h" namespace lzham { typedef lzham::vector byte_vec; const uint cMaxParseGraphNodes = 3072; const uint cMaxParseThreads = 8; enum compression_level { cCompressionLevelFastest, cCompressionLevelFaster, cCompressionLevelDefault, cCompressionLevelBetter, cCompressionLevelUber, cCompressionLevelCount }; struct comp_settings { uint m_fast_bytes; bool m_fast_adaptive_huffman_updating; bool m_use_polar_codes; uint m_match_accel_max_matches_per_probe; uint m_match_accel_max_probes; }; class lzcompressor : public CLZBase { public: lzcompressor(); struct init_params { enum { cMinDictSizeLog2 = CLZBase::cMinDictSizeLog2, cMaxDictSizeLog2 = CLZBase::cMaxDictSizeLog2, cDefaultBlockSize = 1024U*512U }; init_params() : m_pTask_pool(NULL), m_max_helper_threads(0), m_compression_level(cCompressionLevelDefault), m_dict_size_log2(22), m_block_size(cDefaultBlockSize), m_num_cachelines(0), m_cacheline_size(0), m_lzham_compress_flags(0), m_pSeed_bytes(0), m_num_seed_bytes(0) { } task_pool* m_pTask_pool; uint m_max_helper_threads; compression_level m_compression_level; uint m_dict_size_log2; uint m_block_size; uint m_num_cachelines; uint m_cacheline_size; uint m_lzham_compress_flags; const void *m_pSeed_bytes; uint m_num_seed_bytes; }; bool init(const init_params& params); void clear(); // sync, or sync+dictionary flush bool flush(lzham_flush_t flush_type); bool reset(); bool put_bytes(const void* pBuf, uint buf_len); const byte_vec& get_compressed_data() const { return m_comp_buf; } byte_vec& get_compressed_data() { return m_comp_buf; } uint32 get_src_adler32() const { return m_src_adler32; } private: class state; enum { cLitComplexity = 1, cRep0Complexity = 2, cRep3Complexity = 5, cLongMatchComplexity = 6, cLongMatchComplexityLenThresh = 9, cShortMatchComplexity = 7 }; struct lzdecision { int m_pos; // dict position where decision was evaluated int m_len; // 0 if literal, 1+ if match int m_dist; // <0 if match rep, else >=1 is match dist inline lzdecision() { } inline lzdecision(int pos, int len, int dist) : m_pos(pos), m_len(len), m_dist(dist) { } inline void init(int pos, int len, int dist) { m_pos = pos; m_len = len; m_dist = dist; } inline bool is_lit() const { return !m_len; } inline bool is_match() const { return m_len > 0; } // may be a rep or full match inline bool is_full_match() const { return (m_len > 0) && (m_dist >= 1); } inline uint get_len() const { return math::maximum(m_len, 1); } inline bool is_rep() const { return m_dist < 0; } inline bool is_rep0() const { return m_dist == -1; } uint get_match_dist(const state& s) const; inline uint get_complexity() const { if (is_lit()) return cLitComplexity; else if (is_rep()) { LZHAM_ASSUME(cRep0Complexity == 2); return 1 + -m_dist; // 2, 3, 4, or 5 } else if (get_len() >= cLongMatchComplexityLenThresh) return cLongMatchComplexity; else return cShortMatchComplexity; } inline uint get_min_codable_len() const { if (is_lit() || is_rep0()) return 1; else return CLZBase::cMinMatchLen; } }; struct lzpriced_decision : lzdecision { lzpriced_decision() { } inline lzpriced_decision(int pos, int len, int dist) : lzdecision(pos, len, dist) { } inline lzpriced_decision(int pos, int len, int dist, bit_cost_t cost) : lzdecision(pos, len, dist), m_cost(cost) { } inline void init(int pos, int len, int dist, bit_cost_t cost) { lzdecision::init(pos, len, dist); m_cost = cost; } inline bit_cost_t get_cost() const { return m_cost; } bit_cost_t m_cost; }; struct state_base { uint m_cur_ofs; uint m_cur_state; uint m_match_hist[CLZBase::cMatchHistSize]; inline bool operator== (const state_base &rhs) const { if (m_cur_state != rhs.m_cur_state) return false; for (uint i = 0; i < CLZBase::cMatchHistSize; i++) if (m_match_hist[i] != rhs.m_match_hist[i]) return false; return true; } void partial_advance(const lzdecision& lzdec); inline void save_partial_state(state_base& dst) { dst.m_cur_ofs = m_cur_ofs; dst.m_cur_state = m_cur_state; memcpy(dst.m_match_hist, m_match_hist, sizeof(m_match_hist)); } inline void restore_partial_state(const state_base& src) { m_cur_ofs = src.m_cur_ofs; m_cur_state = src.m_cur_state; memcpy(m_match_hist, src.m_match_hist, sizeof(m_match_hist)); } }; class state : public state_base { public: state(); void clear(); bool init(CLZBase& lzbase, bool fast_adaptive_huffman_updating, bool use_polar_codes); void reset(); bit_cost_t get_cost(CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec) const; bit_cost_t get_len2_match_cost(CLZBase& lzbase, uint dict_pos, uint len2_match_dist, uint is_match_model_index); bit_cost_t get_lit_cost(const search_accelerator& dict, uint dict_pos, uint lit_pred0, uint is_match_model_index) const; // Returns actual cost. void get_rep_match_costs(uint dict_pos, bit_cost_t *pBitcosts, uint match_hist_index, int min_len, int max_len, uint is_match_model_index) const; void get_full_match_costs(CLZBase& lzbase, uint dict_pos, bit_cost_t *pBitcosts, uint match_dist, int min_len, int max_len, uint is_match_model_index) const; bit_cost_t update_stats(CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec); bool advance(CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec); bool encode(symbol_codec& codec, CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec); void print(symbol_codec& codec, CLZBase& lzbase, const search_accelerator& dict, const lzdecision& lzdec); bool encode_eob(symbol_codec& codec, const search_accelerator& dict, uint dict_pos); bool encode_reset_state_partial(symbol_codec& codec, const search_accelerator& dict, uint dict_pos); void update_match_hist(uint match_dist); int find_match_dist(uint match_hist) const; void reset_state_partial(); void start_of_block(const search_accelerator& dict, uint cur_ofs, uint block_index); void reset_update_rate(); uint get_pred_char(const search_accelerator& dict, int pos, int backward_ofs) const; inline bool will_reference_last_match(const lzdecision& lzdec) const { return (!lzdec.is_match()) && (m_cur_state >= CLZBase::cNumLitStates); } uint m_block_start_dict_ofs; adaptive_bit_model m_is_match_model[CLZBase::cNumStates * (1 << CLZBase::cNumIsMatchContextBits)]; adaptive_bit_model m_is_rep_model[CLZBase::cNumStates]; adaptive_bit_model m_is_rep0_model[CLZBase::cNumStates]; adaptive_bit_model m_is_rep0_single_byte_model[CLZBase::cNumStates]; adaptive_bit_model m_is_rep1_model[CLZBase::cNumStates]; adaptive_bit_model m_is_rep2_model[CLZBase::cNumStates]; #if LZHAM_USE_ALL_ARITHMETIC_CODING typedef adaptive_arith_data_model sym_data_model; #else typedef quasi_adaptive_huffman_data_model sym_data_model; #endif sym_data_model m_lit_table[1 << CLZBase::cNumLitPredBits]; sym_data_model m_delta_lit_table[1 << CLZBase::cNumDeltaLitPredBits]; sym_data_model m_main_table; sym_data_model m_rep_len_table[2]; sym_data_model m_large_len_table[2]; sym_data_model m_dist_lsb_table; }; class tracked_stat { public: tracked_stat() { clear(); } void clear() { m_num = 0; m_total = 0.0f; m_total2 = 0.0f; m_min_val = 9e+99; m_max_val = -9e+99; } void update(double val) { m_num++; m_total += val; m_total2 += val * val; m_min_val = LZHAM_MIN(m_min_val, val); m_max_val = LZHAM_MAX(m_max_val, val); } tracked_stat &operator += (double val) { update(val); return *this; } operator double() const { return m_total; } uint64 get_number_of_values() { return m_num; } uint32 get_number_of_values32() { return static_cast(LZHAM_MIN(UINT_MAX, m_num)); } double get_total() const { return m_total; } double get_average() const { return m_num ? m_total / m_num : 0.0f; }; double get_std_dev() const { return m_num ? sqrt( m_num * m_total2 - m_total * m_total ) / m_num: 0.0f; } double get_min_val() const { return m_num ? m_min_val : 0.0f; } double get_max_val() const { return m_num ? m_max_val : 0.0f; } private: uint64 m_num; double m_total; double m_total2; double m_min_val; double m_max_val; }; struct coding_stats { coding_stats() { clear(); } void clear(); void update(const lzdecision& lzdec, const state& cur_state, const search_accelerator& dict, bit_cost_t cost); void print(); uint m_total_bytes; uint m_total_contexts; double m_total_cost; tracked_stat m_context_stats; double m_total_match_bits_cost; double m_worst_match_bits_cost; double m_total_is_match0_bits_cost; double m_total_is_match1_bits_cost; uint m_total_truncated_matches; uint m_match_truncation_len_hist[CLZBase::cMaxMatchLen + 1]; uint m_match_truncation_hist[CLZBase::cMaxMatchLen + 1]; uint m_match_type_truncation_hist[CLZBase::cNumStates][5]; uint m_match_type_was_not_truncated_hist[CLZBase::cNumStates][5]; uint m_total_nonmatches; uint m_total_matches; tracked_stat m_lit_stats; tracked_stat m_delta_lit_stats; tracked_stat m_rep_stats[CLZBase::cMatchHistSize]; tracked_stat m_rep0_len1_stats; tracked_stat m_rep0_len2_plus_stats; tracked_stat m_full_match_stats[cMaxMatchLen + 1]; uint m_total_far_len2_matches; uint m_total_near_len2_matches; uint m_total_update_rate_resets; uint m_max_len2_dist; }; init_params m_params; comp_settings m_settings; int64 m_src_size; uint32 m_src_adler32; search_accelerator m_accel; symbol_codec m_codec; coding_stats m_stats; byte_vec m_block_buf; byte_vec m_comp_buf; uint m_step; uint m_block_start_dict_ofs; uint m_block_index; bool m_finished; bool m_use_task_pool; struct node_state { LZHAM_FORCE_INLINE void clear() { m_total_cost = cBitCostMax; //math::cNearlyInfinite; m_total_complexity = UINT_MAX; } // the lzdecision that led from parent to this node_state lzdecision m_lzdec; // This is either the state of the parent node (optimal parsing), or the state of the child node (extreme parsing). state::state_base m_saved_state; // Total cost to arrive at this node state. bit_cost_t m_total_cost; uint m_total_complexity; // Parent node index. int16 m_parent_index; // Parent node state index (only valid when extreme parsing). int8 m_parent_state_index; }; struct node { LZHAM_FORCE_INLINE void clear() { m_num_node_states = 0; } uint m_num_node_states; enum { cMaxNodeStates = 4 }; node_state m_node_states[cMaxNodeStates]; void add_state(int parent_index, int parent_state_index, const lzdecision &lzdec, state &parent_state, bit_cost_t total_cost, uint total_complexity); }; state m_start_of_block_state; // state at start of block state m_state; // main thread's current coding state struct raw_parse_thread_state { uint m_start_ofs; uint m_bytes_to_match; state m_initial_state; node m_nodes[cMaxParseGraphNodes + 1]; lzham::vector m_best_decisions; bool m_emit_decisions_backwards; lzham::vector m_temp_decisions; uint m_max_greedy_decisions; uint m_greedy_parse_total_bytes_coded; bool m_greedy_parse_gave_up; bool m_issue_reset_state_partial; bool m_failed; }; struct parse_thread_state : raw_parse_thread_state { uint8 m_unused_alignment_array[128 - (sizeof(raw_parse_thread_state) & 127)]; }; uint m_num_parse_threads; parse_thread_state m_parse_thread_state[cMaxParseThreads + 1]; // +1 extra for the greedy parser thread (only used for delta compression) volatile atomic32_t m_parse_jobs_remaining; semaphore m_parse_jobs_complete; enum { cMaxBlockHistorySize = 6, cBlockHistoryCompRatioScale = 1000U }; struct block_history { uint m_comp_size; uint m_src_size; uint m_ratio; bool m_raw_block; bool m_reset_update_rate; }; block_history m_block_history[cMaxBlockHistorySize]; uint m_block_history_size; uint m_block_history_next; void update_block_history(uint comp_size, uint src_size, uint ratio, bool raw_block, bool reset_update_rate); uint get_recent_block_ratio(); uint get_min_block_ratio(); uint get_max_block_ratio(); uint get_total_recent_reset_update_rate(); bool send_zlib_header(); bool init_seed_bytes(); bool send_final_block(); bool send_configuration(); bool extreme_parse(parse_thread_state &parse_state); bool optimal_parse(parse_thread_state &parse_state); int enumerate_lz_decisions(uint ofs, const state& cur_state, lzham::vector& decisions, uint min_match_len, uint max_match_len); bool greedy_parse(parse_thread_state &parse_state); void parse_job_callback(uint64 data, void* pData_ptr); bool compress_block(const void* pBuf, uint buf_len); bool compress_block_internal(const void* pBuf, uint buf_len); bool code_decision(lzdecision lzdec, uint& cur_ofs, uint& bytes_to_match); bool send_sync_block(lzham_flush_t flush_type); }; } // namespace lzham