/*
 * Copyright (C) 2020 Siara Logics (cc)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @author Arundale Ramanathan
 *
 */
/**
 * @file unishox2.c
 * @author Arundale Ramanathan, James Z. M. Gao
 * @brief Main code of Unishox2 Compression and Decompression library
 *
 * This file implements the code for the Unishox API function \n
 * defined in unishox2.h
 */

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdint.h>
#include <limits.h>

#include "unishox2.h"

/// uint8_t is unsigned char
typedef unsigned char uint8_t;

/// possible horizontal sets and states
enum {USX_ALPHA = 0, USX_SYM, USX_NUM, USX_DICT, USX_DELTA, USX_NUM_TEMP};

/// This 2D array has the characters for the sets USX_ALPHA, USX_SYM and USX_NUM. Where a character cannot fit into a uint8_t, 0 is used and handled in code.
uint8_t usx_sets[][28] = {{  0, ' ', 'e', 't', 'a', 'o', 'i', 'n',
                        's', 'r', 'l', 'c', 'd', 'h', 'u', 'p', 'm', 'b',
                        'g', 'w', 'f', 'y', 'v', 'k', 'q', 'j', 'x', 'z'},
                       {'"', '{', '}', '_', '<', '>', ':', '\n',
                          0, '[', ']', '\\', ';', '\'', '\t', '@', '*', '&',
                        '?', '!', '^', '|', '\r', '~', '`', 0, 0, 0},
                       {  0, ',', '.', '0', '1', '9', '2', '5', '-',
                        '/', '3', '4', '6', '7', '8', '(', ')', ' ',
                        '=', '+', '$', '%', '#', 0, 0, 0, 0, 0}};

/// Stores position of letter in usx_sets.
/// First 3 bits - position in usx_hcodes
/// Next  5 bits - position in usx_vcodes
uint8_t usx_code_94[94];

/// Vertical codes starting from the MSB
uint8_t usx_vcodes[]   = { 0x00, 0x40, 0x60, 0x80, 0x90, 0xA0, 0xB0,
                        0xC0, 0xD0, 0xD8, 0xE0, 0xE4, 0xE8, 0xEC,
                        0xEE, 0xF0, 0xF2, 0xF4, 0xF6, 0xF7, 0xF8,
                        0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF };

/// Length of each veritical code
uint8_t usx_vcode_lens[] = {  2,    3,    3,    4,    4,    4,    4,
                           4,    5,    5,    6,    6,    6,    7,
                           7,    7,    7,    7,    8,    8,    8,
                           8,    8,    8,    8,    8,    8,    8 };

/// Vertical Codes and Set number for frequent sequences in sets USX_SYM and USX_NUM. First 3 bits indicate set (USX_SYM/USX_NUM) and rest are vcode positions
uint8_t usx_freq_codes[] = {(1 << 5) + 25, (1 << 5) + 26, (1 << 5) + 27, (2 << 5) + 23, (2 << 5) + 24, (2 << 5) + 25};

/// Not used
const int UTF8_MASK[] = {0xE0, 0xF0, 0xF8};
/// Not used
const int UTF8_PREFIX[] = {0xC0, 0xE0, 0xF0};

/// Minimum length to consider as repeating sequence
#define NICE_LEN 5

/// Set (USX_NUM - 2) and vertical code (26) for encoding repeating letters
#define RPT_CODE ((2 << 5) + 26)
/// Set (USX_NUM - 2) and vertical code (27) for encoding terminator
#define TERM_CODE ((2 << 5) + 27)
/// Set (USX_SYM - 1) and vertical code (7) for encoding Line feed \\n
#define LF_CODE ((1 << 5) + 7)
/// Set (USX_NUM - 1) and vertical code (8) for encoding \\r\\n
#define CRLF_CODE ((1 << 5) + 8)
/// Set (USX_NUM - 1) and vertical code (22) for encoding \\r
#define CR_CODE ((1 << 5) + 22)
/// Set (USX_NUM - 1) and vertical code (14) for encoding \\t
#define TAB_CODE  ((1 << 5) + 14)
/// Set (USX_NUM - 2) and vertical code (17) for space character when it appears in USX_NUM state \\r
#define NUM_SPC_CODE ((2 << 5) + 17)

/// Code for special code (11111) when state=USX_DELTA
#define UNI_STATE_SPL_CODE 0xF8
/// Length of Code for special code when state=USX_DELTA
#define UNI_STATE_SPL_CODE_LEN 5
/// Code for switch code when state=USX_DELTA
#define UNI_STATE_SW_CODE 0x80
/// Length of Code for Switch code when state=USX_DELTA
#define UNI_STATE_SW_CODE_LEN 2

/// Switch code in USX_ALPHA and USX_NUM 00
#define SW_CODE 0
/// Length of Switch code
#define SW_CODE_LEN 2
/// Terminator bit sequence for Preset 1. Length varies depending on state as per following macros
#define TERM_BYTE_PRESET_1 0
/// Length of Terminator bit sequence when state is lower
#define TERM_BYTE_PRESET_1_LEN_LOWER 6
/// Length of Terminator bit sequence when state is upper
#define TERM_BYTE_PRESET_1_LEN_UPPER 4

/// Offset at which usx_code_94 starts
#define USX_OFFSET_94 33

/// global to indicate whether initialization is complete or not
uint8_t is_inited = 0;

/// Fills the usx_code_94 94 letter array based on sets of characters at usx_sets \n
/// For each element in usx_code_94, first 3 msb bits is set (USX_ALPHA / USX_SYM / USX_NUM) \n
/// and the rest 5 bits indicate the vertical position in the corresponding set
void init_coder() {
  if (is_inited)
    return;
  memset(usx_code_94, '\0', sizeof(usx_code_94));
  for (int i = 0; i < 3; i++) {
    for (int j = 0; j < 28; j++) {
      uint8_t c = usx_sets[i][j];
      if (c > 32) {
        usx_code_94[c - USX_OFFSET_94] = (i << 5) + j;
        if (c >= 'a' && c <= 'z')
          usx_code_94[c - USX_OFFSET_94 - ('a' - 'A')] = (i << 5) + j;
      }
    }
  }
  is_inited = 1;
}

/// Mask for retrieving each code to be encoded according to its length
unsigned int usx_mask[] = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF};

/// Appends specified number of bits to the output (out) \n
/// If maximum limit (olen) is reached, -1 is returned \n
/// Otherwise clen bits in code are appended to out starting with MSB
int append_bits(char *out, int olen, int ol, uint8_t code, int clen) {


  //printf("%d,%x,%d,%d\n", ol, code, clen, state);

  while (clen > 0) {
    int oidx;
    unsigned char a_byte;

    uint8_t cur_bit = ol % 8;
    uint8_t blen = clen;
    a_byte = code & usx_mask[blen - 1];
    a_byte >>= cur_bit;
    if (blen + cur_bit > 8)
      blen = (8 - cur_bit);
    oidx = ol / 8;
    if (oidx < 0 || olen <= oidx)
      return -1;
    if (cur_bit == 0)
      out[oidx] = a_byte;
    else
      out[oidx] |= a_byte;
    code <<= blen;
    ol += blen;
    clen -= blen;
  }
  return ol;
}

/// This is a safe call to append_bits() making sure it does not write past olen
#define SAFE_APPEND_BITS(exp) do { \
  const int newidx = (exp); \
  if (newidx < 0) return newidx; \
} while (0)

/// Appends switch code to out depending on the state (USX_DELTA or other)
int append_switch_code(char *out, int olen, int ol, uint8_t state) {
  if (state == USX_DELTA) {
    SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, UNI_STATE_SPL_CODE, UNI_STATE_SPL_CODE_LEN));
    SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, UNI_STATE_SW_CODE, UNI_STATE_SW_CODE_LEN));
  } else
    SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, SW_CODE, SW_CODE_LEN));
  return ol;
}

/// Appends given horizontal and veritical code bits to out
int append_code(char *out, int olen, int ol, uint8_t code, uint8_t *state, const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[]) {
  uint8_t hcode = code >> 5;
  uint8_t vcode = code & 0x1F;
  if (!usx_hcode_lens[hcode] && hcode != USX_ALPHA)
    return ol;
  switch (hcode) {
    case USX_ALPHA:
      if (*state != USX_ALPHA) {
        SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, *state));
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
        *state = USX_ALPHA;
      }
      break;
    case USX_SYM:
      SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, *state));
      SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_SYM], usx_hcode_lens[USX_SYM]));
      break;
    case USX_NUM:
      if (*state != USX_NUM) {
        SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, *state));
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_NUM], usx_hcode_lens[USX_NUM]));
        if (usx_sets[hcode][vcode] >= '0' && usx_sets[hcode][vcode] <= '9')
          *state = USX_NUM;
      }
  }
  SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_vcodes[vcode], usx_vcode_lens[vcode]));
  return ol;
}

/// Length of bits used to represent count for each level
const uint8_t count_bit_lens[5] = {2, 4, 7, 11, 16};
/// Cumulative counts represented at each level
const int32_t count_adder[5] = {4, 20, 148, 2196, 67732};
/// Codes used to specify the level that the count belongs to
const uint8_t count_codes[] = {0x01, 0x82, 0xC3, 0xE4, 0xF4};
/// Encodes given count to out
int encodeCount(char *out, int olen, int ol, int count) {
  // First five bits are code and Last three bits of codes represent length
  for (int i = 0; i < 5; i++) {
    if (count < count_adder[i]) {
      SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, (count_codes[i] & 0xF8), count_codes[i] & 0x07));
      uint16_t count16 = (count - (i ? count_adder[i - 1] : 0)) << (16 - count_bit_lens[i]);
      if (count_bit_lens[i] > 8) {
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, count16 >> 8, 8));
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, count16 & 0xFF, count_bit_lens[i] - 8));
      } else
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, count16 >> 8, count_bit_lens[i]));
      return ol;
    }
  }
  return ol;
}

/// Length of bits used to represent delta code for each level
const uint8_t uni_bit_len[5] = {6, 12, 14, 16, 21};
/// Cumulative delta codes represented at each level
const int32_t uni_adder[5] = {0, 64, 4160, 20544, 86080};

/// Encodes the unicode code point given by code to out. prev_code is used to calculate the delta
int encodeUnicode(char *out, int olen, int ol, int32_t code, int32_t prev_code) {
  // First five bits are code and Last three bits of codes represent length
  //const uint8_t codes[8] = {0x00, 0x42, 0x83, 0xA3, 0xC3, 0xE4, 0xF5, 0xFD};
  const uint8_t codes[6] = {0x01, 0x82, 0xC3, 0xE4, 0xF5, 0xFD};
  int32_t till = 0;
  int32_t diff = code - prev_code;
  if (diff < 0)
    diff = -diff;
  //printf("%ld, ", code);
  //printf("Diff: %d\n", diff);
  for (int i = 0; i < 5; i++) {
    till += (1 << uni_bit_len[i]);
    if (diff < till) {
      SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, (codes[i] & 0xF8), codes[i] & 0x07));
      //if (diff) {
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, prev_code > code ? 0x80 : 0, 1));
        int32_t val = diff - uni_adder[i];
        //printf("Val: %d\n", val);
        if (uni_bit_len[i] > 16) {
          val <<= (24 - uni_bit_len[i]);
          SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val >> 16, 8));
          SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, (val >> 8) & 0xFF, 8));
          SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val & 0xFF, uni_bit_len[i] - 16));
        } else
        if (uni_bit_len[i] > 8) {
          val <<= (16 - uni_bit_len[i]);
          SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val >> 8, 8));
          SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val & 0xFF, uni_bit_len[i] - 8));
        } else {
          val <<= (8 - uni_bit_len[i]);
          SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val & 0xFF, uni_bit_len[i]));
        }
      return ol;
    }
  }
  return ol;
}

/// Reads UTF-8 character from in. Also returns the number of bytes occupied by the UTF-8 character in utf8len
int32_t readUTF8(const char *in, int len, int l, int *utf8len) {
  int32_t ret = 0;
  if (l < (len - 1) && (in[l] & 0xE0) == 0xC0 && (in[l + 1] & 0xC0) == 0x80) {
    *utf8len = 2;
    ret = (in[l] & 0x1F);
    ret <<= 6;
    ret += (in[l + 1] & 0x3F);
    if (ret < 0x80)
      ret = 0;
  } else
  if (l < (len - 2) && (in[l] & 0xF0) == 0xE0 && (in[l + 1] & 0xC0) == 0x80
          && (in[l + 2] & 0xC0) == 0x80) {
    *utf8len = 3;
    ret = (in[l] & 0x0F);
    ret <<= 6;
    ret += (in[l + 1] & 0x3F);
    ret <<= 6;
    ret += (in[l + 2] & 0x3F);
    if (ret < 0x0800)
      ret = 0;
  } else
  if (l < (len - 3) && (in[l] & 0xF8) == 0xF0 && (in[l + 1] & 0xC0) == 0x80
          && (in[l + 2] & 0xC0) == 0x80 && (in[l + 3] & 0xC0) == 0x80) {
    *utf8len = 4;
    ret = (in[l] & 0x07);
    ret <<= 6;
    ret += (in[l + 1] & 0x3F);
    ret <<= 6;
    ret += (in[l + 2] & 0x3F);
    ret <<= 6;
    ret += (in[l + 3] & 0x3F);
    if (ret < 0x10000)
      ret = 0;
  }
  return ret;
}

/// Finds the longest matching sequence from the beginning of the string. \n
/// If a match is found and it is longer than NICE_LEN, it is encoded as a repeating sequence to out \n
/// This is also used for Unicode strings \n
/// This is a crude implementation that is not optimized.  Assuming only short strings \n
/// are encoded, this is not much of an issue.
int matchOccurance(const char *in, int len, int l, char *out, int olen, int *ol, uint8_t *state, const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[]) {
  int j, k;
  int longest_dist = 0;
  int longest_len = 0;
  for (j = l - NICE_LEN; j >= 0; j--) {
    for (k = l; k < len && j + k - l < l; k++) {
      if (in[k] != in[j + k - l])
        break;
    }
    while ((((unsigned char) in[k]) >> 6) == 2)
      k--; // Skip partial UTF-8 matches
    //if ((in[k - 1] >> 3) == 0x1E || (in[k - 1] >> 4) == 0x0E || (in[k - 1] >> 5) == 0x06)
    //  k--;
    if ((k - l) > (NICE_LEN - 1)) {
      int match_len = k - l - NICE_LEN;
      int match_dist = l - j - NICE_LEN + 1;
      if (match_len > longest_len) {
          longest_len = match_len;
          longest_dist = match_dist;
      }
    }
  }
  if (longest_len) {
    SAFE_APPEND_BITS(*ol = append_switch_code(out, olen, *ol, *state));
    SAFE_APPEND_BITS(*ol = append_bits(out, olen, *ol, usx_hcodes[USX_DICT], usx_hcode_lens[USX_DICT]));
    //printf("Len:%d / Dist:%d/%.*s\n", longest_len, longest_dist, longest_len + NICE_LEN, in + l - longest_dist - NICE_LEN + 1);
    SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, longest_len));
    SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, longest_dist));
    l += (longest_len + NICE_LEN);
    l--;
    return l;
  }
  return -l;
}

/// This is used only when encoding a string array
/// Finds the longest matching sequence from the previous array element to the beginning of the string array. \n
/// If a match is found and it is longer than NICE_LEN, it is encoded as a repeating sequence to out \n
/// This is also used for Unicode strings \n
/// This is a crude implementation that is not optimized.  Assuming only short strings \n
/// are encoded, this is not much of an issue.
int matchLine(const char *in, int len, int l, char *out, int olen, int *ol, struct us_lnk_lst *prev_lines, uint8_t *state, const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[]) {
  int last_ol = *ol;
  int last_len = 0;
  int last_dist = 0;
  int last_ctx = 0;
  int line_ctr = 0;
  int j = 0;
  do {
    int i, k;
    int line_len = (int)strlen(prev_lines->data);
    int limit = (line_ctr == 0 ? l : line_len);
    for (; j < limit; j++) {
      for (i = l, k = j; k < line_len && i < len; k++, i++) {
        if (prev_lines->data[k] != in[i])
          break;
      }
      while ((((unsigned char) prev_lines->data[k]) >> 6) == 2)
        k--; // Skip partial UTF-8 matches
      if ((k - j) >= NICE_LEN) {
        if (last_len) {
          if (j > last_dist)
            continue;
          //int saving = ((k - j) - last_len) + (last_dist - j) + (last_ctx - line_ctr);
          //if (saving < 0) {
          //  //printf("No savng: %d\n", saving);
          //  continue;
          //}
          *ol = last_ol;
        }
        last_len = (k - j);
        last_dist = j;
        last_ctx = line_ctr;
        SAFE_APPEND_BITS(*ol = append_switch_code(out, olen, *ol, *state));
        SAFE_APPEND_BITS(*ol = append_bits(out, olen, *ol, usx_hcodes[USX_DICT], usx_hcode_lens[USX_DICT]));
        SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, last_len - NICE_LEN));
        SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, last_dist));
        SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, last_ctx));
        /*
        if ((*ol - last_ol) > (last_len * 4)) {
          last_len = 0;
          *ol = last_ol;
        }*/
        //printf("Len: %d, Dist: %d, Line: %d\n", last_len, last_dist, last_ctx);
        j += last_len;
      }
    }
    line_ctr++;
    prev_lines = prev_lines->previous;
  } while (prev_lines && prev_lines->data != NULL);
  if (last_len) {
    l += last_len;
    l--;
    return l;
  }
  return -l;
}

/// Returns 4 bit code assuming ch falls between '0' to '9', \n
/// 'A' to 'F' or 'a' to 'f'
uint8_t getBaseCode(char ch) {
  if (ch >= '0' && ch <= '9')
    return (ch - '0') << 4;
  else if (ch >= 'A' && ch <= 'F')
    return (ch - 'A' + 10) << 4;
  else if (ch >= 'a' && ch <= 'f')
    return (ch - 'a' + 10) << 4;
  return 0;
}

/// Enum indicating nibble type - USX_NIB_NUM means ch is a number '0' to '9', \n
/// USX_NIB_HEX_LOWER means ch is between 'a' to 'f', \n
/// USX_NIB_HEX_UPPER means ch is between 'A' to 'F'
enum {USX_NIB_NUM = 0, USX_NIB_HEX_LOWER, USX_NIB_HEX_UPPER, USX_NIB_NOT};
/// Gets 4 bit code assuming ch falls between '0' to '9', \n
/// 'A' to 'F' or 'a' to 'f'
char getNibbleType(char ch) {
  if (ch >= '0' && ch <= '9')
    return USX_NIB_NUM;
  else if (ch >= 'a' && ch <= 'f')
    return USX_NIB_HEX_LOWER;
  else if (ch >= 'A' && ch <= 'F')
    return USX_NIB_HEX_UPPER;
  return USX_NIB_NOT;
}

/// Starts coding of nibble sets
int append_nibble_escape(char *out, int olen, int ol, uint8_t state, const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[]) {
  SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, state));
  SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_NUM], usx_hcode_lens[USX_NUM]));
  SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, 0, 2));
  return ol;
}

/// Returns minimum value of two longs
long min_of(long c, long i) {
  return c > i ? i : c;
}

/// Appends the terminator code depending on the state, preset and whether full terminator needs to be encoded to out or not \n
int append_final_bits(char *const out, const int olen, int ol, const uint8_t state, const uint8_t is_all_upper, const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[]) {
  if (usx_hcode_lens[USX_ALPHA]) {
    if (USX_NUM != state) {
      // for num state, append TERM_CODE directly
      // for other state, switch to Num Set first
      SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, state));
      SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_NUM], usx_hcode_lens[USX_NUM]));
    }
    SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_vcodes[TERM_CODE & 0x1F], usx_vcode_lens[TERM_CODE & 0x1F]));
  } else {
    // preset 1, terminate at 2 or 3 SW_CODE, i.e., 4 or 6 continuous 0 bits
    // see discussion: https://github.com/siara-cc/Unishox/issues/19#issuecomment-922435580
    SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, TERM_BYTE_PRESET_1, is_all_upper ? TERM_BYTE_PRESET_1_LEN_UPPER : TERM_BYTE_PRESET_1_LEN_LOWER));
  }

  // fill uint8_t with the last bit
  SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, (ol == 0 || out[(ol-1)/8] << ((ol-1)&7) >= 0) ? 0 : 0xFF, (8 - ol % 8) & 7));

  return ol;
}

/// Macro used in the main compress function so that if the output len exceeds given maximum length (olen) it can exit
#define SAFE_APPEND_BITS2(olen, exp) do { \
  const int newidx = (exp); \
  const int __olen = (olen); \
  if (newidx < 0) return __olen >= 0 ? __olen + 1 : (1 - __olen) * 4; \
} while (0)

// Main API function. See unishox2.h for documentation
int unishox2_compress_lines(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[], struct us_lnk_lst *prev_lines) {

  uint8_t state;

  int l, ll, ol;
  char c_in, c_next;
  int prev_uni;
  uint8_t is_upper, is_all_upper;
#if (UNISHOX_API_OUT_AND_LEN(0,1)) == 0
  const int olen = INT_MAX - 1;
  const int rawolen = olen;
  const uint8_t need_full_term_codes = 0;
#else
  const int rawolen = olen;
  uint8_t need_full_term_codes = 0;
  if (olen < 0) {
    need_full_term_codes = 1;
    olen *= -1;
  }
#endif

  init_coder();
  ol = 0;
  prev_uni = 0;
  state = USX_ALPHA;
  is_all_upper = 0;
  SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, UNISHOX_MAGIC_BITS, UNISHOX_MAGIC_BIT_LEN)); // magic bit(s)
  for (l=0; l<len; l++) {

    if (usx_hcode_lens[USX_DICT] && l < (len - NICE_LEN + 1)) {
      if (prev_lines) {
        l = matchLine(in, len, l, out, olen, &ol, prev_lines, &state, usx_hcodes, usx_hcode_lens);
        if (l > 0) {
          continue;
        } else if (l < 0 && ol < 0) {
          return olen + 1;
        }
        l = -l;
      } else {
          l = matchOccurance(in, len, l, out, olen, &ol, &state, usx_hcodes, usx_hcode_lens);
          if (l > 0) {
            continue;
          } else if (l < 0 && ol < 0) {
            return olen + 1;
          }
          l = -l;
      }
    }

    c_in = in[l];
    if (l && len > 4 && l < (len - 4) && usx_hcode_lens[USX_NUM]) {
      if (c_in == in[l - 1] && c_in == in[l + 1] && c_in == in[l + 2] && c_in == in[l + 3]) {
        int rpt_count = l + 4;
        while (rpt_count < len && in[rpt_count] == c_in)
          rpt_count++;
        rpt_count -= l;
        SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, RPT_CODE, &state, usx_hcodes, usx_hcode_lens));
        SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, rpt_count - 4));
        l += rpt_count;
        l--;
        continue;
      }
    }

    if (l <= (len - 36) && usx_hcode_lens[USX_NUM]) {
      if (in[l + 8] == '-' && in[l + 13] == '-' && in[l + 18] == '-' && in[l + 23] == '-') {
        char hex_type = USX_NIB_NUM;
        int uid_pos = l;
        for (; uid_pos < l + 36; uid_pos++) {
          char c_uid = in[uid_pos];
          if (c_uid == '-' && (uid_pos == 8 || uid_pos == 13 || uid_pos == 18 || uid_pos == 23))
            continue;
          char nib_type = getNibbleType(c_uid);
          if (nib_type == USX_NIB_NOT)
            break;
          if (nib_type != USX_NIB_NUM) {
            if (hex_type != USX_NIB_NUM && hex_type != nib_type)
              break;
            hex_type = nib_type;
          }
        }
        if (uid_pos == l + 36) {
          SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens));
          SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, (hex_type == USX_NIB_HEX_LOWER ? 0xC0 : 0xF0),
                 (hex_type == USX_NIB_HEX_LOWER ? 3 : 5)));
          for (uid_pos = l; uid_pos < l + 36; uid_pos++) {
            char c_uid = in[uid_pos];
            if (c_uid != '-')
              SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, getBaseCode(c_uid), 4));
          }
          //printf("GUID:\n");
          l += 35;
          continue;
        }
      }
    }

    if (l < (len - 5) && usx_hcode_lens[USX_NUM]) {
      char hex_type = USX_NIB_NUM;
      int hex_len = 0;
      do {
        char nib_type = getNibbleType(in[l + hex_len]);
        if (nib_type == USX_NIB_NOT)
          break;
        if (nib_type != USX_NIB_NUM) {
          if (hex_type != USX_NIB_NUM && hex_type != nib_type)
            break;
          hex_type = nib_type;
        }
        hex_len++;
      } while (l + hex_len < len);
      if (hex_len > 10 && hex_type == USX_NIB_NUM)
        hex_type = USX_NIB_HEX_LOWER;
      if ((hex_type == USX_NIB_HEX_LOWER || hex_type == USX_NIB_HEX_UPPER) && hex_len > 3) {
        SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens));
        SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, (hex_type == USX_NIB_HEX_LOWER ? 0x80 : 0xE0), (hex_type == USX_NIB_HEX_LOWER ? 2 : 4)));
        SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, hex_len));
        do {
          SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, getBaseCode(in[l++]), 4));
        } while (--hex_len);
        l--;
        continue;
      }
    }

    if (usx_templates != NULL) {
      int i;
      for (i = 0; i < 5; i++) {
        if (usx_templates[i]) {
          int rem = (int)strlen(usx_templates[i]);
          int j = 0;
          for (; j < rem && l + j < len; j++) {
            char c_t = usx_templates[i][j];
            c_in = in[l + j];
            if (c_t == 'f' || c_t == 'F') {
              if (getNibbleType(c_in) != (c_t == 'f' ? USX_NIB_HEX_LOWER : USX_NIB_HEX_UPPER)
                       && getNibbleType(c_in) != USX_NIB_NUM) {
                break;
              }
            } else
            if (c_t == 'r' || c_t == 't' || c_t == 'o') {
              if (c_in < '0' || c_in > (c_t == 'r' ? '7' : (c_t == 't' ? '3' : '1')))
                break;
            } else
            if (c_t != c_in)
              break;
          }
          if (((float)j / rem) > 0.66) {
            //printf("%s\n", usx_templates[i]);
            rem = rem - j;
            SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens));
            SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, 0, 1));
            SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, (count_codes[i] & 0xF8), count_codes[i] & 0x07));
            SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, rem));
            for (int k = 0; k < j; k++) {
              char c_t = usx_templates[i][k];
              if (c_t == 'f' || c_t == 'F')
                SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, getBaseCode(in[l + k]), 4));
              else if (c_t == 'r' || c_t == 't' || c_t == 'o') {
                c_t = (c_t == 'r' ? 3 : (c_t == 't' ? 2 : 1));
                SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, (in[l + k] - '0') << (8 - c_t), c_t));
              }
            }
            l += j;
            l--;
            break;
          }
        }
      }
      if (i < 5)
        continue;
    }

    if (usx_freq_seq != NULL) {
      int i;
      for (i = 0; i < 6; i++) {
        int seq_len = (int)strlen(usx_freq_seq[i]);
        if (len - seq_len >= 0 && l <= len - seq_len) {
          if (memcmp(usx_freq_seq[i], in + l, seq_len) == 0 && usx_hcode_lens[usx_freq_codes[i] >> 5]) {
            SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, usx_freq_codes[i], &state, usx_hcodes, usx_hcode_lens));
            l += seq_len;
            l--;
            break;
          }
        }
      }
      if (i < 6)
        continue;
    }

    c_in = in[l];

    is_upper = 0;
    if (c_in >= 'A' && c_in <= 'Z')
      is_upper = 1;
    else {
      if (is_all_upper) {
        is_all_upper = 0;
        SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
        SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
        state = USX_ALPHA;
      }
    }
    if (is_upper && !is_all_upper) {
      if (state == USX_NUM) {
        SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
        SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
        state = USX_ALPHA;
      }
      SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
      SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
      if (state == USX_DELTA) {
        state = USX_ALPHA;
        SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
        SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
      }
    }
    c_next = 0;
    if (l+1 < len)
      c_next = in[l+1];

    if (c_in >= 32 && c_in <= 126) {
      if (is_upper && !is_all_upper) {
        for (ll=l+4; ll>=l && ll<len; ll--) {
          if (in[ll] < 'A' || in[ll] > 'Z')
            break;
        }
        if (ll == l-1) {
          SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
          SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
          state = USX_ALPHA;
          is_all_upper = 1;
        }
      }
      if (state == USX_DELTA && (c_in == ' ' || c_in == '.' || c_in == ',')) {
        uint8_t spl_code = (c_in == ',' ? 0xC0 : (c_in == '.' ? 0xE0 : (c_in == ' ' ? 0 : 0xFF)));
        if (spl_code != 0xFF) {
          uint8_t spl_code_len = (c_in == ',' ? 3 : (c_in == '.' ? 4 : (c_in == ' ' ? 1 : 4)));
          SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, UNI_STATE_SPL_CODE, UNI_STATE_SPL_CODE_LEN));
          SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, spl_code, spl_code_len));
          continue;
        }
      }
      c_in -= 32;
      if (is_all_upper && is_upper)
        c_in += 32;
      if (c_in == 0) {
        if (state == USX_NUM)
          SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_vcodes[NUM_SPC_CODE & 0x1F], usx_vcode_lens[NUM_SPC_CODE & 0x1F]));
        else
          SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_vcodes[1], usx_vcode_lens[1]));
      } else {
        c_in--;
        SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, usx_code_94[(int)c_in], &state, usx_hcodes, usx_hcode_lens));
      }
    } else
    if (c_in == 13 && c_next == 10) {
      SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, CRLF_CODE, &state, usx_hcodes, usx_hcode_lens));
      l++;
    } else
    if (c_in == 10) {
      if (state == USX_DELTA) {
        SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, UNI_STATE_SPL_CODE, UNI_STATE_SPL_CODE_LEN));
        SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, 0xF0, 4));
      } else
        SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, LF_CODE, &state, usx_hcodes, usx_hcode_lens));
    } else
    if (c_in == 13) {
      SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, CR_CODE, &state, usx_hcodes, usx_hcode_lens));
    } else
    if (c_in == '\t') {
      SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, TAB_CODE, &state, usx_hcodes, usx_hcode_lens));
    } else {
      int utf8len;
      int32_t uni = readUTF8(in, len, l, &utf8len);
      if (uni) {
        l += utf8len;
        if (state != USX_DELTA) {
          int32_t uni2 = readUTF8(in, len, l, &utf8len);
          if (uni2) {
            if (state != USX_ALPHA) {
              SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
              SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
            }
            SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
            SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
            SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_vcodes[1], usx_vcode_lens[1])); // code for space (' ')
            state = USX_DELTA;
          } else {
            SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
            SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_DELTA], usx_hcode_lens[USX_DELTA]));
          }
        }
        SAFE_APPEND_BITS2(rawolen, ol = encodeUnicode(out, olen, ol, uni, prev_uni));
        //printf("%d:%d:%d\n", l, utf8len, uni);
        prev_uni = uni;
        l--;
      } else {
        int bin_count = 1;
        for (int bi = l + 1; bi < len; bi++) {
          char c_bi = in[bi];
          //if (c_bi > 0x1F && c_bi != 0x7F)
          //  break;
          if (readUTF8(in, len, bi, &utf8len))
            break;
          if (bi < (len - 4) && c_bi == in[bi - 1] && c_bi == in[bi + 1] && c_bi == in[bi + 2] && c_bi == in[bi + 3])
            break;
          bin_count++;
        }
        //printf("Bin:%d:%d:%x:%d\n", l, (unsigned char) c_in, (unsigned char) c_in, bin_count);
        SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens));
        SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, 0xF8, 5));
        SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, bin_count));
        do {
          SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, in[l++], 8));
        } while (--bin_count);
        l--;
      }
    }
  }

  if (need_full_term_codes) {
    const int orig_ol = ol;
    SAFE_APPEND_BITS2(rawolen, ol = append_final_bits(out, olen, ol, state, is_all_upper, usx_hcodes, usx_hcode_lens));
    return (ol / 8) * 4 + (((ol-orig_ol)/8) & 3);
  } else {
    const int rst = (ol + 7) / 8;
    append_final_bits(out, rst, ol, state, is_all_upper, usx_hcodes, usx_hcode_lens);
    return rst;
  }
}

// Main API function. See unishox2.h for documentation
int unishox2_compress(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[]) {
  return unishox2_compress_lines(in, len, UNISHOX_API_OUT_AND_LEN(out, olen), usx_hcodes, usx_hcode_lens, usx_freq_seq, usx_templates, NULL);
}

// Main API function. See unishox2.h for documentation
int unishox2_compress_simple(const char *in, int len, char *out) {
  return unishox2_compress_lines(in, len, UNISHOX_API_OUT_AND_LEN(out, INT_MAX - 1), USX_HCODES_DFLT, USX_HCODE_LENS_DFLT, USX_FREQ_SEQ_DFLT, USX_TEMPLATES, NULL);
}

// Reads one bit from in
int readBit(const char *in, int bit_no) {
   return in[bit_no >> 3] & (0x80 >> (bit_no % 8));
}

// Reads next 8 bits, if available
int read8bitCode(const char *in, int len, int bit_no) {
  int bit_pos = bit_no & 0x07;
  int char_pos = bit_no >> 3;
  len >>= 3;
  uint8_t code = (((uint8_t)in[char_pos]) << bit_pos);
  char_pos++;
  if (char_pos < len) {
    code |= ((uint8_t)in[char_pos]) >> (8 - bit_pos);
  } else
    code |= (0xFF >> (8 - bit_pos));
  return code;
}

/// The list of veritical codes is split into 5 sections. Used by readVCodeIdx()
#define SECTION_COUNT 5
/// Used by readVCodeIdx() for finding the section under which the code read using read8bitCode() falls
uint8_t usx_vsections[] = {0x7F, 0xBF, 0xDF, 0xEF, 0xFF};
/// Used by readVCodeIdx() for finding the section vertical position offset
uint8_t usx_vsection_pos[] = {0, 4, 8, 12, 20};
/// Used by readVCodeIdx() for masking the code read by read8bitCode()
uint8_t usx_vsection_mask[] = {0x7F, 0x3F, 0x1F, 0x0F, 0x0F};
/// Used by readVCodeIdx() for shifting the code read by read8bitCode() to obtain the vpos
uint8_t usx_vsection_shift[] = {5, 4, 3, 1, 0};

/// Vertical decoder lookup table - 3 bits code len, 5 bytes vertical pos
/// code len is one less as 8 cannot be accommodated in 3 bits
uint8_t usx_vcode_lookup[36] = {
  (1 << 5) + 0,  (1 << 5) + 0,  (2 << 5) + 1,  (2 << 5) + 2,  // Section 1
  (3 << 5) + 3,  (3 << 5) + 4,  (3 << 5) + 5,  (3 << 5) + 6,  // Section 2
  (3 << 5) + 7,  (3 << 5) + 7,  (4 << 5) + 8,  (4 << 5) + 9,  // Section 3
  (5 << 5) + 10, (5 << 5) + 10, (5 << 5) + 11, (5 << 5) + 11, // Section 4
  (5 << 5) + 12, (5 << 5) + 12, (6 << 5) + 13, (6 << 5) + 14,
  (6 << 5) + 15, (6 << 5) + 15, (6 << 5) + 16, (6 << 5) + 16, // Section 5
  (6 << 5) + 17, (6 << 5) + 17, (7 << 5) + 18, (7 << 5) + 19,
  (7 << 5) + 20, (7 << 5) + 21, (7 << 5) + 22, (7 << 5) + 23,
  (7 << 5) + 24, (7 << 5) + 25, (7 << 5) + 26, (7 << 5) + 27
};

/// Decodes the vertical code from the given bitstream at in \n
/// This is designed to use less memory using a 36 uint8_t buffer \n
/// compared to using a 256 uint8_t buffer to decode the next 8 bits read by read8bitCode() \n
/// by splitting the list of vertical codes. \n
/// Decoder is designed for using less memory, not speed. \n
/// Returns the veritical code index or 99 if match could not be found. \n
/// Also updates bit_no_p with how many ever bits used by the vertical code.
int readVCodeIdx(const char *in, int len, int *bit_no_p) {
  if (*bit_no_p < len) {
    uint8_t code = read8bitCode(in, len, *bit_no_p);
    int i = 0;
    do {
      if (code <= usx_vsections[i]) {
        uint8_t vcode = usx_vcode_lookup[usx_vsection_pos[i] + ((code & usx_vsection_mask[i]) >> usx_vsection_shift[i])];
        (*bit_no_p) += ((vcode >> 5) + 1);
        if (*bit_no_p > len)
          return 99;
        return vcode & 0x1F;
      }
    } while (++i < SECTION_COUNT);
  }
  return 99;
}

/// Mask for retrieving each code to be decoded according to its length \n
/// Same as usx_mask so redundant
uint8_t len_masks[] = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF};
/// Decodes the horizontal code from the given bitstream at in \n
/// depending on the hcodes defined using usx_hcodes and usx_hcode_lens \n
/// Returns the horizontal code index or 99 if match could not be found. \n
/// Also updates bit_no_p with how many ever bits used by the horizontal code.
int readHCodeIdx(const char *in, int len, int *bit_no_p, const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[]) {
  if (!usx_hcode_lens[USX_ALPHA])
    return USX_ALPHA;
  if (*bit_no_p < len) {
    uint8_t code = read8bitCode(in, len, *bit_no_p);
    for (int code_pos = 0; code_pos < 5; code_pos++) {
      if (usx_hcode_lens[code_pos] && (code & len_masks[usx_hcode_lens[code_pos] - 1]) == usx_hcodes[code_pos]) {
        *bit_no_p += usx_hcode_lens[code_pos];
        return code_pos;
      }
    }
  }
  return 99;
}

// TODO: Last value check.. Also len check in readBit
/// Returns the position of step code (0, 10, 110, etc.) encountered in the stream
int getStepCodeIdx(const char *in, int len, int *bit_no_p, int limit) {
  int idx = 0;
  while (*bit_no_p < len && readBit(in, *bit_no_p)) {
    idx++;
    (*bit_no_p)++;
    if (idx == limit)
      return idx;
  }
  if (*bit_no_p >= len)
    return 99;
  (*bit_no_p)++;
  return idx;
}

/// Reads specified number of bits and builds the corresponding integer
int32_t getNumFromBits(const char *in, int len, int bit_no, int count) {
   int32_t ret = 0;
   while (count-- && bit_no < len) {
     ret += (readBit(in, bit_no) ? 1 << count : 0);
     bit_no++;
   }
   return count < 0 ? ret : -1;
}

/// Decodes the count from the given bit stream at in. Also updates bit_no_p
int32_t readCount(const char *in, int *bit_no_p, int len) {
  int idx = getStepCodeIdx(in, len, bit_no_p, 4);
  if (idx == 99)
    return -1;
  if (*bit_no_p + count_bit_lens[idx] - 1 >= len)
    return -1;
  int32_t count = getNumFromBits(in, len, *bit_no_p, count_bit_lens[idx]) + (idx ? count_adder[idx - 1] : 0);
  (*bit_no_p) += count_bit_lens[idx];
  return count;
}

/// Decodes the Unicode codepoint from the given bit stream at in. Also updates bit_no_p \n
/// When the step code is 5, reads the next step code to find out the special code.
int32_t readUnicode(const char *in, int *bit_no_p, int len) {
  int idx = getStepCodeIdx(in, len, bit_no_p, 5);
  if (idx == 99)
    return 0x7FFFFF00 + 99;
  if (idx == 5) {
    idx = getStepCodeIdx(in, len, bit_no_p, 4);
    return 0x7FFFFF00 + idx;
  }
  if (idx >= 0) {
    int sign = (*bit_no_p < len ? readBit(in, *bit_no_p) : 0);
    (*bit_no_p)++;
    if (*bit_no_p + uni_bit_len[idx] - 1 >= len)
      return 0x7FFFFF00 + 99;
    int32_t count = getNumFromBits(in, len, *bit_no_p, uni_bit_len[idx]);
    count += uni_adder[idx];
    (*bit_no_p) += uni_bit_len[idx];
    //printf("Sign: %d, Val:%d", sign, count);
    return sign ? -count : count;
  }
  return 0;
}

/// Macro to ensure that the decoder does not append more than olen bytes to out
#define DEC_OUTPUT_CHAR(out, olen, ol, c) do { \
  char *const obuf = (out); \
  const int oidx = (ol); \
  const int limit = (olen); \
  if (limit <= oidx) return limit + 1; \
  else if (oidx < 0) return 0; \
  else obuf[oidx] = (c); \
} while (0)

/// Macro to ensure that the decoder does not append more than olen bytes to out
#define DEC_OUTPUT_CHARS(olen, exp) do { \
  const int newidx = (exp); \
  const int limit = (olen); \
  if (newidx > limit) return limit + 1; \
} while (0)

/// Write given unicode code point to out as a UTF-8 sequence
int writeUTF8(char *out, int olen, int ol, int uni) {
  if (uni < (1 << 11)) {
    DEC_OUTPUT_CHAR(out, olen, ol++, 0xC0 + (uni >> 6));
    DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + (uni & 0x3F));
  } else
  if (uni < (1 << 16)) {
    DEC_OUTPUT_CHAR(out, olen, ol++, 0xE0 + (uni >> 12));
    DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + ((uni >> 6) & 0x3F));
    DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + (uni & 0x3F));
  } else {
    DEC_OUTPUT_CHAR(out, olen, ol++, 0xF0 + (uni >> 18));
    DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + ((uni >> 12) & 0x3F));
    DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + ((uni >> 6) & 0x3F));
    DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + (uni & 0x3F));
  }
  return ol;
}

/// Decode repeating sequence and appends to out
int decodeRepeat(const char *in, int len, char *out, int olen, int ol, int *bit_no, struct us_lnk_lst *prev_lines) {
  if (prev_lines) {
    int32_t dict_len = readCount(in, bit_no, len) + NICE_LEN;
    if (dict_len < NICE_LEN)
      return -1;
    int32_t dist = readCount(in, bit_no, len);
    if (dist < 0)
      return -1;
    int32_t ctx = readCount(in, bit_no, len);
    if (ctx < 0)
      return -1;
    struct us_lnk_lst *cur_line = prev_lines;
    const int left = olen - ol;
    while (ctx-- && cur_line)
      cur_line = cur_line->previous;
    if (cur_line == NULL)
      return -1;
    if (left <= 0) return olen + 1;
    if (dist >= strlen(cur_line->data))
      return -1;
    memmove(out + ol, cur_line->data + dist, min_of(left, dict_len));
    if (left < dict_len) return olen + 1;
    ol += dict_len;
  } else {
    int32_t dict_len = readCount(in, bit_no, len) + NICE_LEN;
    if (dict_len < NICE_LEN)
      return -1;
    int32_t dist = readCount(in, bit_no, len) + NICE_LEN - 1;
    if (dist < NICE_LEN - 1)
      return -1;
    const int32_t left = olen - ol;
    //printf("Decode len: %d, dist: %d\n", dict_len - NICE_LEN, dist - NICE_LEN + 1);
    if (left <= 0) return olen + 1;
    if (ol - dist < 0)
      return -1;
    memmove(out + ol, out + ol - dist, min_of(left, dict_len));
    if (left < dict_len) return olen + 1;
    ol += dict_len;
  }
  return ol;
}

/// Returns hex character corresponding to the 4 bit nibble
char getHexChar(int32_t nibble, int hex_type) {
  if (nibble >= 0 && nibble <= 9)
    return '0' + nibble;
  else if (hex_type < USX_NIB_HEX_UPPER)
    return 'a' + nibble - 10;
  return 'A' + nibble - 10;
}

// Main API function. See unishox2.h for documentation
int unishox2_decompress_lines(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[], struct us_lnk_lst *prev_lines) {

  int dstate;
  int bit_no;
  int h, v;
  uint8_t is_all_upper;
#if (UNISHOX_API_OUT_AND_LEN(0,1)) == 0
  const int olen = INT_MAX - 1;
#endif

  init_coder();
  int ol = 0;
  bit_no = UNISHOX_MAGIC_BIT_LEN; // ignore the magic bit
  dstate = h = USX_ALPHA;
  is_all_upper = 0;

  int prev_uni = 0;

  len <<= 3;
  while (bit_no < len) {
    int orig_bit_no = bit_no;
    if (dstate == USX_DELTA || h == USX_DELTA) {
      if (dstate != USX_DELTA)
        h = dstate;
      int32_t delta = readUnicode(in, &bit_no, len);
      if ((delta >> 8) == 0x7FFFFF) {
        int spl_code_idx = delta & 0x000000FF;
        if (spl_code_idx == 99)
          break;
        switch (spl_code_idx) {
          case 0:
            DEC_OUTPUT_CHAR(out, olen, ol++, ' ');
            continue;
          case 1:
            h = readHCodeIdx(in, len, &bit_no, usx_hcodes, usx_hcode_lens);
            if (h == 99) {
              bit_no = len;
              continue;
            }
            if (h == USX_DELTA || h == USX_ALPHA) {
              dstate = h;
              continue;
            }
            if (h == USX_DICT) {
              int rpt_ret = decodeRepeat(in, len, out, olen, ol, &bit_no, prev_lines);
              if (rpt_ret < 0)
                return ol; // if we break here it will only break out of switch
              DEC_OUTPUT_CHARS(olen, ol = rpt_ret);
              h = dstate;
              continue;
            }
            break;
          case 2:
            DEC_OUTPUT_CHAR(out, olen, ol++, ',');
            continue;
          case 3:
            DEC_OUTPUT_CHAR(out, olen, ol++, '.');
            continue;
          case 4:
            DEC_OUTPUT_CHAR(out, olen, ol++, 10);
            continue;
        }
      } else {
        prev_uni += delta;
        DEC_OUTPUT_CHARS(olen, ol = writeUTF8(out, olen, ol, prev_uni));
        //printf("%ld, ", prev_uni);
      }
      if (dstate == USX_DELTA && h == USX_DELTA)
        continue;
    } else
      h = dstate;
    char c = 0;
    uint8_t is_upper = is_all_upper;
    v = readVCodeIdx(in, len, &bit_no);
    if (v == 99 || h == 99) {
      bit_no = orig_bit_no;
      break;
    }
    if (v == 0 && h != USX_SYM) {
      if (bit_no >= len)
        break;
      if (h != USX_NUM || dstate != USX_DELTA) {
        h = readHCodeIdx(in, len, &bit_no, usx_hcodes, usx_hcode_lens);
        if (h == 99 || bit_no >= len) {
          bit_no = orig_bit_no;
          break;
        }
      }
      if (h == USX_ALPHA) {
         if (dstate == USX_ALPHA) {
           if (!usx_hcode_lens[USX_ALPHA] && TERM_BYTE_PRESET_1 == (read8bitCode(in, len, bit_no - SW_CODE_LEN) & (0xFF << (8 - (is_all_upper ? TERM_BYTE_PRESET_1_LEN_UPPER : TERM_BYTE_PRESET_1_LEN_LOWER)))))
             break; // Terminator for preset 1
           if (is_all_upper) {
             is_upper = is_all_upper = 0;
             continue;
           }
           v = readVCodeIdx(in, len, &bit_no);
           if (v == 99) {
             bit_no = orig_bit_no;
             break;
           }
           if (v == 0) {
              h = readHCodeIdx(in, len, &bit_no, usx_hcodes, usx_hcode_lens);
              if (h == 99) {
                bit_no = orig_bit_no;
                break;
              }
              if (h == USX_ALPHA) {
                 is_all_upper = 1;
                 continue;
              }
           }
           is_upper = 1;
         } else {
            dstate = USX_ALPHA;
            continue;
         }
      } else
      if (h == USX_DICT) {
        int rpt_ret = decodeRepeat(in, len, out, olen, ol, &bit_no, prev_lines);
        if (rpt_ret < 0)
          break;
        DEC_OUTPUT_CHARS(olen, ol = rpt_ret);
        continue;
      } else
      if (h == USX_DELTA) {
        //printf("Sign: %d, bitno: %d\n", sign, bit_no);
        //printf("Code: %d\n", prev_uni);
        //printf("BitNo: %d\n", bit_no);
        continue;
      } else {
        if (h != USX_NUM || dstate != USX_DELTA)
          v = readVCodeIdx(in, len, &bit_no);
        if (v == 99) {
          bit_no = orig_bit_no;
          break;
        }
        if (h == USX_NUM && v == 0) {
          int idx = getStepCodeIdx(in, len, &bit_no, 5);
          if (idx == 99)
            break;
          if (idx == 0) {
            idx = getStepCodeIdx(in, len, &bit_no, 4);
            if (idx >= 5)
              break;
            int32_t rem = readCount(in, &bit_no, len);
            if (rem < 0)
              break;
            if (usx_templates[idx] == NULL)
              break;
            size_t tlen = strlen(usx_templates[idx]);
            if (rem > tlen)
              break;
            rem = tlen - rem;
            int eof = 0;
            for (int j = 0; j < rem; j++) {
              char c_t = usx_templates[idx][j];
              if (c_t == 'f' || c_t == 'r' || c_t == 't' || c_t == 'o' || c_t == 'F') {
                  char nibble_len = (c_t == 'f' || c_t == 'F' ? 4 : (c_t == 'r' ? 3 : (c_t == 't' ? 2 : 1)));
                  const int32_t raw_char = getNumFromBits(in, len, bit_no, nibble_len);
                  if (raw_char < 0) {
                      eof = 1;
                      break;
                  }
                  DEC_OUTPUT_CHAR(out, olen, ol++, getHexChar((char)raw_char,
                      c_t == 'f' ? USX_NIB_HEX_LOWER : USX_NIB_HEX_UPPER));
                  bit_no += nibble_len;
              } else
                DEC_OUTPUT_CHAR(out, olen, ol++, c_t);
            }
            if (eof) break; // reach input eof
          } else
          if (idx == 5) {
            int32_t bin_count = readCount(in, &bit_no, len);
            if (bin_count < 0)
              break;
            if (bin_count == 0) // invalid encoding
              break;
            do {
              const int32_t raw_char = getNumFromBits(in, len, bit_no, 8);
              if (raw_char < 0)
                  break;
              DEC_OUTPUT_CHAR(out, olen, ol++, (char)raw_char);
              bit_no += 8;
            } while (--bin_count);
            if (bin_count > 0) break; // reach input eof
          } else {
            int32_t nibble_count = 0;
            if (idx == 2 || idx == 4)
              nibble_count = 32;
            else {
              nibble_count = readCount(in, &bit_no, len);
              if (nibble_count < 0)
                break;
              if (nibble_count == 0) // invalid encoding
                break;
            }
            do {
              int32_t nibble = getNumFromBits(in, len, bit_no, 4);
              if (nibble < 0)
                  break;
              DEC_OUTPUT_CHAR(out, olen, ol++, getHexChar(nibble, idx < 3 ? USX_NIB_HEX_LOWER : USX_NIB_HEX_UPPER));
              if ((idx == 2 || idx == 4) && (nibble_count == 25 || nibble_count == 21 || nibble_count == 17 || nibble_count == 13))
                DEC_OUTPUT_CHAR(out, olen, ol++, '-');
              bit_no += 4;
            } while (--nibble_count);
            if (nibble_count > 0) break; // reach input eof
          }
          if (dstate == USX_DELTA)
            h = USX_DELTA;
          continue;
        }
      }
    }
    if (is_upper && v == 1) {
      h = dstate = USX_DELTA; // continuous delta coding
      continue;
    }
    if (h < 3 && v < 28)
      c = usx_sets[h][v];
    if (c >= 'a' && c <= 'z') {
      dstate = USX_ALPHA;
      if (is_upper)
        c -= 32;
    } else {
      if (c >= '0' && c <= '9') {
        dstate = USX_NUM;
      } else if (c == 0) {
        if (v == 8) {
          DEC_OUTPUT_CHAR(out, olen, ol++, '\r');
          DEC_OUTPUT_CHAR(out, olen, ol++, '\n');
        } else if (h == USX_NUM && v == 26) {
          int32_t count = readCount(in, &bit_no, len);
          if (count < 0)
            break;
          count += 4;
          if (ol <= 0)
            return 0; // invalid encoding
          char rpt_c = out[ol - 1];
          while (count--)
            DEC_OUTPUT_CHAR(out, olen, ol++, rpt_c);
        } else if (h == USX_SYM && v > 24) {
          v -= 25;
          const int freqlen = (int)strlen(usx_freq_seq[v]);
          const int left = olen - ol;
          if (left <= 0) return olen + 1;
          memcpy(out + ol, usx_freq_seq[v], min_of(left, freqlen));
          if (left < freqlen) return olen + 1;
          ol += freqlen;
        } else if (h == USX_NUM && v > 22 && v < 26) {
          v -= (23 - 3);
          const int freqlen = (int)strlen(usx_freq_seq[v]);
          const int left = olen - ol;
          if (left <= 0) return olen + 1;
          memcpy(out + ol, usx_freq_seq[v], min_of(left, freqlen));
          if (left < freqlen) return olen + 1;
          ol += freqlen;
        } else
          break; // Terminator
        if (dstate == USX_DELTA)
          h = USX_DELTA;
        continue;
      }
    }
    if (dstate == USX_DELTA)
      h = USX_DELTA;
    DEC_OUTPUT_CHAR(out, olen, ol++, c);
  }

  return ol;

}

// Main API function. See unishox2.h for documentation
int unishox2_decompress(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[]) {
  return unishox2_decompress_lines(in, len, UNISHOX_API_OUT_AND_LEN(out, olen), usx_hcodes, usx_hcode_lens, usx_freq_seq, usx_templates, NULL);
}

// Main API function. See unishox2.h for documentation
int unishox2_decompress_simple(const char *in, int len, char *out) {
  return unishox2_decompress(in, len, UNISHOX_API_OUT_AND_LEN(out, INT_MAX - 1), USX_PSET_DFLT);
}