// FIXME: make get functions const uint8_t *

/*
 * Copyright (c) 2019-2021 Genome Research Ltd.
 * Author(s): James Bonfield
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *    2. Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials provided
 *       with the distribution.
 *
 *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
 *       Institute nor the names of its contributors may be used to endorse
 *       or promote products derived from this software without specific
 *       prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef VARINT_H
#define VARINT_H

#include <stdint.h>

#ifdef VARINT2
#include "varint2.h"
#else

// General API scheme is var_{get,put}_{s,u}{32,64}
// s/u for signed/unsigned;  32/64 for integer size.

// FIXME: consider returning the value and having nbytes passed in by
// reference instead of vice-versa.
//
// ie uint64_t var_get_u64(uint8_t *cp, int *nbytes)
// vs int      var_get_u64(uint8_t *cp, uint64_t *val)
//
// The return value can then be assigned to 32-bit or 64-bit type
// without need of a new function name.  The cost is we can't then
// do "cp += var_get_u32(cp, endp, &u_freq_sz);".  Maybe we can't do
// overflow detection with former? (Want 32-bit but got, say, 40 bit)


// Big endian.
// Harder for encoding, but a simpler and faster decoder.
#define BIG_END
#ifdef BIG_END

static inline
int var_put_u64_safe(uint8_t *cp, const uint8_t *endp, uint64_t i) {
    uint8_t *op = cp;
    int s = 0;
    uint64_t X = i;

    // safe method when we're near end of buffer
    do {
	s += 7;
	X >>= 7;
    } while (X);

    if (endp && (endp-cp)*7 < s)
	return 0;

    int n;
    for (n = 0; n < 10; n++) {
	s -= 7;
	*cp++ = ((i>>s) & 0x7f) + (s?128:0);
	if (!s)
	    break;
    }

    return cp-op;
}

// This can be optimised further with __builtin_clzl(i) and goto various
// bits of the if/else-if structure, but it's not a vast improvement and
// we are dominated by small values.  Simplicity wins for now
static inline
int var_put_u64(uint8_t *cp, const uint8_t *endp, uint64_t i) {
    if (endp && (endp-cp) < 10)
	return var_put_u64_safe(cp, endp, i);

    // maximum of 10 bytes written
    if (i < (1<<7)) {
	*cp = i;
	return 1;
    } else if (i < (1<<14)) {
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
	return 2;
    } else if (i < (1<<21)) {
	*cp++ = ((i>>14) & 0x7f) | 128;
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
	return 3;
    } else if (i < (1<<28)) {
	*cp++ = ((i>>21) & 0x7f) | 128;
	*cp++ = ((i>>14) & 0x7f) | 128;
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
	return 4;
    } else if (i < (1LL<<35)) {
	*cp++ = ((i>>28) & 0x7f) | 128;
	*cp++ = ((i>>21) & 0x7f) | 128;
	*cp++ = ((i>>14) & 0x7f) | 128;
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
	return 5;
    } else if (i < (1LL<<42)) {
	*cp++ = ((i>>35) & 0x7f) | 128;
	*cp++ = ((i>>28) & 0x7f) | 128;
	*cp++ = ((i>>21) & 0x7f) | 128;
	*cp++ = ((i>>14) & 0x7f) | 128;
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
	return 6;
    } else if (i < (1LL<<49)) {
	*cp++ = ((i>>42) & 0x7f) | 128;
	*cp++ = ((i>>35) & 0x7f) | 128;
	*cp++ = ((i>>28) & 0x7f) | 128;
	*cp++ = ((i>>21) & 0x7f) | 128;
	*cp++ = ((i>>14) & 0x7f) | 128;
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
	return 7;
    } else if (i < (1LL<<56)) {
	*cp++ = ((i>>49) & 0x7f) | 128;
	*cp++ = ((i>>42) & 0x7f) | 128;
	*cp++ = ((i>>35) & 0x7f) | 128;
	*cp++ = ((i>>28) & 0x7f) | 128;
	*cp++ = ((i>>21) & 0x7f) | 128;
	*cp++ = ((i>>14) & 0x7f) | 128;
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
	return 8;
    } else if (i < (1LL<<63)) {
	*cp++ = ((i>>56) & 0x7f) | 128;
	*cp++ = ((i>>49) & 0x7f) | 128;
	*cp++ = ((i>>42) & 0x7f) | 128;
	*cp++ = ((i>>35) & 0x7f) | 128;
	*cp++ = ((i>>28) & 0x7f) | 128;
	*cp++ = ((i>>21) & 0x7f) | 128;
	*cp++ = ((i>>14) & 0x7f) | 128;
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
	return 9;
    } else {
	*cp++ = ((i>>63) & 0x7f) | 128;
	*cp++ = ((i>>56) & 0x7f) | 128;
	*cp++ = ((i>>49) & 0x7f) | 128;
	*cp++ = ((i>>42) & 0x7f) | 128;
	*cp++ = ((i>>35) & 0x7f) | 128;
	*cp++ = ((i>>28) & 0x7f) | 128;
	*cp++ = ((i>>21) & 0x7f) | 128;
	*cp++ = ((i>>14) & 0x7f) | 128;
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
    }

    return 10;
}

static inline
int var_put_u32_safe(uint8_t *cp, const uint8_t *endp, uint32_t i) {
    uint8_t *op = cp;
    int s = 0;
    uint32_t X = i;

    // safe method when we're near end of buffer
    do {
	s += 7;
	X >>= 7;
    } while (X);

    if (endp && (endp-cp)*7 < s)
	return 0;

    int n;
    for (n = 0; n < 5; n++) {
	s -= 7;
	*cp++ = ((i>>s) & 0x7f) + (s?128:0);
	if (!s)
	    break;
    }

    return cp-op;
}

static inline
int var_put_u32(uint8_t *cp, const uint8_t *endp, uint32_t i) {
    if (endp && (endp-cp) < 5)
	return var_put_u32_safe(cp, endp, i);

    if (i < (1<<7)) {
	*cp = i;
	return 1;
    } else if (i < (1<<14)) {
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
	return 2;
    } else if (i < (1<<21)) {
	*cp++ = ((i>>14) & 0x7f) | 128;
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
	return 3;
    } else if (i < (1<<28)) {
	*cp++ = ((i>>21) & 0x7f) | 128;
	*cp++ = ((i>>14) & 0x7f) | 128;
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
	return 4;
    } else {
	*cp++ = ((i>>28) & 0x7f) | 128;
	*cp++ = ((i>>21) & 0x7f) | 128;
	*cp++ = ((i>>14) & 0x7f) | 128;
	*cp++ = ((i>> 7) & 0x7f) | 128;
	*cp++ =   i      & 0x7f;
    }

    return 5;
}

static inline
int var_get_u64(uint8_t *cp, const uint8_t *endp, uint64_t *i) {
    uint8_t *op = cp, c;
    uint64_t j = 0;

    if (!endp || endp - cp >= 10) {
	int n = 10;
	do {
	    c = *cp++;
	    j = (j<<7) | (c & 0x7f);
	} while ((c & 0x80) && n-- > 0);
    } else {
	if (cp >= endp) {
	    *i = 0;
	    return 0;
	}

	do {
	    c = *cp++;
	    j = (j<<7) | (c & 0x7f);
	} while ((c & 0x80) && cp < endp);
    }

    *i = j;
    return cp-op;
}

static inline
int var_get_u32(uint8_t *cp, const uint8_t *endp, uint32_t *i) {
    uint8_t *op = cp, c;
    uint32_t j = 0;

    if (!endp || endp - cp >= 6) {
	// Known maximum loop count helps optimiser.
	// NB: this helps considerably at -O3 level, but may harm -O2.
	// (However we optimise for those that want optimal code.)
	int n = 5;
	do {
	    c = *cp++;
	    j = (j<<7) | (c & 0x7f);
	} while ((c & 0x80) && n-- > 0);
    } else {
	if (cp >= endp) {
	    *i = 0;
	    return 0;
	}

	if (*cp < 128) {
	    *i = *cp;
	    return 1;
	}

	do {
	    c = *cp++;
	    j = (j<<7) | (c & 0x7f);
	} while ((c & 0x80) && cp < endp);
    }

    *i = j;
    return cp-op;
}

//-----------------------------------------------------------------------------
#else // BIG_END

// Little endian 7-bit variable sized integer encoding.
// The unsigned value is equivalent to LEB128 encoding.
// For signed, see below.
// This is also the Google Protocol Buffer and WebAssembly format.
static inline int var_put_u64(uint8_t *cp, const uint8_t *endp, uint64_t i) {
    uint8_t *op = cp;

    if (!endp || (endp-cp)*7 >= 10) {
	// Unsafe or big-enough anyway
	do {
	    *cp++ = (i&0x7f) + ((i>=0x80)<<7);
	    i >>= 7;
	} while (i);
    } else if (cp < endp) {
	// End checked variant
	do {
	    *cp++ = (i&0x7f) + ((i>=0x80)<<7);
	    i >>= 7;
	} while (i && cp < endp);
    }

    return cp-op;
}

static inline int var_put_u32(uint8_t *cp, const uint8_t *endp, uint32_t i) {
    uint8_t *op = cp;

    if (!endp || (endp-cp)*7 >= 5) {
	// Unsafe or big-enough anyway
	do {
	    *cp++ = (i&0x7f) + ((i>=0x80)<<7);
	    i >>= 7;
	} while (i);
    } else if (cp < endp) {
	// End checked variant
	do {
	    *cp++ = (i&0x7f) + ((i>=0x80)<<7);
	    i >>= 7;
	} while (i && cp < endp);
    }

    return cp-op;
}

static inline int var_get_u64(uint8_t *cp, const uint8_t *endp, uint64_t *i) {
    uint8_t *op = cp, c;
    uint64_t j = 0, s = 0;

    if (endp) {
	// Safe variant
	if (cp >= endp) {
	    *i = 0;
	    return 0;
	}

	do {
	    c = *cp++;
	    j |= (c & 0x7f) << s;
	    s += 7;
	} while ((c & 0x80) && cp < endp);
    } else {
	// Unsafe variant
	do {
	    c = *cp++;
	    j |= (c & 0x7f) << s;
	    s += 7;
	} while ((c & 0x80));
    }

    *i = j;
    return cp-op;
}

static inline int var_get_u32(uint8_t *cp, const uint8_t *endp, uint32_t *i) {
    uint8_t *op = cp, c;
    uint32_t j = 0, s = 0;

    if (endp) {
	// Safe variant
	if (cp >= endp) {
	    *i = 0;
	    return 0;
	}

	do {
	    c = *cp++;
	    j |= (c & 0x7f) << s;
	    s += 7;
	} while ((c & 0x80) && cp < endp);
    } else {
	// Unsafe variant
	do {
	    c = *cp++;
	    j |= (c & 0x7f) << s;
	    s += 7;
	} while ((c & 0x80));
    }

    *i = j;
    return cp-op;
}
#endif // BIG_END

//-----------------------------------------------------------------------------
// Signed versions of the above using zig-zag integer encoding.
// This folds the sign bit into the bottom bit so we iterate
// 0, -1, +1, -2, +2, etc.
static inline int var_put_s32(uint8_t *cp, const uint8_t *endp, int32_t i) {
    return var_put_u32(cp, endp, ((uint32_t)i << 1) ^ (i >> 31));
}
static inline int var_put_s64(uint8_t *cp, const uint8_t *endp, int64_t i) {
    return var_put_u64(cp, endp, ((uint64_t)i << 1) ^ (i >> 63));
}

static inline int var_get_s32(uint8_t *cp, const uint8_t *endp, int32_t *i) {
    int b = var_get_u32(cp, endp, (uint32_t *)i);
    *i = ((uint32_t)*i >> 1) ^ -(int32_t)(*i & 1);
    return b;
}
static inline int var_get_s64(uint8_t *cp, const uint8_t *endp, int64_t *i) {
    int b = var_get_u64(cp, endp, (uint64_t *)i);
    *i = ((uint64_t)*i >> 1) ^ -(int64_t)(*i & 1);
    return b;
}

static inline int var_size_u64(uint64_t v) {
    int i = 0;
    do {
	i++;
	v >>= 7;
    } while (v);
    return i;
}
#define var_size_u32 var_size_u64

static inline int var_size_s64(int64_t v) {
    return var_size_u64(((uint64_t)v << 1) ^ (v >> 63));
}
#define var_size_s32 var_size_s64

#endif /* VARINT2 */

#endif /* VARINT_H */