#!/usr/bin/env python3

# Boolector: Satisfiablity Modulo Theories (SMT) solver.
#
# Copyright (C) 2007-2021 by the authors listed in the AUTHORS file.
#
# This file is part of Boolector.
# See COPYING for more information on using this software.
#

import copy
import os
import shlex
import shutil
import sys
import re
import time
from optparse import OptionParser
from subprocess import Popen, PIPE, TimeoutExpired

g_golden_err_msg = ""
g_golden_exit_code = 0
g_golden_runtime = 0
g_options = object
g_outfile = ""
g_tmpfile = "/tmp/ddmbt-" + str(os.getpid()) + ".trace"
g_tmpbin = "/tmp/ddmbt-bin-" + str(os.getpid())
g_command = [] 
g_num_tests = 0
g_lines = []        # contains the current failing trace
g_subst_lines = {}  # maps line number in 'g_lines' to line substitution
g_line_tokens = []
g_id2line = {}
g_node_map = {}
g_sort_map = {}

CONST_NODE_KINDS = ["const", "zero", "false", "ones", "true", "one",
                    "unsigned_int", "int", "min_signed", "max_signed"]

NODE_KINDS = ["copy", "const", "zero", "false", "ones", "true", "one",
              "unsigned_int", "int", "var", "array", "uf", "not", "neg",
              "redor", "redxor", "redand", "slice", "uext", "sext", "implies",
              "iff", "xor", "xnor", "and", "nand", "or", "nor", "eq", "ne",
              "add", "uaddo", "saddo", "mul", "umulo", "smulo", "ult", "slt",
              "ulte", "slte", "ugt", "sgt", "ugte", "sgte", "sll", "srl",
              "sra", "rol", "ror", "sub", "usubo", "ssubo", "udiv", "sdiv",
              "sdivo", "urem", "srem", "smod", "concat", "read", "write",
              "cond", "param", "fun", "apply", "inc", "dec", "repeat"]

SORT_KINDS = ["bool_sort", "bitvec_sort", "fun_sort", "array_sort",
              "tuple_sort"]

OTHER_KEYWORDS = [
    'new', 'return', 'sat', 'assume', 'assert', 'failed', 'set_opt', 'has_opt',
    'array_assignment', 'bv_assignment', 'uf_assignment',
    'free_array_assignment', 'free_bv_assignment', 'free_uf_assignment',
    'fixate_assumptions', 'fun_sort_check', 'set_sat_solver', 'simplify',
    'is_array', 'get_width', 'set_symbol', 'clone', 'dump_smt2', 'get_node_id',
    'get_bits', 'free_bits', 'get_index_width', 'is_bv_const_max_signed',
    'release', 'release_sort', 'chkclone', 'reset_assumptions', 'is_uf',
    'get_opt', 'is_bv_const_one', 'is_bv_const_zero', 'is_bv_const_min_signed',
    'is_bv_const_ones', 'is_var', 'roli', 'is_const', 'is_fun', 'is_bound_param'
]
VALID_KEYWORDS = []
VALID_KEYWORDS.extend(OTHER_KEYWORDS)
VALID_KEYWORDS.extend(CONST_NODE_KINDS)
VALID_KEYWORDS.extend(NODE_KINDS)
VALID_KEYWORDS.extend(SORT_KINDS)

class LineTokens: 
    def __init__(self, id, line):
        tokens = line.split()
        self.id = id
        self.kind = tokens[0]
        if self.kind not in VALID_KEYWORDS:
            print("warning: invalid keyword '{}'".format(self.kind))
        if len(tokens) > 1:
            self.btor = tokens[1]
        else:
            self.btor = ""
        self.tokens = tokens

        if self.is_node_kind():
            self.bw = _node_bw(self.tokens)
            self.children = [c for c in tokens if _is_node(c)]
        else:
            self.bw = None 
            self.children = []

    def is_node_kind(self):
        return self.kind in NODE_KINDS or self.kind in CONST_NODE_KINDS

    def is_sort_kind(self):
        return self.kind in SORT_KINDS

    def __str__(self):
        return "token:: id: {}, kind: {}, bw: {}, children: {}".format(
                self.id, self.kind, self.bw, self.children)


def _parse_options():
    usage_fmt = "%prog [options] INFILE OUTFILE \"CMD [options]\""

    p = OptionParser(usage=usage_fmt)

    p.add_option("-v", action="count", dest="verbosity", default=0, 
                 help="increase verbosity level")

    (options, args) = p.parse_args()

    if len(args) != 3:
        p.error("invalid number of arguments")

    return (options, args)

def _is_node(id):
    global g_node_map
    return id in g_node_map

def _sort_bw(id):
    global g_sort_map

    t = g_sort_map[id][:]
    del[t[1]]
    sort = t[0]

    if sort == "bool_sort":
        return 1
    elif sort == "bitvec_sort":
        return int(t[1])
    elif sort in ["fun_sort", "array_sort"]:
        return [_sort_bw(s) for s in t[1:]]
    elif sort == "tuple_sort":
        pass
    elif sort == "array_sort":
        pass
    return 0

def _tokens_children(tokens):
    return [c for c in tokens if _is_node(c)]

def _node_bw(tokens):
    t = tokens[:]
    del[t[1]]
    tokens = t
    kind = tokens[0]
    bw = 0
    if kind in ["var", "param", "zero", "one", "ones", "uf", "array",
                "min_signed", "max_signed"]:
        bw = _sort_bw(tokens[1])
    elif kind in ["int", "unsigned_int"]:
        bw = _sort_bw(tokens[2])
    elif kind == "slice":
        bw = int(tokens[2]) - int(tokens[3]) + 1
    elif kind in ["true", "false",
                  "redor", "redand", "redxor",
                  "implies", "iff",
                  "eq", "ne", "umulo", "smulo",
                  "ult", "slt", "ulte", "slte",
                  "ugt", "sgt", "ugte", "sgte",
                  "uaddo", "saddo", "usubo", "ssubo", "sdivo"]:
        bw = 1 
    else:
        children = _tokens_children(tokens)
        cbw = [g_node_map[c].bw for c in children]
        if kind == "cond":
            assert(cbw[1] == cbw[2])
            bw = cbw[1]
        elif kind == "read":
            assert(len(cbw[0]) == 2)
            bw = cbw[0][1]
            assert(not isinstance(bw, list))
        elif kind == "write":
            assert(len(cbw[0]) == 2)
            bw = cbw[0]
        elif kind in ["not", "inc", "dec"]:
            assert(len(cbw) == 1)
            bw = cbw[0]
        elif kind in ["sext", "uext"]:
            bw = cbw[0] + int(tokens[2])
        elif kind == "repeat":
            bw = cbw[0] * int(tokens[2])
        elif kind == "fun":
            bw = cbw
        elif kind == "apply":
            bw = cbw[-1][-1]
            assert(not isinstance(bw, list))
        elif kind in ["rol", "ror", "neg", "sll", "srl", "sra"]:
            bw = cbw[0]
        elif kind == "concat":
            assert(len(cbw) == 2)
            bw = cbw[0] + cbw[1]
        elif kind == "const":
            bw = len(tokens[1])
        elif kind == "copy":
            bw = cbw[0]
        else:
            assert len(cbw) == 2
            assert cbw[0] == cbw[1]
            bw = cbw[0]
            assert not isinstance(bw, list)
    return bw

def _build_graph():
    global g_lines, g_line_tokens, g_id2line, g_node_map, g_sort_map

    g_line_tokens = []
    g_node_map = {}
    g_sort_map = {}
    prev_ltok = None
    for i in range(len(g_lines)):
        line = g_lines[i]
        cur_ltok = LineTokens(None, line)

        if cur_ltok.kind == "return" and not "assignment" in prev_ltok.kind:
            id = cur_ltok.tokens[1]
            if prev_ltok.is_node_kind():
                g_node_map[id] = prev_ltok
                assert i > 0
                assert g_line_tokens[i - 1].id == None
                g_line_tokens[i - 1].id = id
                g_id2line[id] = i - 1
            elif prev_ltok.is_sort_kind():
                g_sort_map[id] = prev_ltok.tokens

        prev_ltok = LineTokens(None, line)
        g_line_tokens.append(prev_ltok)

    for id, node in g_node_map.items():
        assert id != None
        assert node.id == id

def _parse_trace(inputfile):
    global g_lines

    try:
        with open(inputfile, 'r') as infile:
            for line in infile:
                g_lines.append(line.strip())

        _log(1, "parsed {0:d} lines".format(len(g_lines)))

    except IOError as err:
        _error_and_exit(err)

def _open_file_dump_trace(filename, lines):
    try:
        with open(filename, 'w') as outfile:
            _dump_trace(outfile, lines)
    except IOError as err:
        _error_and_exit(err)

def _dump_trace(outfile, lines):
    global g_subst_lines

    for i in range(len(lines)):
        # TODO: sort handling
        # skip sort lines for now
        skip = False
        m = re.search('_sort$', lines[i].split()[0])
        if m is not None:
            skip = True
        if not skip:
            m = re.match('^return s\d+', lines[i])
            if m is not None:
                skip = True

        if skip:
            skip = True

        if i in g_subst_lines and not skip:
            line = g_subst_lines[i]
        else:
            line = lines[i]

        outfile.write("{}\n".format(line))

def _run(initial=False):
    global g_command
    try:
        subproc = Popen(g_command, stdout=PIPE, stderr=PIPE)
        try:
            if initial:
                _, msg_err = subproc.communicate()
            else:
                timeout = max([g_golden_runtime * 2, 1])
                _, msg_err = subproc.communicate(timeout=timeout)
        except TimeoutExpired:
            subproc.kill()
            _, msg_err = subproc.communicate()
        return (subproc.returncode, msg_err)
    except OSError as err:
        _error_and_exit(err)

def _test():
    global g_golden_exit_code, g_num_tests, g_golden_err_msg

    g_num_tests += 1
    exitcode, err_msg = _run()
    return exitcode == g_golden_exit_code and err_msg == g_golden_err_msg

def _save(lines):
    global g_outfile

    _open_file_dump_trace(g_outfile, lines)

def _cleanup():
    os.remove(g_tmpfile)
    os.remove(g_tmpbin)

def _log(verbosity, msg, update=False):
    global g_options

    if g_options.verbosity >= verbosity:
        sys.stdout.write(" " * 80 + "\r")
        if update:
            sys.stdout.write("[ddmbt] {0:s}\r".format(msg))
            sys.stdout.flush()
        else:
            sys.stdout.write("[ddmbt] {0:s}\n".format(msg))

def _error_and_exit(msg):
    sys.exit("[ddmbt] error: " + msg)

def _remove_lines():
    global g_lines

    num_lines = 0
    gran = len(g_lines)
    while gran >= 1:
        cur_lines = g_lines[:]
        last_saved_lines = g_lines[:]
        offset = 0
        while offset < len(cur_lines):
            lower = offset
            upper = offset + gran
            subset = cur_lines[lower:upper]
            lines = cur_lines[:lower]
            lines.extend(cur_lines[upper:])
            assert len(subset) == len(cur_lines) - len(lines)
            assert subset
            assert len(lines) < len(cur_lines)

            _open_file_dump_trace(g_tmpfile, lines)

            if _test():
                _save(lines)
                cur_lines = lines
                last_saved_lines = cur_lines[:]
                num_lines += len(subset)
            else:
                offset += gran

            _log(1, "  granularity {}, offset {}, lines {}".format(gran,
                 offset, len(last_saved_lines)), True)

        g_lines = last_saved_lines 
        gran = gran // 2

    num_lines += _remove_return_pairs()

    return num_lines 

def _compact_graph():
    global g_line_tokens, g_subst_lines, g_lines

    num_substs = 0
    g_subst_lines = {}

    _build_graph()
    assert(len(g_lines) == len(g_line_tokens))

    # insert vars
    for i in range(len(g_line_tokens)):
        id = g_line_tokens[i].id
        tokens = g_line_tokens[i].tokens
        kind = tokens[0]

        if kind in CONST_NODE_KINDS or \
           kind in ["var", "param", "uf", "fun"] or not _is_node(id):
            continue

        btor = tokens[1]
        bw = g_line_tokens[i].bw

        _log(1, "  insert var at line {}, {}".format(i, num_substs), True)
        g_subst_lines[i] = "var {} {} v{}".format(btor, bw, i) 
        _open_file_dump_trace(g_tmpfile, g_lines)

        if _test():
            _save(g_lines)
            g_lines[i] = g_subst_lines[i]
            num_substs += 1

        del(g_subst_lines[i])

    return num_substs

def _swap_candidates(nid):
    global g_line_tokens, g_id2line
    assert(nid in g_node_map)

    i = g_id2line[nid]
    bw = g_line_tokens[i].bw
    assert (bw != None)
    candidates = []
    cache = {}
    for j in range(i):
        ltok = g_line_tokens[j]
        if ltok.id != None and ltok.bw == bw and ltok.id not in cache and \
           ltok.id < nid:
            cache[ltok.id] = True
            candidates.append(ltok.id)

    return candidates


def _swap_ids():
    global g_line_tokens, g_subst_lines, g_lines

    num_substs = 0
    g_subst_lines = {}

    _build_graph()
    assert(len(g_lines) == len(g_line_tokens))

    for i in range(len(g_line_tokens)):
        ltok = g_line_tokens[i]

        if ltok.id is None:
            continue

        tokens = [str(t) for t in ltok.tokens]
        for child in ltok.children:
            assert(_is_node(child))
            if len(g_node_map[child].children) == 0:
                continue
            candidates = _swap_candidates(child)

            for c in candidates:
                assert(child != c)
                swapped = False
                pos = 0
                while pos < len(tokens):
                    if tokens[pos] == child:
                        tokens[pos] = c
                        swapped = True
                        break
                    pos += 1

                if not swapped:
                    continue

                g_subst_lines[i] = " ".join(tokens)
                assert(g_subst_lines[i] != g_lines[i])
                _log(1, "  line {}, swap nodes {}".format(i, num_substs), True)
                _open_file_dump_trace(g_tmpfile, g_lines)

                if _test():
                    _save(g_lines)
                    g_lines[i] = g_subst_lines[i]
                    num_substs += 1
                    break
                else:
                    tokens[pos] = child

                del(g_subst_lines[i])

    return num_substs


def _remove_return_pairs():
    global g_lines

    num_lines = 0
    cur_lines = g_lines[:]
    last_saved_lines = g_lines[:]

    offset = 0
    while offset < len(cur_lines):
        line = cur_lines[offset]
        kind = line.split()[0]
        if offset + 1 < len(cur_lines):
            next_line = cur_lines[offset + 1]
        else:
            next_line = None

        if kind in OTHER_KEYWORDS:
            cur_lines.pop(offset)
            if next_line and next_line.startswith('return'):
                cur_lines.pop(offset)
        else:
            offset += 1

    _open_file_dump_trace(g_tmpfile, cur_lines)

    if _test():
        _save(cur_lines)
        last_saved_lines = cur_lines[:]
        num_lines = len(g_lines) - len(cur_lines)
    else:
        cur_lines = g_lines[:]
        last_saved_lines = g_lines[:]

    if num_lines == 0:
        offset = 0
        while offset < len(cur_lines):
            line = cur_lines[offset]
            kind = line.split()[0]
            if offset + 1 < len(cur_lines):
                next_line = cur_lines[offset + 1]
            else:
                next_line = None

            if kind in OTHER_KEYWORDS and \
                next_line and next_line.startswith('return'):

                lines = cur_lines[:]
                lines.pop(offset)
                lines.pop(offset)
                _open_file_dump_trace(g_tmpfile, lines)

                if _test():
                    _save(lines)
                    cur_lines = lines
                    last_saved_lines = cur_lines[:]
                    num_lines += 2
                    continue

            offset += 1
            _log(1, "  pairs: offset {}, lines {}".format(
                offset, len(last_saved_lines)), True)

    g_lines = last_saved_lines
    return num_lines


def ddmbt_main():
    global g_options, g_golden_exit_code, g_tmpfile, g_outfile, g_command
    global g_num_tests, g_nodes, g_golden_err_msg

    (g_options, args) = _parse_options()
    inputfile = args[0]
    g_outfile = args[1]
    g_command = shlex.split(args[2])

    if not os.path.exists(inputfile):
        _error_and_exit("'input file {0:s}' does not exist".format(inputfile))

    if os.path.exists(g_outfile):
        _error_and_exit("output file '{0:s}' exists".format(g_outfile))

    _log(1, "input file: '{0:s}'".format(inputfile))
    _log(1, "output file: '{0:s}'".format(g_outfile))

    _parse_trace(args[0])
    num_lines_start = len(g_lines)

    # copy inputfile to tmpfile
    shutil.copyfile(inputfile, g_tmpfile)
    g_command.append(g_tmpfile)
    # copy binary to tmpfile
    shutil.copyfile(g_command[0], g_tmpbin)
    # copy permissons of binary
    shutil.copymode(g_command[0], g_tmpbin)
    # replace binary with temporary one
    g_command[0] = g_tmpbin

    start = time.time()
    g_golden_exit_code, g_golden_err_msg = _run(True)
    g_golden_runtime = time.time() - start
    _log(1, "golden exit code: {0:d} in {1:.3f} seconds".format(
            g_golden_exit_code, g_golden_runtime))
    _log(1, "golden err msg: {}".format(g_golden_err_msg))

    rounds = 0
    num_swap_only_rounds = 0
    while True:
        rounds += 1
        num_substs = 0
        num_swaps = 0
        num_lines = _remove_lines()
        if num_lines > 0:
            num_swap_only_rounds = 0
        else:
            num_substs = _compact_graph()
            if num_substs > 0:
                num_swap_only_rounds = 0
            num_swaps = _swap_ids()
            if num_lines == 0 and num_substs == 0:
                num_swap_only_rounds += 1


        _log(1, "round {}, removed {} lines, substituted {} lines, "\
                "swapped {} nodes".format(rounds, num_lines,
                                          num_substs, num_swaps))

        if num_lines == 0 and num_substs == 0 and num_swaps == 0:
            break

    _log(1, "tests: {0:d}".format(g_num_tests))
    _log(1, "dumped {0:d} lines (removed {1:.1f}% of lines)".format(
         len(g_lines), (1 - len(g_lines)/num_lines_start) * 100))

if __name__ == "__main__":
    try:
        ddmbt_main()
        _cleanup()
        sys.exit(0)
    except KeyboardInterrupt as err:
        _cleanup()
        sys.exit(err)