# -*- coding: utf-8 -*-
"""Suite of basic tests"""
import sys
import os
import itertools
import subprocess
from zipfile import ZipFile

import pytest
from scipy.sparse import csr_matrix
import treelite
import treelite_runtime
from treelite.contrib import _libext
from .metadata import dataset_db
from .util import os_platform, os_compatible_toolchains, does_not_raise, check_predictor


@pytest.mark.parametrize('dataset,use_annotation,parallel_comp,quantize,toolchain',
                         list(itertools.product(
                             ['mushroom', 'dermatology'], [True, False], [None, 4], [True, False],
                             os_compatible_toolchains())) +
                         [('letor', False, 713, True, os_compatible_toolchains()[0]),
                          ('toy_categorical', False, 30, True, os_compatible_toolchains()[0])])
def test_basic(tmpdir, annotation, dataset, use_annotation, quantize, parallel_comp, toolchain):
    # pylint: disable=too-many-arguments
    """Compile a model with the default compiler ('ast_native') and check its predictions.

    The model for ``dataset`` is exported as a shared library inside ``tmpdir`` using the
    given ``toolchain``, loaded back through the runtime and validated by ``check_predictor``.

    ``use_annotation`` feeds a branch annotation file to the compiler, ``quantize`` toggles
    the 'quantize' compiler parameter and ``parallel_comp`` (None means 0) sets the
    'parallel_comp' compiler parameter.
    """

    if dataset == 'letor' and os_platform() == 'windows':
        pytest.xfail('export_lib() is too slow for letor on MSVC')

    entry = dataset_db[dataset]
    libpath = os.path.join(tmpdir, entry.libname + _libext())
    model = treelite.Model.load(entry.model, model_format=entry.format)

    # 'NULL' is the value passed to the compiler when no annotation file is used
    annotate_in = 'NULL'
    if use_annotation:
        # NOTE(review): `annotation` looks like a fixture mapping dataset name to the
        # annotation file content (or None when unavailable) -- confirm in conftest
        if annotation[dataset] is None:
            pytest.skip('No training data available. Skipping annotation')
        annotate_in = os.path.join(tmpdir, 'annotation.json')
        with open(annotate_in, 'w') as f:
            f.write(annotation[dataset])

    params = {
        'annotate_in': annotate_in,
        'quantize': int(bool(quantize)),
        'parallel_comp': parallel_comp or 0
    }
    model.export_lib(toolchain=toolchain, libpath=libpath, params=params, verbose=True)
    check_predictor(treelite_runtime.Predictor(libpath=libpath, verbose=True), dataset)


@pytest.mark.parametrize('toolchain', os_compatible_toolchains())
@pytest.mark.parametrize('use_elf', [True, False])
@pytest.mark.parametrize('dataset', ['mushroom', 'dermatology', 'letor', 'toy_categorical'])
def test_failsafe_compiler(tmpdir, dataset, use_elf, toolchain):
    """Test 'failsafe' compiler

    The model for ``dataset`` is exported with ``compiler='failsafe'``. Depending on the
    platform and the model format, the export is expected either to raise
    ``treelite.TreeliteError`` or to produce a library whose predictions are then checked.
    """
    libpath = os.path.join(tmpdir, dataset_db[dataset].libname + _libext())
    model = treelite.Model.load(dataset_db[dataset].model, model_format=dataset_db[dataset].format)

    params = {'dump_array_as_elf': (1 if use_elf else 0)}

    is_linux = sys.platform.startswith('linux')
    # Expect Treelite to throw error if we try to use dump_array_as_elf on non-Linux OS
    # Also, failsafe compiler is only available for XGBoost models
    expect_failure = (use_elf and not is_linux) or dataset_db[dataset].format != 'xgboost'
    if expect_failure:
        expect_raises = pytest.raises(treelite.TreeliteError)
    else:
        expect_raises = does_not_raise()

    # Keep only the call under test inside the context manager, so that an error raised by a
    # later step cannot be mistaken for the expected compilation failure.
    with expect_raises:
        model.export_lib(compiler='failsafe', toolchain=toolchain, libpath=libpath, params=params,
                         verbose=True)
    if expect_failure:
        return  # compilation was rejected as expected; nothing to load

    predictor = treelite_runtime.Predictor(libpath=libpath, verbose=True)
    check_predictor(predictor, dataset)


@pytest.mark.skipif(os_platform() == 'windows', reason='Make unavailable on Windows')
@pytest.mark.parametrize('toolchain', os_compatible_toolchains())
@pytest.mark.parametrize('dataset', ['mushroom', 'dermatology', 'letor', 'toy_categorical'])
def test_srcpkg(tmpdir, dataset, toolchain):
    """Test feature to export a source package (zip archive) and build it with Make

    The archive is written to ``tmpdir``, extracted there, built with ``make`` in the
    directory named after the library, and the resulting library is loaded and checked.
    """
    pkgpath = os.path.join(tmpdir, 'srcpkg.zip')
    model = treelite.Model.load(dataset_db[dataset].model, model_format=dataset_db[dataset].format)
    model.export_srcpkg(platform=os_platform(), toolchain=toolchain,
                        pkgpath=pkgpath, libname=dataset_db[dataset].libname,
                        params={'parallel_comp': 700 if dataset == 'letor' else 4}, verbose=True)
    with ZipFile(pkgpath, 'r') as zip_ref:
        zip_ref.extractall(tmpdir)
    # os.cpu_count() returns None when the CPU count cannot be determined; fall back to a
    # single job so that Make never receives an invalid '-jNone' argument
    nproc = os.cpu_count() or 1
    subprocess.check_call(['make', '-C', dataset_db[dataset].libname, f'-j{nproc}'], cwd=tmpdir)

    # The predictor is pointed at the extracted package directory, not at a library file
    libpath = os.path.join(tmpdir, dataset_db[dataset].libname)
    predictor = treelite_runtime.Predictor(libpath=libpath, verbose=True)
    check_predictor(predictor, dataset)


@pytest.mark.parametrize('dataset', ['mushroom', 'dermatology', 'letor', 'toy_categorical'])
def test_srcpkg_cmake(tmpdir, dataset):  # pylint: disable=R0914
    """Test feature to export a source package (zip archive) and build it with CMake

    The archive is written to ``tmpdir``, extracted there, configured and built with CMake
    in a fresh ``build`` sub-directory, and the resulting library is loaded and checked.
    """
    pkgpath = os.path.join(tmpdir, 'srcpkg.zip')
    model = treelite.Model.load(dataset_db[dataset].model, model_format=dataset_db[dataset].format)
    model.export_srcpkg(platform=os_platform(), toolchain='cmake',
                        pkgpath=pkgpath, libname=dataset_db[dataset].libname,
                        params={'parallel_comp': 700 if dataset == 'letor' else 4}, verbose=True)
    with ZipFile(pkgpath, 'r') as zip_ref:
        zip_ref.extractall(tmpdir)
    build_dir = os.path.join(tmpdir, dataset_db[dataset].libname, 'build')
    os.mkdir(build_dir)
    # os.cpu_count() returns None when the CPU count cannot be determined; fall back to a
    # single job so that CMake never receives '--parallel None'
    nproc = os.cpu_count() or 1
    # Extra generator options are passed only on Windows (64-bit target architecture)
    win_opts = ['-A', 'x64'] if os_platform() == 'windows' else []
    subprocess.check_call(['cmake', '..'] + win_opts, cwd=build_dir)
    subprocess.check_call(['cmake', '--build', '.', '--config', 'Release',
                           '--parallel', str(nproc)], cwd=build_dir)

    # The predictor is pointed at the build directory, not at a library file
    predictor = treelite_runtime.Predictor(libpath=build_dir, verbose=True)
    check_predictor(predictor, dataset)


def test_deficient_matrix(tmpdir):
    """Check prediction on a sparse matrix that has fewer columns than the model expects.

    The compiled 'mushroom' model reports 127 features while the input has only 3 columns;
    the missing columns should be treated as zero padding and prediction must not crash."""
    mushroom = dataset_db['mushroom']
    libpath = os.path.join(tmpdir, mushroom.libname + _libext())
    model = treelite.Model.load(mushroom.model, model_format='xgboost')
    model.export_lib(toolchain=os_compatible_toolchains()[0], libpath=libpath,
                     params={'quantize': 1}, verbose=True)

    # Empty 3x3 sparse matrix: no stored entries, narrower than the model's feature count
    narrow = csr_matrix(([], ([], [])), shape=(3, 3))
    dmat = treelite_runtime.DMatrix(narrow, dtype='float32')

    predictor = treelite_runtime.Predictor(libpath=libpath, verbose=True)
    assert predictor.num_feature == 127
    predictor.predict(dmat)  # should not crash


def test_too_wide_matrix(tmpdir):
    """Check prediction on a sparse matrix that has more columns than the model expects.

    The compiled 'mushroom' model reports 127 features while the input has 1000 columns;
    the runtime is expected to reject it with TreeliteRuntimeError."""
    mushroom = dataset_db['mushroom']
    libpath = os.path.join(tmpdir, mushroom.libname + _libext())
    model = treelite.Model.load(mushroom.model, model_format='xgboost')
    model.export_lib(toolchain=os_compatible_toolchains()[0], libpath=libpath,
                     params={'quantize': 1}, verbose=True)

    # Empty 3x1000 sparse matrix: no stored entries, wider than the model's feature count
    wide = csr_matrix(([], ([], [])), shape=(3, 1000))
    dmat = treelite_runtime.DMatrix(wide, dtype='float32')

    predictor = treelite_runtime.Predictor(libpath=libpath, verbose=True)
    assert predictor.num_feature == 127
    with pytest.raises(treelite_runtime.TreeliteRuntimeError):
        predictor.predict(dmat)


def test_set_tree_limit():
    """Exercise Model.set_tree_limit on the 'mushroom' and 'dermatology' models.

    Limits of zero or above the current tree count must raise TreeliteError; valid limits
    must be reflected by ``num_tree``."""
    mushroom = treelite.Model.load(dataset_db['mushroom'].model, model_format='xgboost')
    assert mushroom.num_tree == 2
    for bad_limit in (0, 3):
        with pytest.raises(treelite.TreeliteError):
            mushroom.set_tree_limit(bad_limit)
    mushroom.set_tree_limit(1)
    assert mushroom.num_tree == 1

    dermatology = treelite.Model.load(dataset_db['dermatology'].model, model_format='xgboost')
    for bad_limit in (0, 200):
        with pytest.raises(treelite.TreeliteError):
            dermatology.set_tree_limit(bad_limit)
    # Rejected limits must leave the model untouched
    assert dermatology.num_tree == 60
    # Successively smaller limits keep shrinking the model
    for limit in (30, 10):
        dermatology.set_tree_limit(limit)
        assert dermatology.num_tree == limit