"""GPU (``gpu_hist``) tests for the XGBoost scikit-learn interface."""
import json
import os
import sys
import tempfile

import numpy as np
import pytest
import xgboost as xgb

# The shared test helpers live outside the package; make them importable
# before pulling them in.
sys.path.append("tests/python")
import testing as tm  # noqa
import test_with_sklearn as twskl  # noqa

# Every test in this module uses scikit-learn.
pytestmark = pytest.mark.skipif(**tm.no_sklearn())

rng = np.random.RandomState(1994)


def test_gpu_binary_classification():
    """Check held-out error of both classifier wrappers on two-class digits.

    ``XGBClassifier`` and ``XGBRFClassifier`` are each fitted with
    ``tree_method='gpu_hist'`` on every fold of a shuffled 2-fold split and
    must misclassify fewer than 10% of the held-out samples.
    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import KFold

    digits = load_digits(n_class=2)
    y = digits['target']
    X = digits['data']
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
        for train_index, test_index in kf.split(X, y):
            xgb_model = cls(
                random_state=42,
                tree_method='gpu_hist',
                n_estimators=4,
                gpu_id='0',
            ).fit(X[train_index], y[train_index])
            preds = xgb_model.predict(X[test_index])
            labels = y[test_index]
            # Fraction of held-out samples whose thresholded prediction
            # disagrees with the true label.
            err = np.mean((preds > 0.5).astype(int) != labels)
            assert err < 0.1


@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_boost_from_prediction_gpu_hist():
    """Run the shared boost-from-prediction checks on cupy inputs.

    Both the binary (breast cancer) and multi-class (digits) helpers from
    ``test_with_sklearn`` are invoked twice: once with ``None`` and once with
    ``cudf.DataFrame`` as the final argument.
    """
    from sklearn.datasets import load_breast_cancer, load_digits
    import cupy as cp
    import cudf

    tree_method = "gpu_hist"

    X, y = load_breast_cancer(return_X_y=True)
    X, y = cp.array(X), cp.array(y)
    twskl.run_boost_from_prediction_binary(tree_method, X, y, None)
    twskl.run_boost_from_prediction_binary(tree_method, X, y, cudf.DataFrame)

    X, y = load_digits(return_X_y=True)
    X, y = cp.array(X), cp.array(y)
    twskl.run_boost_from_prediction_multi_clasas(
        xgb.XGBClassifier, tree_method, X, y, None
    )
    twskl.run_boost_from_prediction_multi_clasas(
        xgb.XGBClassifier, tree_method, X, y, cudf.DataFrame
    )


def test_num_parallel_tree():
    """Delegate to ``twskl.run_calif_housing_rf_regression`` with ``gpu_hist``."""
    twskl.run_calif_housing_rf_regression("gpu_hist")


@pytest.mark.skipif(**tm.no_pandas())
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_sklearn())
def test_categorical():
    """Exercise ``enable_categorical=True`` with ``gpu_hist``.

    First, a classifier is trained on the agaricus demo data converted to
    pandas ``category`` columns; the saved JSON model must report non-empty
    ``categories_sizes`` for the first tree, all equal to 1.  Second, a
    regressor trained on a single categorical column must give the same
    output from ``predict`` and ``Booster.inplace_predict`` for both pandas
    and cudf inputs.
    """
    import pandas as pd
    import cudf
    import cupy as cp
    from sklearn.datasets import load_svmlight_file

    data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data")
    X, y = load_svmlight_file(os.path.join(data_dir, "agaricus.txt.train"))
    clf = xgb.XGBClassifier(
        tree_method="gpu_hist",
        enable_categorical=True,
        n_estimators=10,
    )
    X = pd.DataFrame(X.todense()).astype("category")
    clf.fit(X, y)

    with tempfile.TemporaryDirectory() as tempdir:
        model = os.path.join(tempdir, "categorial.json")
        clf.save_model(model)

        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(model, encoding="utf-8") as fd:
            categorical = json.load(fd)

        categories_sizes = np.array(
            categorical["learner"]["gradient_booster"]["model"]["trees"][0][
                "categories_sizes"
            ]
        )
        assert categories_sizes.shape[0] != 0
        np.testing.assert_allclose(categories_sizes, 1)

    def check_predt(X, y):
        """Fit a categorical regressor and compare normal vs in-place predict."""
        reg = xgb.XGBRegressor(
            tree_method="gpu_hist", enable_categorical=True, n_estimators=64
        )
        reg.fit(X, y)
        predts = reg.predict(X)
        booster = reg.get_booster()
        assert "c" in booster.feature_types
        assert len(booster.feature_types) == 1
        inp_predts = booster.inplace_predict(X)
        # In-place prediction may come back as a cupy array; bring it to the
        # host before comparing with the numpy result.
        if isinstance(inp_predts, cp.ndarray):
            inp_predts = cp.asnumpy(inp_predts)
        np.testing.assert_allclose(predts, inp_predts)

    y = [1, 2, 3]
    X = pd.DataFrame({"f0": ["a", "b", "c"]})
    X["f0"] = X["f0"].astype("category")
    check_predt(X, y)

    X = cudf.DataFrame(X)
    check_predt(X, y)


@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_classififer():
    """Fitting on labels scaled by 10 must raise ``ValueError``.

    The digits labels are multiplied by 10 and ``XGBClassifier.fit`` is
    expected to raise ``ValueError`` matching ``Invalid classes`` for numpy,
    cupy, cudf and pandas inputs alike.
    """
    from sklearn.datasets import load_digits
    import cupy as cp
    import cudf

    X, y = load_digits(return_X_y=True)
    y *= 10

    clf = xgb.XGBClassifier(tree_method="gpu_hist", n_estimators=1)

    # numpy
    with pytest.raises(ValueError, match=r"Invalid classes.*"):
        clf.fit(X, y)

    # cupy
    X, y = cp.array(X), cp.array(y)
    with pytest.raises(ValueError, match=r"Invalid classes.*"):
        clf.fit(X, y)

    # cudf
    X, y = cudf.DataFrame(X), cudf.DataFrame(y)
    with pytest.raises(ValueError, match=r"Invalid classes.*"):
        clf.fit(X, y)

    # pandas
    X, y = load_digits(return_X_y=True, as_frame=True)
    y *= 10
    with pytest.raises(ValueError, match=r"Invalid classes.*"):
        clf.fit(X, y)