# -*- coding: utf-8 -*- import numpy as np import xgboost import testing as tm import pytest dpath = 'demo/data/' rng = np.random.RandomState(1994) class TestInteractionConstraints: def run_interaction_constraints( self, tree_method, feature_names=None, interaction_constraints='[[0, 1]]' ): x1 = np.random.normal(loc=1.0, scale=1.0, size=1000) x2 = np.random.normal(loc=1.0, scale=1.0, size=1000) x3 = np.random.choice([1, 2, 3], size=1000, replace=True) y = x1 + x2 + x3 + x1 * x2 * x3 \ + np.random.normal( loc=0.001, scale=1.0, size=1000) + 3 * np.sin(x1) X = np.column_stack((x1, x2, x3)) dtrain = xgboost.DMatrix(X, label=y, feature_names=feature_names) params = { 'max_depth': 3, 'eta': 0.1, 'nthread': 2, 'interaction_constraints': interaction_constraints, 'tree_method': tree_method } num_boost_round = 12 # Fit a model that only allows interaction between x1 and x2 bst = xgboost.train( params, dtrain, num_boost_round, evals=[(dtrain, 'train')]) # Set all observations to have the same x3 values then increment # by the same amount def f(x): tmat = xgboost.DMatrix( np.column_stack((x1, x2, np.repeat(x, 1000))), feature_names=feature_names) return bst.predict(tmat) preds = [f(x) for x in [1, 2, 3]] # Check incrementing x3 has the same effect on all observations # since x3 is constrained to be independent of x1 and x2 # and all observations start off from the same x3 value diff1 = preds[1] - preds[0] assert np.all(np.abs(diff1 - diff1[0]) < 1e-4) diff2 = preds[2] - preds[1] assert np.all(np.abs(diff2 - diff2[0]) < 1e-4) def test_exact_interaction_constraints(self): self.run_interaction_constraints(tree_method='exact') def test_hist_interaction_constraints(self): self.run_interaction_constraints(tree_method='hist') def test_approx_interaction_constraints(self): self.run_interaction_constraints(tree_method='approx') def test_interaction_constraints_feature_names(self): with pytest.raises(ValueError): constraints = [('feature_0', 'feature_1')] self.run_interaction_constraints(tree_method='exact', interaction_constraints=constraints) with pytest.raises(ValueError): constraints = [('feature_0', 'feature_3')] feature_names = ['feature_0', 'feature_1', 'feature_2'] self.run_interaction_constraints(tree_method='exact', feature_names=feature_names, interaction_constraints=constraints) constraints = [('feature_0', 'feature_1')] feature_names = ['feature_0', 'feature_1', 'feature_2'] self.run_interaction_constraints(tree_method='exact', feature_names=feature_names, interaction_constraints=constraints) constraints = [['feature_0', 'feature_1'], ['feature_2']] feature_names = ['feature_0', 'feature_1', 'feature_2'] self.run_interaction_constraints(tree_method='exact', feature_names=feature_names, interaction_constraints=constraints) @pytest.mark.skipif(**tm.no_sklearn()) def training_accuracy(self, tree_method): """Test accuracy, reused by GPU tests.""" from sklearn.metrics import accuracy_score dtrain = xgboost.DMatrix(dpath + 'agaricus.txt.train?indexing_mode=1') dtest = xgboost.DMatrix(dpath + 'agaricus.txt.test?indexing_mode=1') params = { 'eta': 1, 'max_depth': 6, 'objective': 'binary:logistic', 'tree_method': tree_method, 'interaction_constraints': '[[1,2], [2,3,4]]' } num_boost_round = 5 params['grow_policy'] = 'lossguide' bst = xgboost.train(params, dtrain, num_boost_round) pred_dtest = (bst.predict(dtest) < 0.5) assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1 params['grow_policy'] = 'depthwise' bst = xgboost.train(params, dtrain, num_boost_round) pred_dtest = (bst.predict(dtest) < 0.5) assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1 @pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"]) def test_hist_training_accuracy(self, tree_method): self.training_accuracy(tree_method=tree_method)