# Tests for the `interaction_constraints` training parameter.
#
# Two behaviours are covered:
#   1. A feature left out of every constraint group contributes additively,
#      i.e. it does not interact with the constrained features.
#   2. Constraint lists holding feature indices >= 1e5 are passed through
#      without being mangled by scientific number formatting.

# `library()` fails immediately if the package is missing, whereas `require()`
# would only warn and let every test below fail with a less obvious error.
library(xgboost)

context("interaction constraints")

set.seed(1024)

# Synthetic regression fixture: the response mixes additive terms with a
# three-way interaction between x1, x2 and x3.
x1 <- rnorm(1000, 1)
x2 <- rnorm(1000, 1)
x3 <- sample(c(1, 2, 3), size = 1000, replace = TRUE)
# NOTE(review): the second positional argument of rnorm() is the mean, so the
# noise term below has mean 0.001 and sd 1. If a small sd was intended, confirm
# before changing it, because that alters the fixture.
y <- x1 + x2 + x3 + x1 * x2 * x3 + rnorm(1000, 0.001) + 3 * sin(x1)
train <- matrix(c(x1, x2, x3), ncol = 3)

test_that("interaction constraints for regression", {
  # Fit a model that only allows interaction between x1 and x2.
  # Feature indices in `interaction_constraints` are 0-based.
  bst <- xgboost(
    data = train, label = y, max_depth = 3,
    eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
    interaction_constraints = list(c(0, 1))
  )

  # Predict three times, each time forcing every observation to share the
  # same x3 value (1, then 2, then 3) while keeping x1 and x2 untouched.
  preds <- lapply(c(1, 2, 3), function(x) {
    tmat <- matrix(c(x1, x2, rep(x, 1000)), ncol = 3)
    predict(bst, tmat)
  })

  # Since x3 is constrained to be independent of x1 and x2, and all
  # observations start from the same x3 value, incrementing x3 must shift
  # every prediction by the same amount.
  diff1 <- preds[[2]] - preds[[1]]
  test1 <- all(abs(diff1 - diff1[1]) < 1e-4)

  diff2 <- preds[[3]] - preds[[2]]
  test2 <- all(abs(diff2 - diff2[1]) < 1e-4)

  # Both flags are length-one logicals, so the scalar `&&` is the right
  # operator; the message is passed explicitly as `info`.
  expect_true(test1 && test2, info = "Interaction Constraint Satisfied")
})

test_that("interaction constraints scientific representation", {
  rows <- 10
  ## When number exceeds 1e5, R paste function uses scientific representation.
  ## See: https://github.com/dmlc/xgboost/issues/5179
  cols <- 1e5 + 10

  d <- matrix(rexp(rows, rate = .1), nrow = rows, ncol = cols)
  y <- rnorm(rows)

  dtrain <- xgb.DMatrix(data = d, info = list(label = y))

  # One constraint group holding every feature. Indices are 0-based, so the
  # last valid index is `cols - 1`; the group still contains 1e5, the value
  # that used to be rendered in scientific notation.
  inc <- list(seq.int(from = 0, to = cols - 1, by = 1))

  with_inc <- xgb.train(
    data = dtrain, tree_method = "hist",
    interaction_constraints = inc, nrounds = 10
  )
  without_inc <- xgb.train(data = dtrain, tree_method = "hist", nrounds = 10)

  # A single group containing all features restricts nothing, so both models
  # are expected to serialize identically.
  expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc))
})