""" Demo for prediction using number of trees ========================================= """ import os import numpy as np import xgboost as xgb from sklearn.datasets import load_svmlight_file CURRENT_DIR = os.path.dirname(__file__) train = os.path.join(CURRENT_DIR, "../data/agaricus.txt.train") test = os.path.join(CURRENT_DIR, "../data/agaricus.txt.test") def native_interface(): # load data in do training dtrain = xgb.DMatrix(train) dtest = xgb.DMatrix(test) param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"} watchlist = [(dtest, "eval"), (dtrain, "train")] num_round = 3 bst = xgb.train(param, dtrain, num_round, watchlist) print("start testing prediction from first n trees") # predict using first 1 tree label = dtest.get_label() ypred1 = bst.predict(dtest, iteration_range=(0, 1)) # by default, we predict using all the trees ypred2 = bst.predict(dtest) print("error of ypred1=%f" % (np.sum((ypred1 > 0.5) != label) / float(len(label)))) print("error of ypred2=%f" % (np.sum((ypred2 > 0.5) != label) / float(len(label)))) def sklearn_interface(): X_train, y_train = load_svmlight_file(train) X_test, y_test = load_svmlight_file(test) clf = xgb.XGBClassifier(n_estimators=3, max_depth=2, eta=1, use_label_encoder=False) clf.fit(X_train, y_train, eval_set=[(X_test, y_test)]) assert clf.n_classes_ == 2 print("start testing prediction from first n trees") # predict using first 1 tree ypred1 = clf.predict(X_test, iteration_range=(0, 1)) # by default, we predict using all the trees ypred2 = clf.predict(X_test) print( "error of ypred1=%f" % (np.sum((ypred1 > 0.5) != y_test) / float(len(y_test))) ) print( "error of ypred2=%f" % (np.sum((ypred2 > 0.5) != y_test) / float(len(y_test))) ) if __name__ == "__main__": native_interface() sklearn_interface()