"""
Demo for using cross validation
===============================
"""
import os
import numpy as np
import xgboost as xgb

# Load the data used for training; the path is resolved relative to this file.
CURRENT_DIR = os.path.dirname(__file__)
data_path = os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')
dtrain = xgb.DMatrix(data_path)

# Booster parameters and number of boosting rounds shared by the runs below.
param = {
    'max_depth': 2,
    'eta': 1,
    'objective': 'binary:logistic',
}
num_round = 2

print('running cross validation')
# Run cross validation; each iteration is printed as
# [iteration]  metric_name:mean_value+std_value
# where std_value is the standard deviation of the metric.
monitor_with_std = xgb.callback.EvaluationMonitor(show_stdv=True)
xgb.cv(
    param,
    dtrain,
    num_boost_round=num_round,
    nfold=5,
    metrics={'error'},
    seed=0,
    callbacks=[monitor_with_std],
)

print('running cross validation, disable standard deviation display')
# Run cross validation again; each iteration is printed as
# [iteration]  metric_name:mean_value
# An early-stopping callback configured with 3 rounds is attached as well.
monitor_without_std = xgb.callback.EvaluationMonitor(show_stdv=False)
early_stop = xgb.callback.EarlyStopping(rounds=3)
res = xgb.cv(
    param,
    dtrain,
    num_boost_round=10,
    nfold=5,
    metrics={'error'},
    seed=0,
    callbacks=[monitor_without_std, early_stop],
)
print(res)

print('running cross validation, with preprocessing function')
# Define the preprocessing function.
# It returns the preprocessed training data, test data, and parameters;
# we can use this hook to do weight rescaling, etc.
# As an example, we try to set scale_pos_weight.
def fpreproc(dtrain, dtest, param):
    """Per-fold preprocessing hook that sets ``scale_pos_weight``.

    The weight is the ratio of negative (label == 0) to positive
    (label == 1) rows in this fold's training data.

    Parameters
    ----------
    dtrain : object exposing ``get_label()``
        Training data of the current fold.
    dtest : object
        Test data of the current fold; returned untouched.
    param : dict
        Booster parameters for the current fold.

    Returns
    -------
    tuple
        ``(dtrain, dtest, param)`` where ``param`` is a copy of the input
        with ``'scale_pos_weight'`` set. If the fold contains no positive
        labels the ratio is undefined, so the key is left unset.
    """
    label = dtrain.get_label()
    n_negative = np.count_nonzero(label == 0)
    n_positive = np.count_nonzero(label == 1)
    # Work on a copy so the caller's parameter mapping is never mutated.
    param = dict(param)
    # Guard against a fold without positives: dividing by zero would yield
    # an infinite weight (plus a NumPy RuntimeWarning).
    if n_positive > 0:
        param['scale_pos_weight'] = float(n_negative) / n_positive
    return (dtrain, dtest, param)


# Do cross validation; for each fold
# the dtrain, dtest, param will be passed into fpreproc,
# then the return value of fpreproc will be used to generate
# the results of that fold.
xgb.cv(param, dtrain, num_round, nfold=5,
       metrics={'auc'}, seed=0, fpreproc=fpreproc)

###
# You can also do cross validation with a customized loss function.
# See custom_objective.py
##
print('running cross validation, with customized loss function')


def logregobj(preds, dtrain):
    """Custom logistic objective: return gradient and hessian.

    ``preds`` are treated as raw margin scores and squashed through the
    sigmoid here before the derivatives are computed.

    Parameters
    ----------
    preds : numpy.ndarray
        Raw predictions for the rows of ``dtrain``.
    dtrain : object exposing ``get_label()``
        Data whose labels the predictions are compared against.

    Returns
    -------
    tuple
        ``(grad, hess)`` with ``grad = p - y`` and ``hess = p * (1 - p)``,
        where ``p`` is the sigmoid of ``preds`` and ``y`` the labels.
    """
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))
    grad = preds - labels
    hess = preds * (1.0 - preds)
    return grad, hess


def evalerror(preds, dtrain):
    """Custom evaluation metric: fraction of misclassified rows.

    ``preds`` are thresholded at 0.0, i.e. they are treated as raw margins
    (``logregobj`` applies the sigmoid itself), so 0.0 corresponds to a
    probability of 0.5.

    Parameters
    ----------
    preds : numpy.ndarray
        Raw predictions for the rows of ``dtrain``.
    dtrain : object exposing ``get_label()``
        Data whose labels the predictions are compared against.

    Returns
    -------
    tuple
        ``('error', value)`` where ``value`` is a Python float.
    """
    labels = dtrain.get_label()
    # Count mismatches in NumPy instead of iterating element by element
    # with the builtin sum().
    n_wrong = np.count_nonzero(labels != (preds > 0.0))
    return 'error', float(n_wrong) / len(labels)


param = {'max_depth': 2, 'eta': 1}
# Train with the customized objective and evaluation metric.
# NOTE(review): newer XGBoost releases appear to favor `custom_metric` over
# `feval` -- confirm against the installed version before switching.
xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
       obj=logregobj, feval=evalerror)