#!/usr/bin/python import xgboost as xgb import numpy as np # this script demonstrates how to fit gamma regression model (with log link function) # in xgboost, before running the demo you need to generate the autoclaims dataset # by running gen_autoclaims.R located in xgboost/demo/data. data = np.genfromtxt('../data/autoclaims.csv', delimiter=',') dtrain = xgb.DMatrix(data[0:4741, 0:34], data[0:4741, 34]) dtest = xgb.DMatrix(data[4741:6773, 0:34], data[4741:6773, 34]) # for gamma regression, we need to set the objective to 'reg:gamma', it also suggests # to set the base_score to a value between 1 to 5 if the number of iteration is small param = {'silent':1, 'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3} # the rest of settings are the same watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 30 # training and evaluation bst = xgb.train(param, dtrain, num_round, watchlist) preds = bst.predict(dtest) labels = dtest.get_label() print('test deviance=%f' % (2 * np.sum((labels - preds) / preds - np.log(labels) + np.log(preds))))