# Otto Group product classification: train an xgboost multiclass model on
# data/train.csv, predict class probabilities for data/test.csv, and write
# a Kaggle submission file (submission.csv).
library(xgboost)  # library() errors on failure; require() would silently return FALSE
library(methods)

# Load data; the first column of each file is a row id, which is dropped.
train <- read.csv("data/train.csv", header = TRUE, stringsAsFactors = FALSE)
test <- read.csv("data/test.csv", header = TRUE, stringsAsFactors = FALSE)
train <- train[, -1]
test <- test[, -1]

# The last training column holds labels "Class_1".."Class_9"; convert them to
# integers 0..8, since xgboost expects class labels in [0, num_class).
y <- train[, ncol(train)]
y <- gsub("Class_", "", y)
y <- as.integer(y) - 1

# Stack train and test features into one all-numeric matrix so both are
# coerced identically.
x <- rbind(train[, -ncol(train)], test)
x <- as.matrix(x)
x <- matrix(as.numeric(x), nrow(x), ncol(x))
trind <- seq_along(y)               # rows of x belonging to the training set
teind <- (nrow(train) + 1):nrow(x)  # rows of x belonging to the test set

# Model parameters: multiclass softprob (per-class probabilities) evaluated
# with multiclass log-loss.
param <- list(
  "objective" = "multi:softprob",
  "eval_metric" = "mlogloss",
  "num_class" = 9,
  "nthread" = 8
)

# Cross-validate to gauge generalization error before the final fit.
cv.nrounds <- 50
bst.cv <- xgb.cv(param = param, data = x[trind, ], label = y,
                 nfold = 3, nrounds = cv.nrounds)

# Train the final model on all training rows.
nrounds <- 50
bst <- xgboost(param = param, data = x[trind, ], label = y, nrounds = nrounds)

# predict() returns a flat vector of length 9 * n_test (class-major);
# reshape to an n_test x 9 probability matrix.
pred <- predict(bst, x[teind, ])
pred <- matrix(pred, 9, length(pred) / 9)
pred <- t(pred)

# Format with 2 significant digits to shrink the submission file, then write.
pred <- format(pred, digits = 2, scientific = FALSE)
pred <- data.frame(seq_len(nrow(pred)), pred)
names(pred) <- c("id", paste0("Class_", 1:9))
write.csv(pred, file = "submission.csv", quote = FALSE, row.names = FALSE)