% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{lgb.Dataset.create.valid}
\alias{lgb.Dataset.create.valid}
\title{Construct validation data}
\usage{
lgb.Dataset.create.valid(
  dataset,
  data,
  label = NULL,
  weight = NULL,
  group = NULL,
  init_score = NULL,
  params = list()
)
}
\arguments{
\item{dataset}{\code{lgb.Dataset} object, training data}

\item{data}{a \code{matrix} object, a \code{dgCMatrix} object,
a character representing a path to a text file (CSV, TSV, or LibSVM),
or a character representing a path to a binary \code{Dataset} file}

\item{label}{vector of labels to use as the target variable}

\item{weight}{numeric vector of sample weights}

\item{group}{used for learning-to-rank tasks. An integer vector describing how to
group rows together as ordered results from the same set of candidate results
to be ranked. For example, if you have a 100-document dataset with
\code{group = c(10, 20, 40, 10, 10, 10)}, that means that you have 6 groups,
where the first 10 records are in the first group, records 11-30 are in the
second group, etc.}

\item{init_score}{initial score is the base prediction lightgbm will boost from}

\item{params}{a list of parameters. See
\href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#dataset-parameters}{
The "Dataset Parameters" section of the documentation} for a list of parameters
and valid values.
If this is an empty list (the default), the validation Dataset
will have the same parameters as the Dataset passed to argument \code{dataset}.}
}
\value{
constructed dataset
}
\description{
Construct validation data according to training data
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)

# parameters can be changed between the training data and validation set,
# for example to account for training data in a text file with a header row
# and validation data in a text file without it
train_file <- tempfile(pattern = "train_", fileext = ".csv")
write.table(
  data.frame(y = rnorm(100L), x1 = rnorm(100L), x2 = rnorm(100L))
  , file = train_file
  , sep = ","
  , col.names = TRUE
  , row.names = FALSE
  , quote = FALSE
)

valid_file <- tempfile(pattern = "valid_", fileext = ".csv")
write.table(
  data.frame(y = rnorm(100L), x1 = rnorm(100L), x2 = rnorm(100L))
  , file = valid_file
  , sep = ","
  , col.names = FALSE
  , row.names = FALSE
  , quote = FALSE
)

dtrain <- lgb.Dataset(
  data = train_file
  , params = list(has_header = TRUE)
)
dtrain$construct()

dvalid <- lgb.Dataset(
  data = valid_file
  , params = list(has_header = FALSE)
)
dvalid$construct()
}
}