""" Use scikit-learn regressor interface with CPU histogram tree method =================================================================== """ from dask.distributed import Client from dask.distributed import LocalCluster from dask import array as da import xgboost def main(client): # generate some random data for demonstration n = 100 m = 10000 partition_size = 100 X = da.random.random((m, n), partition_size) y = da.random.random(m, partition_size) regressor = xgboost.dask.DaskXGBRegressor(verbosity=1, n_estimators=2) regressor.set_params(tree_method="hist") # assigning client here is optional regressor.client = client regressor.fit(X, y, eval_set=[(X, y)]) prediction = regressor.predict(X) bst = regressor.get_booster() history = regressor.evals_result() print("Evaluation history:", history) # returned prediction is always a dask array. assert isinstance(prediction, da.Array) return bst # returning the trained model if __name__ == "__main__": # or use other clusters for scaling with LocalCluster(n_workers=4, threads_per_worker=1) as cluster: with Client(cluster) as client: main(client)