/*! * Copyright 2015 by Contributors * \file dense_libsvm.cc * \brief Plugin to load in libsvm, but fill all the missing entries with zeros. * This plugin is mainly used for benchmark purposes and do not need to be included. */ #include #include #include namespace dmlc { namespace data { template class DensifyParser : public dmlc::Parser { public: DensifyParser(dmlc::Parser* parser, uint32_t num_col) : parser_(parser), num_col_(num_col) { } void BeforeFirst() override { parser_->BeforeFirst(); } bool Next() override { if (!parser_->Next()) return false; const RowBlock& batch = parser_->Value(); LOG(INFO) << batch.size; dense_index_.resize(num_col_ * batch.size); dense_value_.resize(num_col_ * batch.size); std::fill(dense_value_.begin(), dense_value_.end(), 0.0); offset_.resize(batch.size + 1); offset_[0] = 0; for (size_t i = 0; i < batch.size; ++i) { offset_[i + 1] = (i + 1) * num_col_; Row row = batch[i]; for (uint32_t j = 0; j < num_col_; ++j) { dense_index_[i * num_col_ + j] = j; } for (unsigned k = 0; k < row.length; ++k) { uint32_t index = row.get_index(k); CHECK_LT(index, num_col_) << "Featuere index larger than num_col"; dense_value_[i * num_col_ + index] = row.get_value(k); } } out_ = batch; out_.index = dmlc::BeginPtr(dense_index_); out_.value = dmlc::BeginPtr(dense_value_); out_.offset = dmlc::BeginPtr(offset_); return true; } const dmlc::RowBlock& Value() const override { return out_; } size_t BytesRead() const override { return parser_->BytesRead(); } private: RowBlock out_; std::unique_ptr > parser_; uint32_t num_col_; std::vector offset_; std::vector dense_index_; std::vector dense_value_; }; template Parser * CreateDenseLibSVMParser(const std::string& path, const std::map& args, unsigned part_index, unsigned num_parts) { CHECK_NE(args.count("num_col"), 0) << "expect num_col in dense_libsvm"; return new DensifyParser( Parser::Create(path.c_str(), part_index, num_parts, "libsvm"), uint32_t(atoi(args.at("num_col").c_str()))); } } // namespace data 
// Registers data::CreateDenseLibSVMParser through dmlc's data-parser
// registration macro. The arguments name uint32_t and real_t as the types and
// dense_libsvm as the registered parser name; see DMLC_REGISTER_DATA_PARSER in
// the dmlc headers for the exact contract of each argument.
DMLC_REGISTER_DATA_PARSER(uint32_t, real_t, dense_libsvm,
                          data::CreateDenseLibSVMParser);
}  // namespace dmlc