/*! * Copyright (c) 2016 Microsoft Corporation. All rights reserved. * Licensed under the MIT License. See LICENSE file in the project root for license information. */ #ifndef LIGHTGBM_UTILS_COMMON_H_ #define LIGHTGBM_UTILS_COMMON_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define FMT_HEADER_ONLY #include "fast_double_parser.h" #include "fmt/format.h" #ifdef _MSC_VER #include #pragma intrinsic(_BitScanReverse) #endif #if defined(_MSC_VER) #include #elif MM_MALLOC #include // https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html // https://www.oreilly.com/library/view/mac-os-x/0596003560/ch05s01s02.html #elif defined(__GNUC__) && defined(HAVE_MALLOC_H) #include #define _mm_malloc(a, b) memalign(b, a) #define _mm_free(a) free(a) #else #include #define _mm_malloc(a, b) malloc(a) #define _mm_free(a) free(a) #endif namespace LightGBM { namespace Common { using json11_internal_lightgbm::Json; /*! * Imbues the stream with the C locale. */ static void C_stringstream(std::stringstream &ss) { ss.imbue(std::locale::classic()); } inline static char tolower(char in) { if (in <= 'Z' && in >= 'A') return in - ('Z' - 'z'); return in; } inline static std::string Trim(std::string str) { if (str.empty()) { return str; } str.erase(str.find_last_not_of(" \f\n\r\t\v") + 1); str.erase(0, str.find_first_not_of(" \f\n\r\t\v")); return str; } inline static std::string RemoveQuotationSymbol(std::string str) { if (str.empty()) { return str; } str.erase(str.find_last_not_of("'\"") + 1); str.erase(0, str.find_first_not_of("'\"")); return str; } inline static bool StartsWith(const std::string& str, const std::string prefix) { if (str.substr(0, prefix.size()) == prefix) { return true; } else { return false; } } inline static std::vector Split(const char* c_str, char delimiter) { std::vector ret; std::string str(c_str); size_t i = 0; size_t pos = 0; while (pos < str.length()) { if (str[pos] == delimiter) { if (i < pos) { ret.push_back(str.substr(i, pos - i)); } ++pos; i = pos; } else { ++pos; } } if (i < pos) { ret.push_back(str.substr(i)); } return ret; } inline static std::vector SplitBrackets(const char* c_str, char left_delimiter, char right_delimiter) { std::vector ret; std::string str(c_str); size_t i = 0; size_t pos = 0; bool open = false; while (pos < str.length()) { if (str[pos] == left_delimiter) { open = true; ++pos; i = pos; } else if (str[pos] == right_delimiter && open) { if (i < pos) { ret.push_back(str.substr(i, pos - i)); } open = false; ++pos; } else { ++pos; } } return ret; } inline static std::vector SplitLines(const char* c_str) { std::vector ret; std::string str(c_str); size_t i = 0; size_t pos = 0; while (pos < str.length()) { if (str[pos] == '\n' || str[pos] == '\r') { if (i < pos) { ret.push_back(str.substr(i, pos - i)); } // skip the line endings while (str[pos] == '\n' || str[pos] == '\r') ++pos; // new begin i = pos; } else { ++pos; } } if (i < pos) { ret.push_back(str.substr(i)); } return ret; } inline static std::vector Split(const char* c_str, const char* delimiters) { std::vector ret; std::string str(c_str); size_t i = 0; size_t pos = 0; while (pos < str.length()) { bool met_delimiters = false; for (int j = 0; delimiters[j] != '\0'; ++j) { if (str[pos] == delimiters[j]) { met_delimiters = true; break; } } if (met_delimiters) { if (i < pos) { ret.push_back(str.substr(i, pos - i)); } ++pos; i = pos; } else { ++pos; } } if (i < pos) { ret.push_back(str.substr(i)); } return ret; } inline static std::string GetFromParserConfig(std::string config_str, std::string key) { // parser config should follow json format. std::string err; Json config_json = Json::parse(config_str, &err); if (!err.empty()) { Log::Fatal("Invalid parser config: %s. Please check if follow json format.", err.c_str()); } return config_json[key].string_value(); } inline static std::string SaveToParserConfig(std::string config_str, std::string key, std::string value) { std::string err; Json config_json = Json::parse(config_str, &err); if (!err.empty()) { Log::Fatal("Invalid parser config: %s. Please check if follow json format.", err.c_str()); } CHECK(config_json.is_object()); std::map config_map = config_json.object_items(); config_map.insert(std::pair(key, Json(value))); return Json(config_map).dump(); } template inline static const char* Atoi(const char* p, T* out) { int sign; T value; while (*p == ' ') { ++p; } sign = 1; if (*p == '-') { sign = -1; ++p; } else if (*p == '+') { ++p; } for (value = 0; *p >= '0' && *p <= '9'; ++p) { value = value * 10 + (*p - '0'); } *out = static_cast(sign * value); while (*p == ' ') { ++p; } return p; } template inline static double Pow(T base, int power) { if (power < 0) { return 1.0 / Pow(base, -power); } else if (power == 0) { return 1; } else if (power % 2 == 0) { return Pow(base*base, power / 2); } else if (power % 3 == 0) { return Pow(base*base*base, power / 3); } else { return base * Pow(base, power - 1); } } inline static const char* Atof(const char* p, double* out) { int frac; double sign, value, scale; *out = NAN; // Skip leading white space, if any. while (*p == ' ') { ++p; } // Get sign, if any. sign = 1.0; if (*p == '-') { sign = -1.0; ++p; } else if (*p == '+') { ++p; } // is a number if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') { // Get digits before decimal point or exponent, if any. for (value = 0.0; *p >= '0' && *p <= '9'; ++p) { value = value * 10.0 + (*p - '0'); } // Get digits after decimal point, if any. if (*p == '.') { double right = 0.0; int nn = 0; ++p; while (*p >= '0' && *p <= '9') { right = (*p - '0') + right * 10.0; ++nn; ++p; } value += right / Pow(10.0, nn); } // Handle exponent, if any. frac = 0; scale = 1.0; if ((*p == 'e') || (*p == 'E')) { uint32_t expon; // Get sign of exponent, if any. ++p; if (*p == '-') { frac = 1; ++p; } else if (*p == '+') { ++p; } // Get digits of exponent, if any. for (expon = 0; *p >= '0' && *p <= '9'; ++p) { expon = expon * 10 + (*p - '0'); } if (expon > 308) expon = 308; // Calculate scaling factor. while (expon >= 50) { scale *= 1E50; expon -= 50; } while (expon >= 8) { scale *= 1E8; expon -= 8; } while (expon > 0) { scale *= 10.0; expon -= 1; } } // Return signed and scaled floating point result. *out = sign * (frac ? (value / scale) : (value * scale)); } else { size_t cnt = 0; while (*(p + cnt) != '\0' && *(p + cnt) != ' ' && *(p + cnt) != '\t' && *(p + cnt) != ',' && *(p + cnt) != '\n' && *(p + cnt) != '\r' && *(p + cnt) != ':') { ++cnt; } if (cnt > 0) { std::string tmp_str(p, cnt); std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), Common::tolower); if (tmp_str == std::string("na") || tmp_str == std::string("nan") || tmp_str == std::string("null")) { *out = NAN; } else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) { *out = sign * 1e308; } else { Log::Fatal("Unknown token %s in data file", tmp_str.c_str()); } p += cnt; } } while (*p == ' ') { ++p; } return p; } // Use fast_double_parse and strtod (if parse failed) to parse double. inline static const char* AtofPrecise(const char* p, double* out) { const char* end = fast_double_parser::parse_number(p, out); if (end != nullptr) { return end; } // Rare path: Not in RFC 7159 format. Possible "inf", "nan", etc. Fallback to standard library: char* end2; errno = 0; // This is Required before calling strtod. *out = std::strtod(p, &end2); // strtod is locale aware. if (end2 == p) { Log::Fatal("no conversion to double for: %s", p); } if (errno == ERANGE) { Log::Warning("convert to double got underflow or overflow: %s", p); } return end2; } inline static bool AtoiAndCheck(const char* p, int* out) { const char* after = Atoi(p, out); if (*after != '\0') { return false; } return true; } inline static bool AtofAndCheck(const char* p, double* out) { const char* after = Atof(p, out); if (*after != '\0') { return false; } return true; } inline static const char* SkipSpaceAndTab(const char* p) { while (*p == ' ' || *p == '\t') { ++p; } return p; } inline static const char* SkipReturn(const char* p) { while (*p == '\n' || *p == '\r' || *p == ' ') { ++p; } return p; } template inline static std::vector ArrayCast(const std::vector& arr) { std::vector ret(arr.size()); for (size_t i = 0; i < arr.size(); ++i) { ret[i] = static_cast(arr[i]); } return ret; } template struct __StringToTHelper { T operator()(const std::string& str) const { T ret = 0; Atoi(str.c_str(), &ret); return ret; } }; template struct __StringToTHelper { T operator()(const std::string& str) const { return static_cast(std::stod(str)); } }; template inline static std::vector StringToArray(const std::string& str, char delimiter) { std::vector strs = Split(str.c_str(), delimiter); std::vector ret; ret.reserve(strs.size()); __StringToTHelper::value> helper; for (const auto& s : strs) { ret.push_back(helper(s)); } return ret; } template inline static std::vector> StringToArrayofArrays( const std::string& str, char left_bracket, char right_bracket, char delimiter) { std::vector strs = SplitBrackets(str.c_str(), left_bracket, right_bracket); std::vector> ret; for (const auto& s : strs) { ret.push_back(StringToArray(s, delimiter)); } return ret; } template inline static std::vector StringToArray(const std::string& str, int n) { if (n == 0) { return std::vector(); } std::vector strs = Split(str.c_str(), ' '); CHECK_EQ(strs.size(), static_cast(n)); std::vector ret; ret.reserve(strs.size()); __StringToTHelper::value> helper; for (const auto& s : strs) { ret.push_back(helper(s)); } return ret; } template struct __StringToTHelperFast { const char* operator()(const char*p, T* out) const { return Atoi(p, out); } }; template struct __StringToTHelperFast { const char* operator()(const char*p, T* out) const { double tmp = 0.0f; auto ret = Atof(p, &tmp); *out = static_cast(tmp); return ret; } }; template inline static std::vector StringToArrayFast(const std::string& str, int n) { if (n == 0) { return std::vector(); } auto p_str = str.c_str(); __StringToTHelperFast::value> helper; std::vector ret(n); for (int i = 0; i < n; ++i) { p_str = helper(p_str, &ret[i]); } return ret; } template inline static std::string Join(const std::vector& strs, const char* delimiter, const bool force_C_locale = false) { if (strs.empty()) { return std::string(""); } std::stringstream str_buf; if (force_C_locale) { C_stringstream(str_buf); } str_buf << std::setprecision(std::numeric_limits::digits10 + 2); str_buf << strs[0]; for (size_t i = 1; i < strs.size(); ++i) { str_buf << delimiter; str_buf << strs[i]; } return str_buf.str(); } template<> inline std::string Join(const std::vector& strs, const char* delimiter, const bool force_C_locale) { if (strs.empty()) { return std::string(""); } std::stringstream str_buf; if (force_C_locale) { C_stringstream(str_buf); } str_buf << std::setprecision(std::numeric_limits::digits10 + 2); str_buf << static_cast(strs[0]); for (size_t i = 1; i < strs.size(); ++i) { str_buf << delimiter; str_buf << static_cast(strs[i]); } return str_buf.str(); } template inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter, const bool force_C_locale = false) { if (end - start <= 0) { return std::string(""); } start = std::min(start, static_cast(strs.size()) - 1); end = std::min(end, static_cast(strs.size())); std::stringstream str_buf; if (force_C_locale) { C_stringstream(str_buf); } str_buf << std::setprecision(std::numeric_limits::digits10 + 2); str_buf << strs[start]; for (size_t i = start + 1; i < end; ++i) { str_buf << delimiter; str_buf << strs[i]; } return str_buf.str(); } inline static int64_t Pow2RoundUp(int64_t x) { int64_t t = 1; for (int i = 0; i < 64; ++i) { if (t >= x) { return t; } t <<= 1; } return 0; } /*! * \brief Do inplace softmax transformation on p_rec * \param p_rec The input/output vector of the values. */ inline static void Softmax(std::vector* p_rec) { std::vector &rec = *p_rec; double wmax = rec[0]; for (size_t i = 1; i < rec.size(); ++i) { wmax = std::max(rec[i], wmax); } double wsum = 0.0f; for (size_t i = 0; i < rec.size(); ++i) { rec[i] = std::exp(rec[i] - wmax); wsum += rec[i]; } for (size_t i = 0; i < rec.size(); ++i) { rec[i] /= static_cast(wsum); } } inline static void Softmax(const double* input, double* output, int len) { double wmax = input[0]; for (int i = 1; i < len; ++i) { wmax = std::max(input[i], wmax); } double wsum = 0.0f; for (int i = 0; i < len; ++i) { output[i] = std::exp(input[i] - wmax); wsum += output[i]; } for (int i = 0; i < len; ++i) { output[i] /= static_cast(wsum); } } template std::vector ConstPtrInVectorWrapper(const std::vector>& input) { std::vector ret; for (auto t = input.begin(); t !=input.end(); ++t) { ret.push_back(t->get()); } return ret; } template inline static void SortForPair(std::vector* keys, std::vector* values, size_t start, bool is_reverse = false) { std::vector> arr; auto& ref_key = *keys; auto& ref_value = *values; for (size_t i = start; i < keys->size(); ++i) { arr.emplace_back(ref_key[i], ref_value[i]); } if (!is_reverse) { std::stable_sort(arr.begin(), arr.end(), [](const std::pair& a, const std::pair& b) { return a.first < b.first; }); } else { std::stable_sort(arr.begin(), arr.end(), [](const std::pair& a, const std::pair& b) { return a.first > b.first; }); } for (size_t i = start; i < arr.size(); ++i) { ref_key[i] = arr[i].first; ref_value[i] = arr[i].second; } } template inline static std::vector Vector2Ptr(std::vector>* data) { std::vector ptr(data->size()); auto& ref_data = *data; for (size_t i = 0; i < data->size(); ++i) { ptr[i] = ref_data[i].data(); } return ptr; } template inline static std::vector VectorSize(const std::vector>& data) { std::vector ret(data.size()); for (size_t i = 0; i < data.size(); ++i) { ret[i] = static_cast(data[i].size()); } return ret; } inline static double AvoidInf(double x) { if (std::isnan(x)) { return 0.0; } else if (x >= 1e300) { return 1e300; } else if (x <= -1e300) { return -1e300; } else { return x; } } inline static float AvoidInf(float x) { if (std::isnan(x)) { return 0.0f; } else if (x >= 1e38) { return 1e38f; } else if (x <= -1e38) { return -1e38f; } else { return x; } } template inline static typename std::iterator_traits<_Iter>::value_type* IteratorValType(_Iter) { return (0); } template inline static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) { size_t len = _Last - _First; const size_t kMinInnerLen = 1024; int num_threads = OMP_NUM_THREADS(); if (len <= kMinInnerLen || num_threads <= 1) { std::sort(_First, _Last, _Pred); return; } size_t inner_size = (len + num_threads - 1) / num_threads; inner_size = std::max(inner_size, kMinInnerLen); num_threads = static_cast((len + inner_size - 1) / inner_size); #pragma omp parallel for num_threads(num_threads) schedule(static, 1) for (int i = 0; i < num_threads; ++i) { size_t left = inner_size*i; size_t right = left + inner_size; right = std::min(right, len); if (right > left) { std::sort(_First + left, _First + right, _Pred); } } // Buffer for merge. std::vector<_VTRanIt> temp_buf(len); _RanIt buf = temp_buf.begin(); size_t s = inner_size; // Recursive merge while (s < len) { int loop_size = static_cast((len + s * 2 - 1) / (s * 2)); #pragma omp parallel for num_threads(num_threads) schedule(static, 1) for (int i = 0; i < loop_size; ++i) { size_t left = i * 2 * s; size_t mid = left + s; size_t right = mid + s; right = std::min(len, right); if (mid >= right) { continue; } std::copy(_First + left, _First + mid, buf + left); std::merge(buf + left, buf + mid, _First + mid, _First + right, _First + left, _Pred); } s *= 2; } } template inline static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred) { return ParallelSort(_First, _Last, _Pred, IteratorValType(_First)); } // Check that all y[] are in interval [ymin, ymax] (end points included); throws error if not template inline static void CheckElementsIntervalClosed(const T *y, T ymin, T ymax, int ny, const char *callername) { auto fatal_msg = [&y, &ymin, &ymax, &callername](int i) { std::ostringstream os; os << "[%s]: does not tolerate element [#%i = " << y[i] << "] outside [" << ymin << ", " << ymax << "]"; Log::Fatal(os.str().c_str(), callername, i); }; for (int i = 1; i < ny; i += 2) { if (y[i - 1] < y[i]) { if (y[i - 1] < ymin) { fatal_msg(i - 1); } else if (y[i] > ymax) { fatal_msg(i); } } else { if (y[i - 1] > ymax) { fatal_msg(i - 1); } else if (y[i] < ymin) { fatal_msg(i); } } } if (ny & 1) { // odd if (y[ny - 1] < ymin || y[ny - 1] > ymax) { fatal_msg(ny - 1); } } } // One-pass scan over array w with nw elements: find min, max and sum of elements; // this is useful for checking weight requirements. template inline static void ObtainMinMaxSum(const T1 *w, int nw, T1 *mi, T1 *ma, T2 *su) { T1 minw; T1 maxw; T1 sumw; int i; if (nw & 1) { // odd minw = w[0]; maxw = w[0]; sumw = w[0]; i = 2; } else { // even if (w[0] < w[1]) { minw = w[0]; maxw = w[1]; } else { minw = w[1]; maxw = w[0]; } sumw = w[0] + w[1]; i = 3; } for (; i < nw; i += 2) { if (w[i - 1] < w[i]) { minw = std::min(minw, w[i - 1]); maxw = std::max(maxw, w[i]); } else { minw = std::min(minw, w[i]); maxw = std::max(maxw, w[i - 1]); } sumw += w[i - 1] + w[i]; } if (mi != nullptr) { *mi = minw; } if (ma != nullptr) { *ma = maxw; } if (su != nullptr) { *su = static_cast(sumw); } } inline static std::vector EmptyBitset(int n) { int size = n / 32; if (n % 32 != 0) ++size; return std::vector(size); } template inline static void InsertBitset(std::vector* vec, const T val) { auto& ref_v = *vec; int i1 = val / 32; int i2 = val % 32; if (static_cast(vec->size()) < i1 + 1) { vec->resize(i1 + 1, 0); } ref_v[i1] |= (1 << i2); } template inline static std::vector ConstructBitset(const T* vals, int n) { std::vector ret; for (int i = 0; i < n; ++i) { int i1 = vals[i] / 32; int i2 = vals[i] % 32; if (static_cast(ret.size()) < i1 + 1) { ret.resize(i1 + 1, 0); } ret[i1] |= (1 << i2); } return ret; } template inline static bool FindInBitset(const uint32_t* bits, int n, T pos) { int i1 = pos / 32; if (i1 >= n) { return false; } int i2 = pos % 32; return (bits[i1] >> i2) & 1; } inline static bool CheckDoubleEqualOrdered(double a, double b) { double upper = std::nextafter(a, INFINITY); return b <= upper; } inline static double GetDoubleUpperBound(double a) { return std::nextafter(a, INFINITY); } inline static size_t GetLine(const char* str) { auto start = str; while (*str != '\0' && *str != '\n' && *str != '\r') { ++str; } return str - start; } inline static const char* SkipNewLine(const char* str) { if (*str == '\r') { ++str; } if (*str == '\n') { ++str; } return str; } template static int Sign(T x) { return (x > T(0)) - (x < T(0)); } template static T SafeLog(T x) { if (x > 0) { return std::log(x); } else { return -INFINITY; } } inline bool CheckAllowedJSON(const std::string& s) { unsigned char char_code; for (auto c : s) { char_code = static_cast(c); if (char_code == 34 // " || char_code == 44 // , || char_code == 58 // : || char_code == 91 // [ || char_code == 93 // ] || char_code == 123 // { || char_code == 125 // } ) { return false; } } return true; } inline int RoundInt(double x) { return static_cast(x + 0.5f); } template class AlignmentAllocator { public: typedef T value_type; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; inline AlignmentAllocator() throw() {} template inline AlignmentAllocator(const AlignmentAllocator&) throw() {} inline ~AlignmentAllocator() throw() {} inline pointer adress(reference r) { return &r; } inline const_pointer adress(const_reference r) const { return &r; } inline pointer allocate(size_type n) { return (pointer)_mm_malloc(n * sizeof(value_type), N); } inline void deallocate(pointer p, size_type) { _mm_free(p); } inline void construct(pointer p, const value_type& wert) { new (p) value_type(wert); } inline void destroy(pointer p) { p->~value_type(); } inline size_type max_size() const throw() { return size_type(-1) / sizeof(value_type); } template struct rebind { typedef AlignmentAllocator other; }; bool operator!=(const AlignmentAllocator& other) const { return !(*this == other); } // Returns true if and only if storage allocated from *this // can be deallocated from other, and vice versa. // Always returns true for stateless allocators. bool operator==(const AlignmentAllocator&) const { return true; } }; class Timer { public: Timer() { #ifdef TIMETAG int num_threads = OMP_NUM_THREADS(); start_time_.resize(num_threads); stats_.resize(num_threads); #endif // TIMETAG } ~Timer() { Print(); } #ifdef TIMETAG void Start(const std::string& name) { auto tid = omp_get_thread_num(); start_time_[tid][name] = std::chrono::steady_clock::now(); } void Stop(const std::string& name) { auto cur_time = std::chrono::steady_clock::now(); auto tid = omp_get_thread_num(); if (stats_[tid].find(name) == stats_[tid].end()) { stats_[tid][name] = std::chrono::duration(0); } stats_[tid][name] += cur_time - start_time_[tid][name]; } #else void Start(const std::string&) {} void Stop(const std::string&) {} #endif // TIMETAG void Print() const { #ifdef TIMETAG std::unordered_map> stats(stats_[0].begin(), stats_[0].end()); for (size_t i = 1; i < stats_.size(); ++i) { for (auto it = stats_[i].begin(); it != stats_[i].end(); ++it) { if (stats.find(it->first) == stats.end()) { stats[it->first] = it->second; } else { stats[it->first] += it->second; } } } std::map> ordered( stats.begin(), stats.end()); for (auto it = ordered.begin(); it != ordered.end(); ++it) { Log::Info("%s costs:\t %f", it->first.c_str(), it->second * 1e-3); } #endif // TIMETAG } #ifdef TIMETAG std::vector< std::unordered_map> start_time_; std::vector>> stats_; #endif // TIMETAG }; // Note: this class is not thread-safe, don't use it inside omp blocks class FunctionTimer { public: #ifdef TIMETAG FunctionTimer(const std::string& name, Timer& timer) : timer_(timer) { timer.Start(name); name_ = name; } ~FunctionTimer() { timer_.Stop(name_); } private: std::string name_; Timer& timer_; #else FunctionTimer(const std::string&, Timer&) {} #endif // TIMETAG }; } // namespace Common extern Common::Timer global_timer; /*! * Provides locale-independent alternatives to Common's methods. * Essential to make models robust to locale settings. */ namespace CommonC { template inline static std::string Join(const std::vector& strs, const char* delimiter) { return LightGBM::Common::Join(strs, delimiter, true); } template inline static std::string Join(const std::vector& strs, size_t start, size_t end, const char* delimiter) { return LightGBM::Common::Join(strs, start, end, delimiter, true); } inline static const char* Atof(const char* p, double* out) { return LightGBM::Common::Atof(p, out); } template struct __StringToTHelperFast { const char* operator()(const char*p, T* out) const { return LightGBM::Common::Atoi(p, out); } }; /*! * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, * has **less** floating point precision than ``__StringToTHelper``. * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. */ template struct __StringToTHelperFast { const char* operator()(const char*p, T* out) const { double tmp = 0.0f; auto ret = Atof(p, &tmp); *out = static_cast(tmp); return ret; } }; template struct __StringToTHelper { T operator()(const std::string& str) const { T ret = 0; LightGBM::Common::Atoi(str.c_str(), &ret); return ret; } }; /*! * \warning Beware that ``Common::Atof`` in ``__StringToTHelperFast``, * has **less** floating point precision than ``__StringToTHelper``. * Both versions are kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. * Check ``StringToArrayFast`` and ``StringToArray`` for more details on this. * \note It is possible that ``fast_double_parser::parse_number`` is faster than ``Common::Atof``. */ template struct __StringToTHelper { T operator()(const std::string& str) const { double tmp; const char* end = Common::AtofPrecise(str.c_str(), &tmp); if (end == str.c_str()) { Log::Fatal("Failed to parse double: %s", str.c_str()); } return static_cast(tmp); } }; /*! * \warning Beware that due to internal use of ``Common::Atof`` in ``__StringToTHelperFast``, * this method has less precision for floating point numbers than ``StringToArray``, * which calls ``__StringToTHelper``. * As such, ``StringToArrayFast`` and ``StringToArray`` are not equivalent! * Both versions were kept to maintain bit-for-bit the "legacy" LightGBM behaviour in terms of precision. */ template inline static std::vector StringToArrayFast(const std::string& str, int n) { if (n == 0) { return std::vector(); } auto p_str = str.c_str(); __StringToTHelperFast::value> helper; std::vector ret(n); for (int i = 0; i < n; ++i) { p_str = helper(p_str, &ret[i]); } return ret; } /*! * \warning Do not replace calls to this method by ``StringToArrayFast``. * This method is more precise for floating point numbers. * Check ``StringToArrayFast`` for more details. */ template inline static std::vector StringToArray(const std::string& str, int n) { if (n == 0) { return std::vector(); } std::vector strs = LightGBM::Common::Split(str.c_str(), ' '); CHECK_EQ(strs.size(), static_cast(n)); std::vector ret; ret.reserve(strs.size()); __StringToTHelper::value> helper; for (const auto& s : strs) { ret.push_back(helper(s)); } return ret; } /*! * \warning Do not replace calls to this method by ``StringToArrayFast``. * This method is more precise for floating point numbers. * Check ``StringToArrayFast`` for more details. */ template inline static std::vector StringToArray(const std::string& str, char delimiter) { std::vector strs = LightGBM::Common::Split(str.c_str(), delimiter); std::vector ret; ret.reserve(strs.size()); __StringToTHelper::value> helper; for (const auto& s : strs) { ret.push_back(helper(s)); } return ret; } /*! * Safely formats a value onto a buffer according to a format string and null-terminates it. * * \note It checks that the full value was written or forcefully aborts. * This safety check serves to prevent incorrect internal API usage. * Correct usage will never incur in this problem: * - The received buffer size shall be sufficient at all times for the input format string and value. */ template inline static void format_to_buf(char* buffer, const size_t buf_len, const char* format, const T value) { auto result = fmt::format_to_n(buffer, buf_len, format, value); if (result.size >= buf_len) { Log::Fatal("Numerical conversion failed. Buffer is too small."); } buffer[result.size] = '\0'; } template struct __TToStringHelper { void operator()(T value, char* buffer, size_t buf_len) const { format_to_buf(buffer, buf_len, "{}", value); } }; template struct __TToStringHelper { void operator()(T value, char* buffer, size_t buf_len) const { format_to_buf(buffer, buf_len, "{:g}", value); } }; template struct __TToStringHelper { void operator()(T value, char* buffer, size_t buf_len) const { format_to_buf(buffer, buf_len, "{:.17g}", value); } }; /*! * Converts an array to a string with with values separated by the space character. * This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality * and is locale-independent. * * \note If ``high_precision_output`` is set to true, * floating point values are output with more digits of precision. */ template inline static std::string ArrayToString(const std::vector& arr, size_t n) { if (arr.empty() || n == 0) { return std::string(""); } __TToStringHelper::value, high_precision_output> helper; const size_t buf_len = high_precision_output ? 32 : 16; std::vector buffer(buf_len); std::stringstream str_buf; Common::C_stringstream(str_buf); helper(arr[0], buffer.data(), buf_len); str_buf << buffer.data(); for (size_t i = 1; i < std::min(n, arr.size()); ++i) { helper(arr[i], buffer.data(), buf_len); str_buf << ' ' << buffer.data(); } return str_buf.str(); } } // namespace CommonC } // namespace LightGBM #endif // LIGHTGBM_UTILS_COMMON_H_