/*! * Copyright (c) 2015 by Contributors * \file json.h * \brief Lightweight JSON Reader/Writer that read save into C++ data structs. * This includes STL composites and structures. */ #ifndef DMLC_JSON_H_ #define DMLC_JSON_H_ // This code requires C++11 to compile #include #ifndef _LIBCPP_SGX_NO_IOSTREAMS #include #endif #include #include #include #include #include #include #include "./base.h" #include "./logging.h" #include "./type_traits.h" #if DMLC_USE_CXX11 #include #include #include #if DMLC_STRICT_CXX11 #if DMLC_ENABLE_RTTI #include "./any.h" #endif // DMLC_ENABLE_RTTI #endif // DMLC_STRICT_CXX11 #endif // DMLC_USE_CXX11 namespace dmlc { /*! * \brief Lightweight JSON Reader to read any STL compositions and structs. * The user need to know the schema of the * */ class JSONReader { public: /*! * \brief Constructor. * \param is the input source. */ #ifndef _LIBCPP_SGX_NO_IOSTREAMS explicit JSONReader(std::istream *is) #else explicit JSONReader(std::string *is) #endif : is_(is), line_count_r_(0), line_count_n_(0) {} /*! * \brief Parse next JSON string. * \param out_str the output string. * \throw dmlc::Error when next token is not string */ inline void ReadString(std::string *out_str); /*! * \brief Read Number. * \param out_value output value; * \throw dmlc::Error when next token is not number of ValueType. * \tparam ValueType type of the number */ template inline void ReadNumber(ValueType *out_value); /*! * \brief Begin parsing an object. * \code * std::string key; * // value can be any type that is json serializable. * std::string value; * reader->BeginObject(); * while (reader->NextObjectItem(&key)) { * // do somthing to key value * reader->Read(&value); * } * \endcode */ inline void BeginObject(); /*! * \brief Begin parsing an array. * \code * // value can be any type that is json serializable. * std::string value; * reader->BeginArray(); * while (reader->NextObjectArrayItem(&value)) { * // do somthing to value * } * \endcode */ inline void BeginArray(); /*! * \brief Try to move to next object item. * If this call is successful, user can proceed to call * reader->Read to read in the value. * \param out_key the key to the next object. * \return true if the read is successful, false if we are at end of the object. */ inline bool NextObjectItem(std::string *out_key); /*! * \brief Try to read the next element in the array. * If this call is successful, user can proceed to call * reader->Read to read in the value. * \return true if the read is successful, false if we are at end of the array. */ inline bool NextArrayItem(); /*! * \brief Read next ValueType. * \param out_value any STL or json readable type to be read * \throw dmlc::Error when the read of ValueType is not successful. * \tparam ValueType the data type to be read. */ template inline void Read(ValueType *out_value); /*! \return current line count */ inline std::string line_info() const { #ifndef _LIBCPP_SGX_NO_IOSTREAMS char temp[64]; std::ostringstream os; os << " Line " << std::max(line_count_r_, line_count_n_); is_->getline(temp, 64); os << ", around ^`" << temp << "`"; return os.str(); #else std::string info = " Line "; info += std::to_string(std::max(line_count_r_, line_count_n_)); // string getline size_t end_pos = is_->find('\n'); end_pos = std::min((size_t)64, end_pos == std::string::npos ? is_->size() : end_pos); std::string line = is_->substr(0, end_pos); is_->erase(0, line.size() + 1); // +1 for \n info += ", around ^`" + line + "`"; return info; #endif } private: #ifndef _LIBCPP_SGX_NO_IOSTREAMS /*! \brief internal reader stream */ std::istream *is_; #else /*! \brief internal reader string */ std::string *is_; #endif /*! \brief "\\r" counter */ size_t line_count_r_; /*! \brief "\\n" counter */ size_t line_count_n_; /*! * \brief record how many element processed in * current array/object scope. */ std::vector scope_counter_; /*! * \brief Read next nonspace character. * \return the next nonspace character. */ inline int NextNonSpace(); /*! * \brief Read just before next nonspace but not read that. * \return the next nonspace character. */ inline int PeekNextNonSpace(); /*! * \brief Takes the next char from the input source. * \return the next character. */ inline int NextChar(); /*! * \brief Returns the next char from the input source. * \return the next character. */ inline int PeekNextChar(); }; /*! * \brief Lightweight json to write any STL compositions. */ class JSONWriter { public: /*! * \brief Constructor. * \param os the output reciever. */ #ifndef _LIBCPP_SGX_NO_IOSTREAMS explicit JSONWriter(std::ostream *os) #else explicit JSONWriter(std::string *os) #endif : os_(os) {} /*! * \brief Write a string that do not contain escape characters. * \param s the string to be written. */ inline void WriteNoEscape(const std::string &s); /*! * \brief Write a string that can contain escape characters. * \param s the string to be written. */ inline void WriteString(const std::string &s); /*! * \brief Write a string that can contain escape characters. * \param v the value to be written. * \tparam ValueType The value type to be written. */ template inline void WriteNumber(const ValueType &v); /*! * \brief Start beginning of array. * \param multi_line whether to start an multi_line array. * \code * writer->BeginArray(); * for (auto& v : vdata) { * writer->WriteArrayItem(v); * } * writer->EndArray(); * \endcode */ inline void BeginArray(bool multi_line = true); /*! \brief Finish writing an array. */ inline void EndArray(); /*! * \brief Start beginning of array. * \param multi_line whether to start an multi_line array. * \code * writer->BeginObject(); * for (auto& kv : vmap) { * writer->WriteObjectKeyValue(kv.first, kv.second); * } * writer->EndObject(); * \endcode */ inline void BeginObject(bool multi_line = true); /*! \brief Finish writing object. */ inline void EndObject(); /*! * \brief Write key value pair in the object. * \param key the key of the object. * \param value the value of to be written. * \tparam ValueType The value type to be written. */ template inline void WriteObjectKeyValue(const std::string &key, const ValueType &value); /*! * \brief Write seperator of array, before writing next element. * User can proceed to call writer->Write to write next item */ inline void WriteArraySeperator(); /*! * \brief Write value into array. * \param value The value of to be written. * \tparam ValueType The value type to be written. */ template inline void WriteArrayItem(const ValueType &value); /*! * \brief Write value to json. * \param value any STL or json readable that can be written. * \tparam ValueType the data type to be write. */ template inline void Write(const ValueType &value); private: #ifndef _LIBCPP_SGX_NO_IOSTREAMS /*! \brief Output stream */ std::ostream *os_; #else std::string *os_; #endif /*! * \brief record how many element processed in * current array/object scope. */ std::vector scope_counter_; /*! \brief Record whether current is a multiline scope */ std::vector scope_multi_line_; /*! * \brief Write seperating space and newlines */ inline void WriteSeperator(); }; /*! * \brief Helper class to read JSON into a class or struct object. * \code * struct Param { * std::string name; * int value; * // define load function from JSON * inline void Load(dmlc::JSONReader *reader) { * dmlc::JSONStructReadHelper helper; * helper.DeclareField("name", &name); * helper.DeclareField("value", &value); * helper.ReadAllFields(reader); * } * }; * \endcode */ class JSONObjectReadHelper { public: /*! * \brief Declare field of type T * \param key the key of the of field. * \param addr address of the data type. * \tparam T the data type to be read, must be STL composition of JSON serializable. */ template inline void DeclareField(const std::string &key, T *addr) { DeclareFieldInternal(key, addr, false); } /*! * \brief Declare optional field of type T * \param key the key of the of field. * \param addr address of the data type. * \tparam T the data type to be read, must be STL composition of JSON serializable. */ template inline void DeclareOptionalField(const std::string &key, T *addr) { DeclareFieldInternal(key, addr, true); } /*! * \brief Read in all the declared fields. * \param reader the JSONReader to read the json. */ inline void ReadAllFields(JSONReader *reader); private: /*! * \brief Internal function to declare field. * \param key the key of the of field. * \param addr address of the data type. * \param optional if set to true, no error will be reported if the key is not presented. * \tparam T the data type to be read, must be STL composition of JSON serializable. */ template inline void DeclareFieldInternal(const std::string &key, T *addr, bool optional); /*! * \brief The internal reader function. * \param reader The reader to read. * \param addr The memory address to read. */ template inline static void ReaderFunction(JSONReader *reader, void *addr); /*! \brief callback type to reader function */ typedef void (*ReadFunction)(JSONReader *reader, void *addr); /*! \brief internal data entry */ struct Entry { /*! \brief the reader function */ ReadFunction func; /*! \brief the address to read */ void *addr; /*! \brief whether it is optional */ bool optional; }; /*! \brief the internal map of reader callbacks */ std::map map_; }; #define DMLC_JSON_ENABLE_ANY_VAR_DEF(KeyName) \ static DMLC_ATTRIBUTE_UNUSED ::dmlc::json::AnyJSONManager& \ __make_AnyJSONType ## _ ## KeyName ## __ /*! * \def DMLC_JSON_ENABLE_ANY * \brief Macro to enable save/load JSON of dmlc:: whose actual type is Type. * Any type will be saved as json array [KeyName, content] * * \param Type The type to be registered. * \param KeyName The Type key assigned to the type, must be same during load. */ #define DMLC_JSON_ENABLE_ANY(Type, KeyName) \ DMLC_STR_CONCAT(DMLC_JSON_ENABLE_ANY_VAR_DEF(KeyName), __COUNTER__) = \ ::dmlc::json::AnyJSONManager::Global()->EnableType(#KeyName) \ //! \cond Doxygen_Suppress namespace json { /*! * \brief generic serialization handler * \tparam T the type to be serialized */ template struct Handler; template struct NumericHandler { inline static void Write(JSONWriter *writer, const ValueType &value) { writer->WriteNumber(value); } inline static void Read(JSONReader *reader, ValueType *value) { reader->ReadNumber(value); } }; template struct ArrayHandler { inline static void Write(JSONWriter *writer, const ContainerType &array) { typedef typename ContainerType::value_type ElemType; writer->BeginArray(array.size() > 10 || !dmlc::is_pod::value); for (typename ContainerType::const_iterator it = array.begin(); it != array.end(); ++it) { writer->WriteArrayItem(*it); } writer->EndArray(); } inline static void Read(JSONReader *reader, ContainerType *array) { typedef typename ContainerType::value_type ElemType; array->clear(); reader->BeginArray(); while (reader->NextArrayItem()) { ElemType value; Handler::Read(reader, &value); array->insert(array->end(), value); } } }; template struct MapHandler{ inline static void Write(JSONWriter *writer, const ContainerType &map) { writer->BeginObject(map.size() > 1); for (typename ContainerType::const_iterator it = map.begin(); it != map.end(); ++it) { writer->WriteObjectKeyValue(it->first, it->second); } writer->EndObject(); } inline static void Read(JSONReader *reader, ContainerType *map) { typedef typename ContainerType::mapped_type ElemType; map->clear(); reader->BeginObject(); std::string key; while (reader->NextObjectItem(&key)) { ElemType value; reader->Read(&value); (*map)[key] = value; } } }; template struct CommonJSONSerializer { inline static void Write(JSONWriter *writer, const T &value) { value.Save(writer); } inline static void Read(JSONReader *reader, T *value) { value->Load(reader); } }; template<> struct Handler { inline static void Write(JSONWriter *writer, const std::string &value) { writer->WriteString(value); } inline static void Read(JSONReader *reader, std::string *str) { reader->ReadString(str); } }; template struct Handler > : public ArrayHandler > { }; template struct Handler > { inline static void Write(JSONWriter *writer, const std::pair &kv) { writer->BeginArray(); writer->WriteArrayItem(kv.first); writer->WriteArrayItem(kv.second); writer->EndArray(); } inline static void Read(JSONReader *reader, std::pair *kv) { reader->BeginArray(); CHECK(reader->NextArrayItem()) << "Expect array of length 2"; Handler::Read(reader, &(kv->first)); CHECK(reader->NextArrayItem()) << "Expect array of length 2"; Handler::Read(reader, &(kv->second)); CHECK(!reader->NextArrayItem()) << "Expect array of length 2"; } }; template struct Handler > : public ArrayHandler > { }; template struct Handler > : public MapHandler > { }; #if DMLC_USE_CXX11 template struct Handler > : public MapHandler > { }; #endif // DMLC_USE_CXX11 template struct Handler { inline static void Write(JSONWriter *writer, const T &data) { typedef typename dmlc::IfThenElseType::value, NumericHandler, CommonJSONSerializer >::Type THandler; THandler::Write(writer, data); } inline static void Read(JSONReader *reader, T *data) { typedef typename dmlc::IfThenElseType::value, NumericHandler, CommonJSONSerializer >::Type THandler; THandler::Read(reader, data); } }; #if DMLC_STRICT_CXX11 #if DMLC_ENABLE_RTTI // Manager to store json serialization strategy. class AnyJSONManager { public: template inline AnyJSONManager& EnableType(const std::string& type_name) { // NOLINT(*) std::type_index tp = std::type_index(typeid(T)); if (type_name_.count(tp) != 0) { CHECK(type_name_.at(tp) == type_name) << "Type has already been registered as another typename " << type_name_.at(tp); return *this; } CHECK(type_map_.count(type_name) == 0) << "Type name " << type_name << " already registered in registry"; Entry e; e.read = ReadAny; e.write = WriteAny; type_name_[tp] = type_name; type_map_[type_name] = e; return *this; } // return global singleton inline static AnyJSONManager* Global() { static AnyJSONManager inst; return &inst; } private: AnyJSONManager() {} template inline static void WriteAny(JSONWriter *writer, const any &data) { writer->Write(dmlc::get(data)); } template inline static void ReadAny(JSONReader *reader, any* data) { T temp; reader->Read(&temp); *data = std::move(temp); } // data entry to store vtable for any type struct Entry { void (*read)(JSONReader* reader, any *data); void (*write)(JSONWriter* reader, const any& data); }; template friend struct Handler; std::unordered_map type_name_; std::unordered_map type_map_; }; template<> struct Handler { inline static void Write(JSONWriter *writer, const any &data) { std::unordered_map& nmap = AnyJSONManager::Global()->type_name_; std::type_index id = std::type_index(data.type()); auto it = nmap.find(id); CHECK(it != nmap.end() && it->first == id) << "Type " << id.name() << " has not been registered via DMLC_JSON_ENABLE_ANY"; std::string type_name = it->second; AnyJSONManager::Entry e = AnyJSONManager::Global()->type_map_.at(type_name); writer->BeginArray(false); writer->WriteArrayItem(type_name); writer->WriteArraySeperator(); e.write(writer, data); writer->EndArray(); } inline static void Read(JSONReader *reader, any *data) { std::string type_name; reader->BeginArray(); CHECK(reader->NextArrayItem()) << "invalid any json format"; Handler::Read(reader, &type_name); std::unordered_map& tmap = AnyJSONManager::Global()->type_map_; auto it = tmap.find(type_name); CHECK(it != tmap.end() && it->first == type_name) << "Typename " << type_name << " has not been registered via DMLC_JSON_ENABLE_ANY"; AnyJSONManager::Entry e = it->second; CHECK(reader->NextArrayItem()) << "invalid any json format"; e.read(reader, data); CHECK(!reader->NextArrayItem()) << "invalid any json format"; } }; #endif // DMLC_ENABLE_RTTI #endif // DMLC_STRICT_CXX11 } // namespace json // implementations of JSONReader/Writer inline int JSONReader::NextChar() { #ifndef _LIBCPP_SGX_NO_IOSTREAMS return is_->get(); #else int ch = is_->at(0); is_->erase(0, 1); return ch; #endif } inline int JSONReader::PeekNextChar() { #ifndef _LIBCPP_SGX_NO_IOSTREAMS return is_->peek(); #else return is_->at(0); #endif } inline int JSONReader::NextNonSpace() { int ch; do { ch = NextChar(); if (ch == '\n') ++line_count_n_; if (ch == '\r') ++line_count_r_; } while (isspace(ch)); return ch; } inline int JSONReader::PeekNextNonSpace() { int ch; while (true) { ch = PeekNextChar(); if (ch == '\n') ++line_count_n_; if (ch == '\r') ++line_count_r_; if (!isspace(ch)) break; NextChar(); } return ch; } namespace { template #ifndef _LIBCPP_SGX_NO_IOSTREAMS void Extend(std::ostream *os, T item) { *os << item; } #else void Extend(std::string *ostr, T item) { *ostr += item; } #endif } // namespace inline void JSONReader::ReadString(std::string *out_str) { int ch = NextNonSpace(); CHECK_EQ(ch, '\"') << "Error at" << line_info() << ", Expect \'\"\' but get \'" << static_cast(ch) << '\''; #ifndef _LIBCPP_SGX_NO_IOSTREAMS std::ostringstream output; #else std::string output = ""; #endif while (true) { ch = NextChar(); if (ch == '\\') { char sch = static_cast(NextChar()); switch (sch) { case 'r': Extend(&output, "\r"); break; case 'n': Extend(&output, "\n"); break; case '\\': Extend(&output, "\\"); break; case 't': Extend(&output, "\t"); break; case '\"': Extend(&output, "\""); break; default: LOG(FATAL) << "unknown string escape \\" << sch; } } else { if (ch == '\"') break; Extend(&output, static_cast(ch)); } if (ch == EOF || ch == '\r' || ch == '\n') { LOG(FATAL) << "Error at" << line_info() << ", Expect \'\"\' but reach end of line "; } } #ifndef _LIBCPP_SGX_NO_IOSTREAMS *out_str = output.str(); #else *out_str = output; #endif } template inline void JSONReader::ReadNumber(ValueType *out_value) { #ifndef _LIBCPP_SGX_NO_IOSTREAMS *is_ >> *out_value; CHECK(!is_->fail()) << "Error at" << line_info() << ", Expect number"; #else char* endptr; const char* icstr = is_->c_str(); unsigned number = strtol(icstr, &endptr, 10); is_->erase(0, endptr - icstr); *out_value = static_cast(number); #endif } inline void JSONReader::BeginObject() { int ch = NextNonSpace(); CHECK_EQ(ch, '{') << "Error at" << line_info() << ", Expect \'{\' but get \'" << static_cast(ch) << '\''; scope_counter_.push_back(0); } inline void JSONReader::BeginArray() { int ch = NextNonSpace(); CHECK_EQ(ch, '[') << "Error at" << line_info() << ", Expect \'{\' but get \'" << static_cast(ch) << '\''; scope_counter_.push_back(0); } inline bool JSONReader::NextObjectItem(std::string *out_key) { bool next = true; if (scope_counter_.back() != 0) { int ch = NextNonSpace(); if (ch == EOF) { next = false; } else if (ch == '}') { next = false; } else { CHECK_EQ(ch, ',') << "Error at" << line_info() << ", JSON object expect \'}\' or \',\' \'" << static_cast(ch) << '\''; } } else { int ch = PeekNextNonSpace(); if (ch == '}') { NextChar(); next = false; } } if (!next) { scope_counter_.pop_back(); return false; } else { scope_counter_.back() += 1; ReadString(out_key); int ch = NextNonSpace(); CHECK_EQ(ch, ':') << "Error at" << line_info() << ", Expect \':\' but get \'" << static_cast(ch) << '\''; return true; } } inline bool JSONReader::NextArrayItem() { bool next = true; if (scope_counter_.back() != 0) { int ch = NextNonSpace(); if (ch == EOF) { next = false; } else if (ch == ']') { next = false; } else { CHECK_EQ(ch, ',') << "Error at" << line_info() << ", JSON array expect \']\' or \',\'. Get \'" << static_cast(ch) << "\' instead"; } } else { int ch = PeekNextNonSpace(); if (ch == ']') { NextChar(); next = false; } } if (!next) { scope_counter_.pop_back(); return false; } else { scope_counter_.back() += 1; return true; } } template inline void JSONReader::Read(ValueType *out_value) { json::Handler::Read(this, out_value); } inline void JSONWriter::WriteNoEscape(const std::string &s) { Extend(os_, '\"'); Extend(os_, s); Extend(os_, '\"'); } inline void JSONWriter::WriteString(const std::string &s) { Extend(os_, '\"'); for (size_t i = 0; i < s.length(); ++i) { char ch = s[i]; switch (ch) { case '\r': Extend(os_, "\\r"); break; case '\n': Extend(os_, "\\n"); break; case '\\': Extend(os_, "\\\\"); break; case '\t': Extend(os_, "\\t"); break; case '\"': Extend(os_, "\\\""); break; default: Extend(os_, ch); } } Extend(os_, '\"'); } template inline void JSONWriter::WriteNumber(const ValueType &v) { #ifndef _LIBCPP_SGX_NO_IOSTREAMS Extend(os_, v); #else Extend(os_, std::to_string(v)); #endif } inline void JSONWriter::BeginArray(bool multi_line) { Extend(os_, '['); scope_multi_line_.push_back(multi_line); scope_counter_.push_back(0); } inline void JSONWriter::EndArray() { CHECK_NE(scope_multi_line_.size(), 0U); CHECK_NE(scope_counter_.size(), 0U); bool newline = scope_multi_line_.back(); size_t nelem = scope_counter_.back(); scope_multi_line_.pop_back(); scope_counter_.pop_back(); if (newline && nelem != 0) WriteSeperator(); Extend(os_, ']'); } inline void JSONWriter::BeginObject(bool multi_line) { Extend(os_, '{'); scope_multi_line_.push_back(multi_line); scope_counter_.push_back(0); } inline void JSONWriter::EndObject() { CHECK_NE(scope_multi_line_.size(), 0U); CHECK_NE(scope_counter_.size(), 0U); bool newline = scope_multi_line_.back(); size_t nelem = scope_counter_.back(); scope_multi_line_.pop_back(); scope_counter_.pop_back(); if (newline && nelem != 0) WriteSeperator(); Extend(os_, '}'); } template inline void JSONWriter::WriteObjectKeyValue(const std::string &key, const ValueType &value) { if (scope_counter_.back() > 0) { Extend(os_, ", "); } WriteSeperator(); Extend(os_, '\"'); Extend(os_, key); Extend(os_, "\": "); scope_counter_.back() += 1; json::Handler::Write(this, value); } inline void JSONWriter::WriteArraySeperator() { if (scope_counter_.back() != 0) { Extend(os_, ", "); } scope_counter_.back() += 1; WriteSeperator(); } template inline void JSONWriter::WriteArrayItem(const ValueType &value) { this->WriteArraySeperator(); json::Handler::Write(this, value); } template inline void JSONWriter::Write(const ValueType &value) { size_t nscope = scope_multi_line_.size(); json::Handler::Write(this, value); CHECK_EQ(nscope, scope_multi_line_.size()) << "Uneven scope, did you call EndArray/EndObject after each BeginObject/Array?"; } inline void JSONWriter::WriteSeperator() { if (scope_multi_line_.size() == 0 || scope_multi_line_.back()) { Extend(os_, '\n'); Extend(os_, std::string(scope_multi_line_.size() * 2, ' ')); } } inline void JSONObjectReadHelper::ReadAllFields(JSONReader *reader) { reader->BeginObject(); std::map visited; std::string key; while (reader->NextObjectItem(&key)) { if (map_.count(key) != 0) { Entry e = map_[key]; (*e.func)(reader, e.addr); visited[key] = 0; } else { #ifndef _LIBCPP_SGX_NO_IOSTREAMS std::ostringstream err; #else std::string err(""); #endif Extend(&err, "JSONReader: Unknown field "); Extend(&err, key); Extend(&err, ", candidates are: \n"); for (std::map::iterator it = map_.begin(); it != map_.end(); ++it) { Extend(&err, '\"'); Extend(&err, it->first); Extend(&err, "\"\n"); } #ifndef _LIBCPP_SGX_NO_IOSTREAMS LOG(FATAL) << err.str(); #else LOG(FATAL) << err; #endif } } if (visited.size() != map_.size()) { for (std::map::iterator it = map_.begin(); it != map_.end(); ++it) { if (it->second.optional) continue; CHECK_NE(visited.count(it->first), 0U) << "JSONReader: Missing field \"" << it->first << "\"\n At " << reader->line_info(); } } } template inline void JSONObjectReadHelper::ReaderFunction(JSONReader *reader, void *addr) { json::Handler::Read(reader, static_cast(addr)); } template inline void JSONObjectReadHelper:: DeclareFieldInternal(const std::string &key, T *addr, bool optional) { CHECK_EQ(map_.count(key), 0U) << "Adding duplicate field " << key; Entry e; e.func = ReaderFunction; e.addr = static_cast(addr); e.optional = optional; map_[key] = e; } //! \endcond } // namespace dmlc #endif // DMLC_JSON_H_