#ifndef OSMIUM_IO_DETAIL_PBF_DECODER_HPP #define OSMIUM_IO_DETAIL_PBF_DECODER_HPP /* This file is part of Osmium (https://osmcode.org/libosmium). Copyright 2013-2022 Jochen Topf and others (see README). Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* NOTE(review): in this copy of the file the #include targets and all template argument lists (e.g. on static_cast, std::pair, std::vector, pbf_message, numeric_limits, DeltaDecode) are missing and the line breaks are collapsed; restore them from the upstream Osmium source before building. */ #include #include #include #include #include #include #include #include #include #include // IWYU pragma: export #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef OSMIUM_WITH_LZ4 # include #endif #include #include #include namespace osmium { namespace builder { class Builder; } // namespace builder namespace io { namespace detail { using protozero::data_view; /* Forward-only reader over a byte range [m_data, m_end) holding back-to-back varints. Each next_*() call decodes one value through protozero::decode_varint, which is handed &m_data (presumably so it can advance the cursor). */ class varint_range { const char* m_data = nullptr; const char* m_end = nullptr; std::uint64_t next() { return protozero::decode_varint(&m_data, m_end); } public: varint_range() = default; explicit varint_range(const data_view& data) : m_data(data.data()), m_end(data.data() + data.size()) { } bool empty() const noexcept { return m_data == m_end; } /* Number of varints left in the range, found by counting the bytes whose high bit is clear; returns 0 for a default-constructed (null) range. */ std::size_t size() const noexcept { if (!m_data) { return 0; } // We know that each varint contains exactly one byte with the most // significant bit not set. We can use this to quickly figure out // how many varints there are without actually decoding the varints. 
return std::count_if(m_data, m_end, [](char c) noexcept { return (static_cast(c) & 0x80U) == 0; }); } std::int32_t next_int32() { return static_cast(next()); } std::uint32_t next_uint32() { return static_cast(next()); } std::int32_t next_sint32() { return protozero::decode_zigzag32(static_cast(next())); } std::int64_t next_sint64() { return protozero::decode_zigzag64(next()); } }; // class varint_range /* Pair type of the string table entries; used below with .first as the data pointer and .second as the length. */ using osm_string_len_type = std::pair; /* Decodes one PrimitiveBlock (m_data) into an osmium::memory::Buffer. operator() first reads the block metadata and string table, then the primitive groups. Copy and move are deleted. */ class PBFPrimitiveBlockDecoder { enum { initial_buffer_size = 64UL * 1024UL }; data_view m_data; std::vector m_stringtable; int64_t m_lon_offset = 0; int64_t m_lat_offset = 0; int64_t m_date_factor = 1000; int32_t m_granularity = 100; osmium::osm_entity_bits::type m_read_types; osmium::memory::Buffer m_buffer{initial_buffer_size, osmium::memory::Buffer::auto_grow::internal}; osmium::io::read_meta m_read_metadata; /* Fills m_stringtable with (pointer, length) entries taken from the views returned for each string, i.e. the strings are referenced, not copied. Throws osmium::pbf_error if a string table was already read or if an entry is longer than osmium::max_osm_string_length. */ void decode_stringtable(const data_view& data) { if (!m_stringtable.empty()) { throw osmium::pbf_error{"more than one stringtable in pbf file"}; } protozero::pbf_message pbf_string_table{data}; while (pbf_string_table.next(OSMFormat::StringTable::repeated_bytes_s, protozero::pbf_wire_type::length_delimited)) { const auto str_view = pbf_string_table.get_view(); if (str_view.size() > osmium::max_osm_string_length) { throw osmium::pbf_error{"overlong string in string table"}; } m_stringtable.emplace_back(str_view.data(), osmium::string_size_type(str_view.size())); } } /* First pass over m_data: reads the string table, granularity, date granularity and lat/lon offsets into the members; every other field is skipped. */ void decode_primitive_block_metadata() { protozero::pbf_message pbf_primitive_block{m_data}; while (pbf_primitive_block.next()) { switch (pbf_primitive_block.tag_and_type()) { case protozero::tag_and_type(OSMFormat::PrimitiveBlock::required_StringTable_stringtable, protozero::pbf_wire_type::length_delimited): decode_stringtable(pbf_primitive_block.get_view()); break; case protozero::tag_and_type(OSMFormat::PrimitiveBlock::optional_int32_granularity, protozero::pbf_wire_type::varint): m_granularity = pbf_primitive_block.get_int32(); break; case 
protozero::tag_and_type(OSMFormat::PrimitiveBlock::optional_int32_date_granularity, protozero::pbf_wire_type::varint): m_date_factor = pbf_primitive_block.get_int32(); break; case protozero::tag_and_type(OSMFormat::PrimitiveBlock::optional_int64_lat_offset, protozero::pbf_wire_type::varint): m_lat_offset = pbf_primitive_block.get_int64(); break; case protozero::tag_and_type(OSMFormat::PrimitiveBlock::optional_int64_lon_offset, protozero::pbf_wire_type::varint): m_lon_offset = pbf_primitive_block.get_int64(); break; default: pbf_primitive_block.skip(); } } } /* Second pass over m_data: walks every PrimitiveGroup and decodes the entity kinds selected in m_read_types, calling m_buffer.commit() after each decoded field. Fields of unselected kinds and unknown fields are skipped. Dense nodes go through the metadata or the no-metadata decoder depending on m_read_metadata. */ void decode_primitive_block_data() { protozero::pbf_message pbf_primitive_block{m_data}; while (pbf_primitive_block.next(OSMFormat::PrimitiveBlock::repeated_PrimitiveGroup_primitivegroup, protozero::pbf_wire_type::length_delimited)) { protozero::pbf_message pbf_primitive_group = pbf_primitive_block.get_message(); while (pbf_primitive_group.next()) { switch (pbf_primitive_group.tag_and_type()) { case protozero::tag_and_type(OSMFormat::PrimitiveGroup::repeated_Node_nodes, protozero::pbf_wire_type::length_delimited): if (m_read_types & osmium::osm_entity_bits::node) { decode_node(pbf_primitive_group.get_view()); m_buffer.commit(); } else { pbf_primitive_group.skip(); } break; case protozero::tag_and_type(OSMFormat::PrimitiveGroup::optional_DenseNodes_dense, protozero::pbf_wire_type::length_delimited): if (m_read_types & osmium::osm_entity_bits::node) { if (m_read_metadata == osmium::io::read_meta::yes) { decode_dense_nodes(pbf_primitive_group.get_view()); } else { decode_dense_nodes_without_metadata(pbf_primitive_group.get_view()); } m_buffer.commit(); } else { pbf_primitive_group.skip(); } break; case protozero::tag_and_type(OSMFormat::PrimitiveGroup::repeated_Way_ways, protozero::pbf_wire_type::length_delimited): if (m_read_types & osmium::osm_entity_bits::way) { decode_way(pbf_primitive_group.get_view()); m_buffer.commit(); } else { pbf_primitive_group.skip(); } break; case 
protozero::tag_and_type(OSMFormat::PrimitiveGroup::repeated_Relation_relations, protozero::pbf_wire_type::length_delimited): if (m_read_types & osmium::osm_entity_bits::relation) { decode_relation(pbf_primitive_group.get_view()); m_buffer.commit(); } else { pbf_primitive_group.skip(); } break; default: pbf_primitive_group.skip(); } } } } /* Copies version, timestamp, changeset, uid and the visible flag from an Info message into `object` and returns the string table entry of the user name (an empty entry if the message has no user_sid field). A version or changeset of -1 is stored as 0; values below -1 throw osmium::pbf_error. The timestamp is scaled by m_date_factor / 1000. */ osm_string_len_type decode_info(const data_view& data, osmium::OSMObject& object) { osm_string_len_type user{"", 0}; protozero::pbf_message pbf_info{data}; while (pbf_info.next()) { switch (pbf_info.tag_and_type()) { case protozero::tag_and_type(OSMFormat::Info::optional_int32_version, protozero::pbf_wire_type::varint): { const auto version = pbf_info.get_int32(); if (version < -1) { throw osmium::pbf_error{"object version must not be negative"}; } if (version == -1) { object.set_version(0U); } else { object.set_version(static_cast(version)); } } break; case protozero::tag_and_type(OSMFormat::Info::optional_int64_timestamp, protozero::pbf_wire_type::varint): object.set_timestamp(pbf_info.get_int64() * m_date_factor / 1000); break; case protozero::tag_and_type(OSMFormat::Info::optional_int64_changeset, protozero::pbf_wire_type::varint): { const auto changeset_id = pbf_info.get_int64(); /* NOTE(review): the >= comparison also rejects the limit value itself, while the message below reads as if 2^32-1 were allowed - confirm which is intended. */ if (changeset_id < -1 || changeset_id >= std::numeric_limits::max()) { throw osmium::pbf_error{"object changeset_id must be between 0 and 2^32-1"}; } if (changeset_id == -1) { object.set_changeset(0U); } else { object.set_changeset(static_cast(changeset_id)); } } break; case protozero::tag_and_type(OSMFormat::Info::optional_int32_uid, protozero::pbf_wire_type::varint): object.set_uid_from_signed(pbf_info.get_int32()); break; case protozero::tag_and_type(OSMFormat::Info::optional_uint32_user_sid, protozero::pbf_wire_type::varint): user = m_stringtable.at(pbf_info.get_uint32()); break; case protozero::tag_and_type(OSMFormat::Info::optional_bool_visible, protozero::pbf_wire_type::varint): object.set_visible(pbf_info.get_bool()); break; default: 
pbf_info.skip(); } } return user; } /* Adds tags to `parent` by pairing string table indexes read from keys and vals. Does nothing (no tag list is created) if either range is empty, and stops as soon as either range runs out. */ void build_tag_list(osmium::builder::Builder& parent, varint_range& keys, varint_range& vals) { if (keys.empty() || vals.empty()) { return; } osmium::builder::TagListBuilder builder{parent}; do { const auto& k = m_stringtable.at(keys.next_uint32()); const auto& v = m_stringtable.at(vals.next_uint32()); builder.add_tag(k.first, k.second, v.first, v.second); } while (!keys.empty() && !vals.empty()); } /* The two converters below scale a raw PBF coordinate by m_granularity, add the lon/lat offset of the block and divide by resolution_convert (declared outside this file). */ int32_t convert_pbf_lon(const int64_t c) const noexcept { return int32_t((c * m_granularity + m_lon_offset) / resolution_convert); } int32_t convert_pbf_lat(const int64_t c) const noexcept { return int32_t((c * m_granularity + m_lat_offset) / resolution_convert); } /* Builds one Node in m_buffer from a non-dense Node message. The Info sub-message is decoded only when m_read_metadata is read_meta::yes. A visible node for which lat or lon was not read throws osmium::pbf_error. */ void decode_node(const data_view& data) { osmium::builder::NodeBuilder builder{m_buffer}; osmium::Node& node = builder.object(); varint_range keys; varint_range vals; int64_t lon = std::numeric_limits::max(); int64_t lat = std::numeric_limits::max(); osm_string_len_type user{"", 0}; protozero::pbf_message pbf_node{data}; while (pbf_node.next()) { switch (pbf_node.tag_and_type()) { case protozero::tag_and_type(OSMFormat::Node::required_sint64_id, protozero::pbf_wire_type::varint): node.set_id(pbf_node.get_sint64()); break; case protozero::tag_and_type(OSMFormat::Node::packed_uint32_keys, protozero::pbf_wire_type::length_delimited): keys = varint_range{pbf_node.get_view()}; break; case protozero::tag_and_type(OSMFormat::Node::packed_uint32_vals, protozero::pbf_wire_type::length_delimited): vals = varint_range{pbf_node.get_view()}; break; case protozero::tag_and_type(OSMFormat::Node::optional_Info_info, protozero::pbf_wire_type::length_delimited): if (m_read_metadata == osmium::io::read_meta::yes) { user = decode_info(pbf_node.get_view(), builder.object()); } else { pbf_node.skip(); } break; case protozero::tag_and_type(OSMFormat::Node::required_sint64_lat, protozero::pbf_wire_type::varint): lat = pbf_node.get_sint64(); break; case 
protozero::tag_and_type(OSMFormat::Node::required_sint64_lon, protozero::pbf_wire_type::varint): lon = pbf_node.get_sint64(); break; default: pbf_node.skip(); } } if (node.visible()) { if (lon == std::numeric_limits::max() || lat == std::numeric_limits::max()) { throw osmium::pbf_error{"illegal coordinate format"}; } node.set_location(osmium::Location{ convert_pbf_lon(lon), convert_pbf_lat(lat) }); } builder.set_user(user.first, user.second); build_tag_list(builder, keys, vals); } /* Builds one Way in m_buffer. Node refs are delta-decoded. When the lat array has data, refs are added together with delta-decoded locations until any of refs/lons/lats runs out; otherwise only the refs are added. */ void decode_way(const data_view& data) { osmium::builder::WayBuilder builder{m_buffer}; varint_range keys; varint_range vals; varint_range refs; varint_range lats; varint_range lons; osm_string_len_type user{"", 0}; protozero::pbf_message pbf_way{data}; while (pbf_way.next()) { switch (pbf_way.tag_and_type()) { case protozero::tag_and_type(OSMFormat::Way::required_int64_id, protozero::pbf_wire_type::varint): builder.object().set_id(pbf_way.get_int64()); break; case protozero::tag_and_type(OSMFormat::Way::packed_uint32_keys, protozero::pbf_wire_type::length_delimited): keys = varint_range{pbf_way.get_view()}; break; case protozero::tag_and_type(OSMFormat::Way::packed_uint32_vals, protozero::pbf_wire_type::length_delimited): vals = varint_range{pbf_way.get_view()}; break; case protozero::tag_and_type(OSMFormat::Way::optional_Info_info, protozero::pbf_wire_type::length_delimited): if (m_read_metadata == osmium::io::read_meta::yes) { user = decode_info(pbf_way.get_view(), builder.object()); } else { pbf_way.skip(); } break; case protozero::tag_and_type(OSMFormat::Way::packed_sint64_refs, protozero::pbf_wire_type::length_delimited): refs = varint_range{pbf_way.get_view()}; break; case protozero::tag_and_type(OSMFormat::Way::packed_sint64_lat, protozero::pbf_wire_type::length_delimited): lats = varint_range{pbf_way.get_view()}; break; case protozero::tag_and_type(OSMFormat::Way::packed_sint64_lon, protozero::pbf_wire_type::length_delimited): lons = varint_range{pbf_way.get_view()}; break; 
default: pbf_way.skip(); } } builder.set_user(user.first, user.second); if (!refs.empty()) { osmium::builder::WayNodeListBuilder wnl_builder{builder}; osmium::DeltaDecode ref; if (lats.empty()) { while (!refs.empty()) { wnl_builder.add_node_ref(ref.update(refs.next_sint64())); } } else { osmium::DeltaDecode lon; osmium::DeltaDecode lat; while (!refs.empty() && !lons.empty() && !lats.empty()) { wnl_builder.add_node_ref( ref.update(refs.next_sint64()), osmium::Location{convert_pbf_lon(lon.update(lons.next_sint64())), convert_pbf_lat(lat.update(lats.next_sint64()))} ); } } } build_tag_list(builder, keys, vals); } /* Builds one Relation in m_buffer. Members are read while roles, member ids and types all still have data; member ids are delta-decoded, the member type value is mapped to osmium::item_type(type + 1), and a type outside 0..2 throws osmium::pbf_error. */ void decode_relation(const data_view& data) { osmium::builder::RelationBuilder builder{m_buffer}; varint_range keys; varint_range vals; varint_range roles; varint_range refs; varint_range types; osm_string_len_type user{"", 0}; protozero::pbf_message pbf_relation{data}; while (pbf_relation.next()) { switch (pbf_relation.tag_and_type()) { case protozero::tag_and_type(OSMFormat::Relation::required_int64_id, protozero::pbf_wire_type::varint): builder.object().set_id(pbf_relation.get_int64()); break; case protozero::tag_and_type(OSMFormat::Relation::packed_uint32_keys, protozero::pbf_wire_type::length_delimited): keys = varint_range{pbf_relation.get_view()}; break; case protozero::tag_and_type(OSMFormat::Relation::packed_uint32_vals, protozero::pbf_wire_type::length_delimited): vals = varint_range{pbf_relation.get_view()}; break; case protozero::tag_and_type(OSMFormat::Relation::optional_Info_info, protozero::pbf_wire_type::length_delimited): if (m_read_metadata == osmium::io::read_meta::yes) { user = decode_info(pbf_relation.get_view(), builder.object()); } else { pbf_relation.skip(); } break; case protozero::tag_and_type(OSMFormat::Relation::packed_int32_roles_sid, protozero::pbf_wire_type::length_delimited): roles = varint_range{pbf_relation.get_view()}; break; case protozero::tag_and_type(OSMFormat::Relation::packed_sint64_memids, 
protozero::pbf_wire_type::length_delimited): refs = varint_range{pbf_relation.get_view()}; break; case protozero::tag_and_type(OSMFormat::Relation::packed_MemberType_types, protozero::pbf_wire_type::length_delimited): types = varint_range{pbf_relation.get_view()}; break; default: pbf_relation.skip(); } } builder.set_user(user.first, user.second); if (!refs.empty()) { osmium::builder::RelationMemberListBuilder rml_builder{builder}; osmium::DeltaDecode ref; while (!roles.empty() && !refs.empty() && !types.empty()) { const auto& r = m_stringtable.at(roles.next_int32()); const int type = types.next_int32(); if (type < 0 || type > 2) { throw osmium::pbf_error{"unknown relation member type"}; } rml_builder.add_member( osmium::item_type(type + 1), ref.update(refs.next_sint64()), r.first, r.second ); } } build_tag_list(builder, keys, vals); } /* Reads the key/value string table index pairs of one dense node from the shared `tags` range and adds them to the node. An index of 0 ends this node's tags; a key that is not followed by a value throws osmium::pbf_error. */ void build_tag_list_from_dense_nodes(osmium::builder::NodeBuilder& builder, varint_range& tags) { osmium::builder::TagListBuilder tl_builder{builder}; while (!tags.empty()) { const auto idx = tags.next_int32(); if (idx == 0) { return; } const auto& k = m_stringtable.at(idx); if (tags.empty()) { throw osmium::pbf_error{"PBF format error"}; // this is against the spec, keys/vals must come in pairs } const auto& v = m_stringtable.at(tags.next_int32()); tl_builder.add_tag(k.first, k.second, v.first, v.second); } } /* Decodes a DenseNodes message using only ids, lats, lons and tags; any other field (including DenseInfo) falls into the skipping default case. Ids and coordinates are delta-decoded and one Node is committed to m_buffer per id. Running out of lats or lons before ids throws osmium::pbf_error. */ void decode_dense_nodes_without_metadata(const data_view& data) { varint_range ids; varint_range lats; varint_range lons; varint_range tags; protozero::pbf_message pbf_dense_nodes{data}; while (pbf_dense_nodes.next()) { switch (pbf_dense_nodes.tag_and_type()) { case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_id, protozero::pbf_wire_type::length_delimited): ids = varint_range{pbf_dense_nodes.get_view()}; break; case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lat, protozero::pbf_wire_type::length_delimited): lats = varint_range{pbf_dense_nodes.get_view()}; break; case 
protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lon, protozero::pbf_wire_type::length_delimited): lons = varint_range{pbf_dense_nodes.get_view()}; break; case protozero::tag_and_type(OSMFormat::DenseNodes::packed_int32_keys_vals, protozero::pbf_wire_type::length_delimited): tags = varint_range{pbf_dense_nodes.get_view()}; break; default: pbf_dense_nodes.skip(); } } osmium::DeltaDecode dense_id; osmium::DeltaDecode dense_latitude; osmium::DeltaDecode dense_longitude; while (!ids.empty()) { if (lons.empty() || lats.empty()) { // this is against the spec, must have same number of elements throw osmium::pbf_error{"PBF format error"}; } { osmium::builder::NodeBuilder builder{m_buffer}; osmium::Node& node = builder.object(); node.set_id(dense_id.update(ids.next_sint64())); const auto lon = dense_longitude.update(lons.next_sint64()); const auto lat = dense_latitude.update(lats.next_sint64()); builder.object().set_location(osmium::Location{ convert_pbf_lon(lon), convert_pbf_lat(lat) }); if (!tags.empty()) { build_tag_list_from_dense_nodes(builder, tags); } } m_buffer.commit(); } } /* Decodes a DenseNodes message including its DenseInfo. Ids, lats, lons, uids, user_sids, changesets and timestamps are delta-decoded; versions and visible flags are read as plain values. Each DenseInfo array is only consulted while it still has data, and the location is set only for visible nodes. One Node is committed to m_buffer per id. */ void decode_dense_nodes(const data_view& data) { bool has_info = false; varint_range ids; varint_range lats; varint_range lons; varint_range tags; varint_range versions; varint_range timestamps; varint_range changesets; varint_range uids; varint_range user_sids; varint_range visibles; protozero::pbf_message pbf_dense_nodes{data}; while (pbf_dense_nodes.next()) { switch (pbf_dense_nodes.tag_and_type()) { case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_id, protozero::pbf_wire_type::length_delimited): ids = varint_range{pbf_dense_nodes.get_view()}; break; case protozero::tag_and_type(OSMFormat::DenseNodes::optional_DenseInfo_denseinfo, protozero::pbf_wire_type::length_delimited): { has_info = true; protozero::pbf_message pbf_dense_info{pbf_dense_nodes.get_message()}; while (pbf_dense_info.next()) { switch (pbf_dense_info.tag_and_type()) { case 
protozero::tag_and_type(OSMFormat::DenseInfo::packed_int32_version, protozero::pbf_wire_type::length_delimited): versions = varint_range{pbf_dense_info.get_view()}; break; case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint64_timestamp, protozero::pbf_wire_type::length_delimited): timestamps = varint_range{pbf_dense_info.get_view()}; break; case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint64_changeset, protozero::pbf_wire_type::length_delimited): changesets = varint_range{pbf_dense_info.get_view()}; break; case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint32_uid, protozero::pbf_wire_type::length_delimited): uids = varint_range{pbf_dense_info.get_view()}; break; case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint32_user_sid, protozero::pbf_wire_type::length_delimited): user_sids = varint_range{pbf_dense_info.get_view()}; break; case protozero::tag_and_type(OSMFormat::DenseInfo::packed_bool_visible, protozero::pbf_wire_type::length_delimited): visibles = varint_range{pbf_dense_info.get_view()}; break; default: pbf_dense_info.skip(); } } } break; case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lat, protozero::pbf_wire_type::length_delimited): lats = varint_range{pbf_dense_nodes.get_view()}; break; case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lon, protozero::pbf_wire_type::length_delimited): lons = varint_range{pbf_dense_nodes.get_view()}; break; case protozero::tag_and_type(OSMFormat::DenseNodes::packed_int32_keys_vals, protozero::pbf_wire_type::length_delimited): tags = varint_range{pbf_dense_nodes.get_view()}; break; default: pbf_dense_nodes.skip(); } } osmium::DeltaDecode dense_id; osmium::DeltaDecode dense_latitude; osmium::DeltaDecode dense_longitude; osmium::DeltaDecode dense_uid; osmium::DeltaDecode dense_user_sid; osmium::DeltaDecode dense_changeset; osmium::DeltaDecode dense_timestamp; while (!ids.empty()) { if (lons.empty() || lats.empty()) { // this is against the spec, 
must have same number of elements throw osmium::pbf_error{"PBF format error"}; } { bool visible = true; osmium::builder::NodeBuilder builder{m_buffer}; osmium::Node& node = builder.object(); node.set_id(dense_id.update(ids.next_sint64())); if (has_info) { if (!versions.empty()) { const auto version = versions.next_int32(); if (version < -1) { throw osmium::pbf_error{"object version must not be negative"}; } if (version == -1) { node.set_version(0U); } else { node.set_version(static_cast(version)); } } if (!changesets.empty()) { const auto changeset_id = dense_changeset.update(changesets.next_sint64()); if (changeset_id < -1 || changeset_id >= std::numeric_limits::max()) { throw osmium::pbf_error{"object changeset_id must be between 0 and 2^32-1"}; } if (changeset_id == -1) { node.set_changeset(0U); } else { node.set_changeset(static_cast(changeset_id)); } } if (!timestamps.empty()) { node.set_timestamp(dense_timestamp.update(timestamps.next_sint64()) * m_date_factor / 1000); } if (!uids.empty()) { node.set_uid_from_signed(static_cast(dense_uid.update(uids.next_sint32()))); } if (!visibles.empty()) { visible = (visibles.next_int32() != 0); } node.set_visible(visible); if (!user_sids.empty()) { const auto& u = m_stringtable.at(dense_user_sid.update(user_sids.next_sint32())); builder.set_user(u.first, u.second); } } // even if the node isn't visible, there's still a record // of its lat/lon in the dense arrays. 
const auto lon = dense_longitude.update(lons.next_sint64()); const auto lat = dense_latitude.update(lats.next_sint64()); if (visible) { builder.object().set_location(osmium::Location{ convert_pbf_lon(lon), convert_pbf_lat(lat) }); } if (!tags.empty()) { build_tag_list_from_dense_nodes(builder, tags); } } m_buffer.commit(); } } public: PBFPrimitiveBlockDecoder(const data_view& data, const osmium::osm_entity_bits::type read_types, const osmium::io::read_meta read_metadata) : m_data(data), m_read_types(read_types), m_read_metadata(read_metadata) { } PBFPrimitiveBlockDecoder(const PBFPrimitiveBlockDecoder&) = delete; PBFPrimitiveBlockDecoder& operator=(const PBFPrimitiveBlockDecoder&) = delete; PBFPrimitiveBlockDecoder(PBFPrimitiveBlockDecoder&&) = delete; PBFPrimitiveBlockDecoder& operator=(PBFPrimitiveBlockDecoder&&) = delete; ~PBFPrimitiveBlockDecoder() noexcept = default; /* Runs the metadata pass and then the data pass, and returns the filled buffer by moving m_buffer out of the decoder. A std::out_of_range escaping from either pass (the string table lookups use .at()) is rethrown as osmium::pbf_error. */ osmium::memory::Buffer operator()() { try { decode_primitive_block_metadata(); decode_primitive_block_data(); } catch (const std::out_of_range&) { throw osmium::pbf_error{"string id out of range"}; } return std::move(m_buffer); } }; // class PBFPrimitiveBlockDecoder /* Extracts the payload of a Blob message. A raw payload is returned at once as the view obtained from the message; a zlib payload (and an lz4 payload when OSMIUM_WITH_LZ4 is defined) is uncompressed with `output` passed to the uncompress helper (presumably the returned view refers to `output` - confirm against the helpers). Throws osmium::pbf_error for a raw payload larger than max_uncompressed_blob_size, a raw_size that is <= 0 or above that limit, lzma/zstd (and lz4 without OSMIUM_WITH_LZ4) payloads, any other field, or when no usable data was found. */ inline data_view decode_blob(const std::string& blob_data, std::string& output) { int32_t raw_size = 0; protozero::data_view compressed_data; pbf_compression use_compression = pbf_compression::none; protozero::pbf_message pbf_blob{blob_data}; while (pbf_blob.next()) { switch (pbf_blob.tag_and_type()) { case protozero::tag_and_type(FileFormat::Blob::optional_bytes_raw, protozero::pbf_wire_type::length_delimited): { const auto data_len = pbf_blob.get_view(); if (data_len.size() > max_uncompressed_blob_size) { throw osmium::pbf_error{"illegal blob size"}; } return data_len; } case protozero::tag_and_type(FileFormat::Blob::optional_int32_raw_size, protozero::pbf_wire_type::varint): raw_size = pbf_blob.get_int32(); if (raw_size <= 0 || uint32_t(raw_size) > max_uncompressed_blob_size) { throw osmium::pbf_error{"illegal blob size"}; } break; 
case protozero::tag_and_type(FileFormat::Blob::optional_bytes_zlib_data, protozero::pbf_wire_type::length_delimited): use_compression = pbf_compression::zlib; compressed_data = pbf_blob.get_view(); break; case protozero::tag_and_type(FileFormat::Blob::optional_bytes_lzma_data, protozero::pbf_wire_type::length_delimited): throw osmium::pbf_error{"lzma blobs not supported"}; case protozero::tag_and_type(FileFormat::Blob::optional_bytes_lz4_data, protozero::pbf_wire_type::length_delimited): #ifdef OSMIUM_WITH_LZ4 use_compression = pbf_compression::lz4; compressed_data = pbf_blob.get_view(); break; #else throw osmium::pbf_error{"lz4 blobs not supported"}; #endif case protozero::tag_and_type(FileFormat::Blob::optional_bytes_zstd_data, protozero::pbf_wire_type::length_delimited): throw osmium::pbf_error{"zstd blobs not supported"}; default: throw osmium::pbf_error{"unknown compression"}; } } if (!compressed_data.empty() && raw_size != 0) { switch (use_compression) { case pbf_compression::none: break; case pbf_compression::zlib: return osmium::io::detail::zlib_uncompress_string( compressed_data.data(), static_cast(compressed_data.size()), // NOLINT(google-runtime-int) static_cast(raw_size), // NOLINT(google-runtime-int) output ); case pbf_compression::lz4: #ifdef OSMIUM_WITH_LZ4 return osmium::io::detail::lz4_uncompress_string( compressed_data.data(), static_cast(compressed_data.size()), // NOLINT(google-runtime-int) static_cast(raw_size), // NOLINT(google-runtime-int) output ); #else break; #endif } std::abort(); // should never be here } throw osmium::pbf_error{"blob contains no data"}; } /* Reads left/right/top/bottom from a HeaderBBox message and returns them as an osmium::Box after dividing each by resolution_convert. Throws osmium::pbf_error if any of the four values is still at its max() sentinel after parsing, i.e. was not present. */ inline osmium::Box decode_header_bbox(const data_view& data) { int64_t left = std::numeric_limits::max(); int64_t right = std::numeric_limits::max(); int64_t top = std::numeric_limits::max(); int64_t bottom = std::numeric_limits::max(); protozero::pbf_message pbf_header_bbox{data}; while (pbf_header_bbox.next()) { switch (pbf_header_bbox.tag_and_type()) { case 
protozero::tag_and_type(OSMFormat::HeaderBBox::required_sint64_left, protozero::pbf_wire_type::varint): left = pbf_header_bbox.get_sint64(); break; case protozero::tag_and_type(OSMFormat::HeaderBBox::required_sint64_right, protozero::pbf_wire_type::varint): right = pbf_header_bbox.get_sint64(); break; case protozero::tag_and_type(OSMFormat::HeaderBBox::required_sint64_top, protozero::pbf_wire_type::varint): top = pbf_header_bbox.get_sint64(); break; case protozero::tag_and_type(OSMFormat::HeaderBBox::required_sint64_bottom, protozero::pbf_wire_type::varint): bottom = pbf_header_bbox.get_sint64(); break; default: pbf_header_bbox.skip(); } } if (left == std::numeric_limits::max() || right == std::numeric_limits::max() || top == std::numeric_limits::max() || bottom == std::numeric_limits::max()) { throw osmium::pbf_error{"invalid bbox"}; } osmium::Box box; box.extend(osmium::Location{left / resolution_convert, bottom / resolution_convert}); box.extend(osmium::Location{right / resolution_convert, top / resolution_convert}); return box; } /* Builds an osmium::io::Header from a HeaderBlock message: bounding boxes, required features, optional features (stored under numbered pbf_optional_feature_N keys), the writing program and the osmosis replication fields. A required feature other than the three names checked below throws osmium::pbf_error; unknown fields are skipped. */ inline osmium::io::Header decode_header_block(const data_view& data) { osmium::io::Header header; int i = 0; protozero::pbf_message pbf_header_block{data}; while (pbf_header_block.next()) { switch (pbf_header_block.tag_and_type()) { case protozero::tag_and_type(OSMFormat::HeaderBlock::optional_HeaderBBox_bbox, protozero::pbf_wire_type::length_delimited): header.add_box(decode_header_bbox(pbf_header_block.get_view())); break; case protozero::tag_and_type(OSMFormat::HeaderBlock::repeated_string_required_features, protozero::pbf_wire_type::length_delimited): { auto feature = pbf_header_block.get_view(); /* NOTE(review): each strncmp below compares only feature.size() bytes, so a feature string that is a proper prefix of one of these names (including an empty string) compares equal - confirm this is acceptable. */ if (!std::strncmp("OsmSchema-V0.6", feature.data(), feature.size())) { // intentionally left blank } else if (!std::strncmp("DenseNodes", feature.data(), feature.size())) { header.set("pbf_dense_nodes", true); } else if (!std::strncmp("HistoricalInformation", feature.data(), feature.size())) { header.set_has_multiple_object_versions(true); 
} else { std::string msg{"required feature not supported: "}; msg.append(feature.data(), feature.size()); throw osmium::pbf_error{msg}; } } break; case protozero::tag_and_type(OSMFormat::HeaderBlock::repeated_string_optional_features, protozero::pbf_wire_type::length_delimited): { const auto opt = pbf_header_block.get_string(); header.set("pbf_optional_feature_" + std::to_string(i++), opt); if (opt == "Sort.Type_then_ID") { header.set("sorting", "Type_then_ID"); } } break; case protozero::tag_and_type(OSMFormat::HeaderBlock::optional_string_writingprogram, protozero::pbf_wire_type::length_delimited): header.set("generator", pbf_header_block.get_string()); break; case protozero::tag_and_type(OSMFormat::HeaderBlock::optional_int64_osmosis_replication_timestamp, protozero::pbf_wire_type::varint): { const auto timestamp = osmium::Timestamp{pbf_header_block.get_int64()}.to_iso(); header.set("osmosis_replication_timestamp", timestamp); header.set("timestamp", timestamp); } break; case protozero::tag_and_type(OSMFormat::HeaderBlock::optional_int64_osmosis_replication_sequence_number, protozero::pbf_wire_type::varint): header.set("osmosis_replication_sequence_number", std::to_string(pbf_header_block.get_int64())); break; case protozero::tag_and_type(OSMFormat::HeaderBlock::optional_string_osmosis_replication_base_url, protozero::pbf_wire_type::length_delimited): header.set("osmosis_replication_base_url", pbf_header_block.get_string()); break; default: pbf_header_block.skip(); } } return header; } /** * Decode HeaderBlock. 
 * * The blob is unpacked with decode_blob() and the result is passed to decode_header_block(). * * @param header_block_data Input data * @returns Header object * @throws osmium::pbf_error If there was a parsing error */ inline osmium::io::Header decode_header(const std::string& header_block_data) { std::string output; return decode_header_block(decode_blob(header_block_data, output)); } /* Callable that takes ownership of one data blob's bytes (moved into a shared_ptr-held buffer) together with the entity types and metadata setting to read. Invoking it unpacks the blob with decode_blob() and runs a PBFPrimitiveBlockDecoder over the result, returning the decoded osmium::memory::Buffer. The local `output` string is handed to decode_blob() and stays alive until the decoder has returned. */ class PBFDataBlobDecoder { std::shared_ptr m_input_buffer; osmium::osm_entity_bits::type m_read_types; osmium::io::read_meta m_read_metadata; public: PBFDataBlobDecoder(std::string&& input_buffer, const osmium::osm_entity_bits::type read_types, const osmium::io::read_meta read_metadata) : m_input_buffer(std::make_shared(std::move(input_buffer))), m_read_types(read_types), m_read_metadata(read_metadata) { } osmium::memory::Buffer operator()() { std::string output; PBFPrimitiveBlockDecoder decoder{decode_blob(*m_input_buffer, output), m_read_types, m_read_metadata}; return decoder(); } }; // class PBFDataBlobDecoder } // namespace detail } // namespace io } // namespace osmium #endif // OSMIUM_IO_DETAIL_PBF_DECODER_HPP