diff --git a/README.md b/README.md index 0cb8cffe1e5be9f65c14686f6bfd61f69473995a..ba0b704a4a99c20e874c4c8cd4c45df5313052e4 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ include(FetchContent) FetchContent_Declare(json GIT_REPOSITORY https://github.com/ArthurSonzogni/nlohman_json - GIT_TAG v3.3.0) + GIT_TAG v3.4.0) FetchContent_GetProperties(json) if(NOT json_POPULATED) diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index c40620ad63f13968fa7be39b17bc7af0073b73dc..1e7cf51e0ac88e186ebfc73e67178c617d9de7f8 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -1,7 +1,7 @@ /* __ _____ _____ _____ __| | __| | | | JSON for Modern C++ -| | |__ | | | | | | version 3.3.0 +| | |__ | | | | | | version 3.4.0 |_____|_____|_____|_|___| https://github.com/nlohmann/json Licensed under the MIT License <http://opensource.org/licenses/MIT>. @@ -31,7 +31,7 @@ SOFTWARE. #define NLOHMANN_JSON_HPP #define NLOHMANN_JSON_VERSION_MAJOR 3 -#define NLOHMANN_JSON_VERSION_MINOR 3 +#define NLOHMANN_JSON_VERSION_MINOR 4 #define NLOHMANN_JSON_VERSION_PATCH 0 #include <algorithm> // all_of, find, for_each @@ -108,7 +108,7 @@ uses the standard template types. @since version 1.0.0 */ using json = basic_json<>; -} +} // namespace nlohmann #endif @@ -202,6 +202,37 @@ using json = basic_json<>; #define JSON_HAS_CPP_14 #endif +/*! +@brief macro to briefly define a mapping between an enum and JSON +@def NLOHMANN_JSON_SERIALIZE_ENUM +@since version 3.4.0 +*/ +#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) 
\ + template<typename BasicJsonType> \ + inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [e](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \ + { \ + return ej_pair.first == e; \ + }); \ + j = ((it != std::end(m)) ? it : std::begin(m))->second; \ + } \ + template<typename BasicJsonType> \ + inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [j](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \ + { \ + return ej_pair.second == j; \ + }); \ + e = ((it != std::end(m)) ? it : std::begin(m))->first; \ + } + // Ugly macros to avoid uglier copy-paste when specializing basic_json. They // may be removed in the future once the class is split. @@ -280,8 +311,8 @@ struct static_const template<typename T> constexpr T static_const<T>::value; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/meta/type_traits.hpp> @@ -312,8 +343,8 @@ template <typename ...Ts> struct make_void using type = void; }; template <typename ...Ts> using void_t = typename make_void<Ts...>::type; -} -} +} // namespace detail +} // namespace nlohmann // http://en.cppreference.com/w/cpp/experimental/is_detected @@ -364,8 +395,8 @@ using is_detected_exact = std::is_same<Expected, detected_t<Op, Args...>>; template <class To, template <class...> class Op, class... 
Args> using is_detected_convertible = std::is_convertible<detected_t<Op, Args...>, To>; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/macro_scope.hpp> @@ -386,6 +417,15 @@ namespace detail // helpers // ///////////// +// Note to maintainers: +// +// Every trait in this file expects a non CV-qualified type. +// The only exceptions are in the 'aliases for detected' section +// (i.e. those of the form: decltype(T::member_function(std::declval<T>()))) +// +// In this case, T has to be properly CV-qualified to constrain the function arguments +// (e.g. to_json(BasicJsonType&, const T&)) + template<typename> struct is_basic_json : std::false_type {}; NLOHMANN_BASIC_JSON_TPL_DECLARATION @@ -428,6 +468,52 @@ using from_json_function = decltype(T::from_json(std::declval<Args>()...)); template <typename T, typename U> using get_template_function = decltype(std::declval<T>().template get<U>()); +// trait checking if JSONSerializer<T>::from_json(json const&, udt&) exists +template <typename BasicJsonType, typename T, typename = void> +struct has_from_json : std::false_type {}; + +template <typename BasicJsonType, typename T> +struct has_from_json<BasicJsonType, T, + enable_if_t<not is_basic_json<T>::value>> +{ + using serializer = typename BasicJsonType::template json_serializer<T, void>; + + static constexpr bool value = + is_detected_exact<void, from_json_function, serializer, + const BasicJsonType&, T&>::value; +}; + +// This trait checks if JSONSerializer<T>::from_json(json const&) exists +// this overload is used for non-default-constructible user-defined-types +template <typename BasicJsonType, typename T, typename = void> +struct has_non_default_from_json : std::false_type {}; + +template<typename BasicJsonType, typename T> +struct has_non_default_from_json<BasicJsonType, T, enable_if_t<not is_basic_json<T>::value>> +{ + using serializer = typename BasicJsonType::template json_serializer<T, void>; + + static constexpr bool value = + 
is_detected_exact<T, from_json_function, serializer, + const BasicJsonType&>::value; +}; + +// This trait checks if BasicJsonType::json_serializer<T>::to_json exists +// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion. +template <typename BasicJsonType, typename T, typename = void> +struct has_to_json : std::false_type {}; + +template <typename BasicJsonType, typename T> +struct has_to_json<BasicJsonType, T, enable_if_t<not is_basic_json<T>::value>> +{ + using serializer = typename BasicJsonType::template json_serializer<T, void>; + + static constexpr bool value = + is_detected_exact<void, to_json_function, serializer, BasicJsonType&, + T>::value; +}; + + /////////////////// // is_ functions // /////////////////// @@ -483,6 +569,30 @@ template <typename BasicJsonType, typename CompatibleObjectType> struct is_compatible_object_type : is_compatible_object_type_impl<BasicJsonType, CompatibleObjectType> {}; +template <typename BasicJsonType, typename ConstructibleObjectType, + typename = void> +struct is_constructible_object_type_impl : std::false_type {}; + +template <typename BasicJsonType, typename ConstructibleObjectType> +struct is_constructible_object_type_impl < + BasicJsonType, ConstructibleObjectType, + enable_if_t<is_detected<mapped_type_t, ConstructibleObjectType>::value and + is_detected<key_type_t, ConstructibleObjectType>::value >> +{ + using object_t = typename BasicJsonType::object_t; + + static constexpr bool value = + (std::is_constructible<typename ConstructibleObjectType::key_type, typename object_t::key_type>::value and + std::is_same<typename object_t::mapped_type, typename ConstructibleObjectType::mapped_type>::value) or + (has_from_json<BasicJsonType, typename ConstructibleObjectType::mapped_type>::value or + has_non_default_from_json<BasicJsonType, typename ConstructibleObjectType::mapped_type >::value); +}; + +template <typename BasicJsonType, typename ConstructibleObjectType> +struct 
is_constructible_object_type + : is_constructible_object_type_impl<BasicJsonType, + ConstructibleObjectType> {}; + template <typename BasicJsonType, typename CompatibleStringType, typename = void> struct is_compatible_string_type_impl : std::false_type {}; @@ -497,9 +607,28 @@ struct is_compatible_string_type_impl < std::is_constructible<typename BasicJsonType::string_t, CompatibleStringType>::value; }; -template <typename BasicJsonType, typename CompatibleStringType> +template <typename BasicJsonType, typename ConstructibleStringType> struct is_compatible_string_type - : is_compatible_string_type_impl<BasicJsonType, CompatibleStringType> {}; + : is_compatible_string_type_impl<BasicJsonType, ConstructibleStringType> {}; + +template <typename BasicJsonType, typename ConstructibleStringType, + typename = void> +struct is_constructible_string_type_impl : std::false_type {}; + +template <typename BasicJsonType, typename ConstructibleStringType> +struct is_constructible_string_type_impl < + BasicJsonType, ConstructibleStringType, + enable_if_t<is_detected_exact<typename BasicJsonType::string_t::value_type, + value_type_t, ConstructibleStringType>::value >> +{ + static constexpr auto value = + std::is_constructible<ConstructibleStringType, + typename BasicJsonType::string_t>::value; +}; + +template <typename BasicJsonType, typename ConstructibleStringType> +struct is_constructible_string_type + : is_constructible_string_type_impl<BasicJsonType, ConstructibleStringType> {}; template <typename BasicJsonType, typename CompatibleArrayType, typename = void> struct is_compatible_array_type_impl : std::false_type {}; @@ -508,18 +637,61 @@ template <typename BasicJsonType, typename CompatibleArrayType> struct is_compatible_array_type_impl < BasicJsonType, CompatibleArrayType, enable_if_t<is_detected<value_type_t, CompatibleArrayType>::value and - is_detected<iterator_t, CompatibleArrayType>::value >> + is_detected<iterator_t, CompatibleArrayType>::value and +// This is needed 
because json_reverse_iterator has a ::iterator type... +// Therefore it is detected as a CompatibleArrayType. +// The real fix would be to have an Iterable concept. + not is_iterator_traits< + std::iterator_traits<CompatibleArrayType>>::value >> { - // This is needed because json_reverse_iterator has a ::iterator type... - // Therefore it is detected as a CompatibleArrayType. - // The real fix would be to have an Iterable concept. - static constexpr bool value = not is_iterator_traits<std::iterator_traits<CompatibleArrayType>>::value; + static constexpr bool value = + std::is_constructible<BasicJsonType, + typename CompatibleArrayType::value_type>::value; }; template <typename BasicJsonType, typename CompatibleArrayType> struct is_compatible_array_type : is_compatible_array_type_impl<BasicJsonType, CompatibleArrayType> {}; +template <typename BasicJsonType, typename ConstructibleArrayType, typename = void> +struct is_constructible_array_type_impl : std::false_type {}; + +template <typename BasicJsonType, typename ConstructibleArrayType> +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t<std::is_same<ConstructibleArrayType, + typename BasicJsonType::value_type>::value >> + : std::true_type {}; + +template <typename BasicJsonType, typename ConstructibleArrayType> +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t<not std::is_same<ConstructibleArrayType, + typename BasicJsonType::value_type>::value and + is_detected<value_type_t, ConstructibleArrayType>::value and + is_detected<iterator_t, ConstructibleArrayType>::value and + is_complete_type< + detected_t<value_type_t, ConstructibleArrayType>>::value >> +{ + static constexpr bool value = + // This is needed because json_reverse_iterator has a ::iterator type, + // furthermore, std::back_insert_iterator (and other iterators) have a base class `iterator`... + // Therefore it is detected as a ConstructibleArrayType. 
+ // The real fix would be to have an Iterable concept. + not is_iterator_traits < + std::iterator_traits<ConstructibleArrayType >>::value and + + (std::is_same<typename ConstructibleArrayType::value_type, typename BasicJsonType::array_t::value_type>::value or + has_from_json<BasicJsonType, + typename ConstructibleArrayType::value_type>::value or + has_non_default_from_json < + BasicJsonType, typename ConstructibleArrayType::value_type >::value); +}; + +template <typename BasicJsonType, typename ConstructibleArrayType> +struct is_constructible_array_type + : is_constructible_array_type_impl<BasicJsonType, ConstructibleArrayType> {}; + template <typename RealIntegerType, typename CompatibleNumberIntegerType, typename = void> struct is_compatible_integer_type_impl : std::false_type {}; @@ -547,51 +719,6 @@ struct is_compatible_integer_type : is_compatible_integer_type_impl<RealIntegerType, CompatibleNumberIntegerType> {}; -// trait checking if JSONSerializer<T>::from_json(json const&, udt&) exists -template <typename BasicJsonType, typename T, typename = void> -struct has_from_json : std::false_type {}; - -template <typename BasicJsonType, typename T> -struct has_from_json<BasicJsonType, T, - enable_if_t<not is_basic_json<T>::value>> -{ - using serializer = typename BasicJsonType::template json_serializer<T, void>; - - static constexpr bool value = - is_detected_exact<void, from_json_function, serializer, - const BasicJsonType&, T&>::value; -}; - -// This trait checks if JSONSerializer<T>::from_json(json const&) exists -// this overload is used for non-default-constructible user-defined-types -template <typename BasicJsonType, typename T, typename = void> -struct has_non_default_from_json : std::false_type {}; - -template<typename BasicJsonType, typename T> -struct has_non_default_from_json<BasicJsonType, T, enable_if_t<not is_basic_json<T>::value>> -{ - using serializer = typename BasicJsonType::template json_serializer<T, void>; - - static constexpr bool value = - 
is_detected_exact<T, from_json_function, serializer, - const BasicJsonType&>::value; -}; - -// This trait checks if BasicJsonType::json_serializer<T>::to_json exists -// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion. -template <typename BasicJsonType, typename T, typename = void> -struct has_to_json : std::false_type {}; - -template <typename BasicJsonType, typename T> -struct has_to_json<BasicJsonType, T, enable_if_t<not is_basic_json<T>::value>> -{ - using serializer = typename BasicJsonType::template json_serializer<T, void>; - - static constexpr bool value = - is_detected_exact<void, to_json_function, serializer, BasicJsonType&, - T>::value; -}; - template <typename BasicJsonType, typename CompatibleType, typename = void> struct is_compatible_type_impl: std::false_type {}; @@ -607,8 +734,8 @@ struct is_compatible_type_impl < template <typename BasicJsonType, typename CompatibleType> struct is_compatible_type : is_compatible_type_impl<BasicJsonType, CompatibleType> {}; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/exceptions.hpp> @@ -617,6 +744,36 @@ struct is_compatible_type #include <stdexcept> // runtime_error #include <string> // to_string +// #include <nlohmann/detail/input/position_t.hpp> + + +#include <cstddef> // size_t + +namespace nlohmann +{ +namespace detail +{ +/// struct to capture the start position of the current token +struct position_t +{ + /// the total number of characters read + std::size_t chars_read_total = 0; + /// the number of characters read in the current line + std::size_t chars_read_current_line = 0; + /// the number of lines read + std::size_t lines_read = 0; + + /// conversion to size_t to preserve SAX interface + constexpr operator size_t() const + { + return chars_read_total; + } +}; + +} +} + + namespace nlohmann { namespace detail @@ -704,6 +861,7 @@ json.exception.parse_error.109 | parse error: array index 'one' is not a number 
json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. +json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). @note For an input with n bytes, 1 is the index of the first character and n+1 is the index of the terminating null byte or the end of file. This also @@ -727,15 +885,23 @@ class parse_error : public exception /*! @brief create a parse error exception @param[in] id_ the id of the exception - @param[in] byte_ the byte index where the error occurred (or 0 if the - position cannot be determined) + @param[in] pos the position where the error occurred (or with + chars_read_total=0 if the position cannot be + determined) @param[in] what_arg the explanatory string @return parse_error object */ + static parse_error create(int id_, const position_t& pos, const std::string& what_arg) + { + std::string w = exception::name("parse_error", id_) + "parse error" + + position_string(pos) + ": " + what_arg; + return parse_error(id_, pos.chars_read_total, w.c_str()); + } + static parse_error create(int id_, std::size_t byte_, const std::string& what_arg) { std::string w = exception::name("parse_error", id_) + "parse error" + - (byte_ != 0 ? (" at " + std::to_string(byte_)) : "") + + (byte_ != 0 ? 
(" at byte " + std::to_string(byte_)) : "") + ": " + what_arg; return parse_error(id_, byte_, w.c_str()); } @@ -754,6 +920,12 @@ class parse_error : public exception private: parse_error(int id_, std::size_t byte_, const char* what_arg) : exception(id_, what_arg), byte(byte_) {} + + static std::string position_string(const position_t& pos) + { + return " at line " + std::to_string(pos.lines_read + 1) + + ", column " + std::to_string(pos.chars_read_current_line); + } }; /*! @@ -833,6 +1005,7 @@ json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. json.exception.type_error.316 | invalid UTF-8 byte at index 10: 0x7E | The @ref dump function only works with UTF-8 encoded strings; that is, if you assign a `std::string` to a JSON value, make sure it is UTF-8 encoded. | +json.exception.type_error.317 | JSON value cannot be serialized to requested format | The dynamic type of the object cannot be represented in the requested serialization format (e.g. a raw `true` or `null` JSON object cannot be serialized to BSON) | @liveexample{The following code shows how a `type_error` exception can be caught.,type_error} @@ -875,8 +1048,9 @@ json.exception.out_of_range.403 | key 'foo' not found | The provided key was not json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. 
json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. -json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON only supports integers numbers up to 9223372036854775807. | +json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. | +json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | @liveexample{The following code shows how an `out_of_range` exception can be caught.,out_of_range} @@ -939,8 +1113,8 @@ class other_error : public exception private: other_error(int id_, const char* what_arg) : exception(id_, what_arg) {} }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/value_t.hpp> @@ -1017,8 +1191,8 @@ inline bool operator<(const value_t lhs, const value_t rhs) noexcept const auto r_index = static_cast<std::size_t>(rhs); return l_index < order.size() and r_index < order.size() and order[l_index] < order[r_index]; } -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/conversions/from_json.hpp> @@ -1112,13 +1286,13 @@ void from_json(const BasicJsonType& j, typename BasicJsonType::string_t& s) } template < - typename BasicJsonType, typename CompatibleStringType, + typename BasicJsonType, typename ConstructibleStringType, enable_if_t < - is_compatible_string_type<BasicJsonType, CompatibleStringType>::value and + is_constructible_string_type<BasicJsonType, ConstructibleStringType>::value and not std::is_same<typename BasicJsonType::string_t, - 
CompatibleStringType>::value, + ConstructibleStringType>::value, int > = 0 > -void from_json(const BasicJsonType& j, CompatibleStringType& s) +void from_json(const BasicJsonType& j, ConstructibleStringType& s) { if (JSON_UNLIKELY(not j.is_string())) { @@ -1201,11 +1375,11 @@ auto from_json_array_impl(const BasicJsonType& j, std::array<T, N>& arr, } } -template<typename BasicJsonType, typename CompatibleArrayType> -auto from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, priority_tag<1> /*unused*/) +template<typename BasicJsonType, typename ConstructibleArrayType> +auto from_json_array_impl(const BasicJsonType& j, ConstructibleArrayType& arr, priority_tag<1> /*unused*/) -> decltype( - arr.reserve(std::declval<typename CompatibleArrayType::size_type>()), - j.template get<typename CompatibleArrayType::value_type>(), + arr.reserve(std::declval<typename ConstructibleArrayType::size_type>()), + j.template get<typename ConstructibleArrayType::value_type>(), void()) { using std::end; @@ -1216,12 +1390,12 @@ auto from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, prio { // get<BasicJsonType>() returns *this, this won't call a from_json // method when value_type is BasicJsonType - return i.template get<typename CompatibleArrayType::value_type>(); + return i.template get<typename ConstructibleArrayType::value_type>(); }); } -template <typename BasicJsonType, typename CompatibleArrayType> -void from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, +template <typename BasicJsonType, typename ConstructibleArrayType> +void from_json_array_impl(const BasicJsonType& j, ConstructibleArrayType& arr, priority_tag<0> /*unused*/) { using std::end; @@ -1232,21 +1406,21 @@ void from_json_array_impl(const BasicJsonType& j, CompatibleArrayType& arr, { // get<BasicJsonType>() returns *this, this won't call a from_json // method when value_type is BasicJsonType - return i.template get<typename CompatibleArrayType::value_type>(); + return 
i.template get<typename ConstructibleArrayType::value_type>(); }); } -template <typename BasicJsonType, typename CompatibleArrayType, +template <typename BasicJsonType, typename ConstructibleArrayType, enable_if_t < - is_compatible_array_type<BasicJsonType, CompatibleArrayType>::value and - not is_compatible_object_type<BasicJsonType, CompatibleArrayType>::value and - not is_compatible_string_type<BasicJsonType, CompatibleArrayType>::value and - not is_basic_json<CompatibleArrayType>::value, + is_constructible_array_type<BasicJsonType, ConstructibleArrayType>::value and + not is_constructible_object_type<BasicJsonType, ConstructibleArrayType>::value and + not is_constructible_string_type<BasicJsonType, ConstructibleArrayType>::value and + not is_basic_json<ConstructibleArrayType>::value, int > = 0 > -auto from_json(const BasicJsonType& j, CompatibleArrayType& arr) +auto from_json(const BasicJsonType& j, ConstructibleArrayType& arr) -> decltype(from_json_array_impl(j, arr, priority_tag<3> {}), -j.template get<typename CompatibleArrayType::value_type>(), +j.template get<typename ConstructibleArrayType::value_type>(), void()) { if (JSON_UNLIKELY(not j.is_array())) @@ -1258,9 +1432,9 @@ void()) from_json_array_impl(j, arr, priority_tag<3> {}); } -template<typename BasicJsonType, typename CompatibleObjectType, - enable_if_t<is_compatible_object_type<BasicJsonType, CompatibleObjectType>::value, int> = 0> -void from_json(const BasicJsonType& j, CompatibleObjectType& obj) +template<typename BasicJsonType, typename ConstructibleObjectType, + enable_if_t<is_constructible_object_type<BasicJsonType, ConstructibleObjectType>::value, int> = 0> +void from_json(const BasicJsonType& j, ConstructibleObjectType& obj) { if (JSON_UNLIKELY(not j.is_object())) { @@ -1268,13 +1442,13 @@ void from_json(const BasicJsonType& j, CompatibleObjectType& obj) } auto inner_object = j.template get_ptr<const typename BasicJsonType::object_t*>(); - using value_type = typename 
CompatibleObjectType::value_type; + using value_type = typename ConstructibleObjectType::value_type; std::transform( inner_object->begin(), inner_object->end(), std::inserter(obj, obj.begin()), [](typename BasicJsonType::object_t::value_type const & p) { - return value_type(p.first, p.second.template get<typename CompatibleObjectType::mapped_type>()); + return value_type(p.first, p.second.template get<typename ConstructibleObjectType::mapped_type>()); }); } @@ -1327,7 +1501,7 @@ void from_json(const BasicJsonType& j, std::pair<A1, A2>& p) } template<typename BasicJsonType, typename Tuple, std::size_t... Idx> -void from_json_tuple_impl(const BasicJsonType& j, Tuple& t, index_sequence<Idx...>) +void from_json_tuple_impl(const BasicJsonType& j, Tuple& t, index_sequence<Idx...> /*unused*/) { t = std::make_tuple(j.at(Idx).template get<typename std::tuple_element<Idx, Tuple>::type>()...); } @@ -1386,7 +1560,7 @@ struct from_json_fn return from_json(j, val); } }; -} +} // namespace detail /// namespace to hold default `from_json` function /// to see why this is required: @@ -1394,8 +1568,8 @@ struct from_json_fn namespace { constexpr const auto& from_json = detail::static_const<detail::from_json_fn>::value; -} -} +} // namespace +} // namespace nlohmann // #include <nlohmann/detail/conversions/to_json.hpp> @@ -1457,9 +1631,6 @@ template<typename IteratorType> class iteration_proxy public: explicit iteration_proxy_internal(IteratorType it) noexcept : anchor(it) {} - iteration_proxy_internal(const iteration_proxy_internal&) = default; - iteration_proxy_internal& operator=(const iteration_proxy_internal&) = default; - /// dereference operator (needed for range-based for) iteration_proxy_internal& operator*() { @@ -1542,8 +1713,8 @@ template<typename IteratorType> class iteration_proxy return iteration_proxy_internal(container.end()); } }; -} -} +} // namespace detail +} // namespace nlohmann namespace nlohmann @@ -1839,13 +2010,13 @@ void to_json(BasicJsonType& j, const 
std::pair<Args...>& p) // for https://github.com/nlohmann/json/pull/1134 template<typename BasicJsonType, typename T, enable_if_t<std::is_same<T, typename iteration_proxy<typename BasicJsonType::iterator>::iteration_proxy_internal>::value, int> = 0> -void to_json(BasicJsonType& j, T b) noexcept +void to_json(BasicJsonType& j, const T& b) { j = {{b.key(), b.value()}}; } template<typename BasicJsonType, typename Tuple, std::size_t... Idx> -void to_json_tuple_impl(BasicJsonType& j, const Tuple& t, index_sequence<Idx...>) +void to_json_tuple_impl(BasicJsonType& j, const Tuple& t, index_sequence<Idx...> /*unused*/) { j = {std::get<Idx>(t)...}; } @@ -1865,14 +2036,14 @@ struct to_json_fn return to_json(j, std::forward<T>(val)); } }; -} +} // namespace detail /// namespace to hold default `to_json` function namespace { constexpr const auto& to_json = detail::static_const<detail::to_json_fn>::value; -} -} +} // namespace +} // namespace nlohmann // #include <nlohmann/detail/input/input_adapters.hpp> @@ -1896,7 +2067,7 @@ namespace nlohmann namespace detail { /// the supported input formats -enum class input_format_t { json, cbor, msgpack, ubjson }; +enum class input_format_t { json, cbor, msgpack, ubjson, bson }; //////////////////// // input adapters // @@ -1949,6 +2120,8 @@ class input_stream_adapter : public input_adapter_protocol // delete because of pointer members input_stream_adapter(const input_stream_adapter&) = delete; input_stream_adapter& operator=(input_stream_adapter&) = delete; + input_stream_adapter(input_stream_adapter&&) = delete; + input_stream_adapter& operator=(input_stream_adapter&&) = delete; // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to // ensure that std::char_traits<char>::eof() and the character 0xFF do not @@ -1968,13 +2141,16 @@ class input_stream_adapter : public input_adapter_protocol class input_buffer_adapter : public input_adapter_protocol { public: - input_buffer_adapter(const char* b, const std::size_t l) + 
input_buffer_adapter(const char* b, const std::size_t l) noexcept : cursor(b), limit(b + l) {} // delete because of pointer members input_buffer_adapter(const input_buffer_adapter&) = delete; input_buffer_adapter& operator=(input_buffer_adapter&) = delete; + input_buffer_adapter(input_buffer_adapter&&) = delete; + input_buffer_adapter& operator=(input_buffer_adapter&&) = delete; + ~input_buffer_adapter() override = default; std::char_traits<char>::int_type get_character() noexcept override { @@ -2009,7 +2185,7 @@ struct wide_string_input_helper else { // get the current character - const int wc = static_cast<int>(str[current_wchar++]); + const auto wc = static_cast<int>(str[current_wchar++]); // UTF-32 to UTF-8 encoding if (wc < 0x80) @@ -2064,7 +2240,7 @@ struct wide_string_input_helper<WideStringType, 2> else { // get the current character - const int wc = static_cast<int>(str[current_wchar++]); + const auto wc = static_cast<int>(str[current_wchar++]); // UTF-16 to UTF-8 encoding if (wc < 0x80) @@ -2089,7 +2265,7 @@ struct wide_string_input_helper<WideStringType, 2> { if (current_wchar < str.size()) { - const int wc2 = static_cast<int>(str[current_wchar++]); + const auto wc2 = static_cast<int>(str[current_wchar++]); const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); utf8_bytes[0] = 0xf0 | (charcode >> 18); utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); @@ -2113,7 +2289,9 @@ template<typename WideStringType> class wide_string_input_adapter : public input_adapter_protocol { public: - explicit wide_string_input_adapter(const WideStringType& w) : str(w) {} + explicit wide_string_input_adapter(const WideStringType& w) noexcept + : str(w) + {} std::char_traits<char>::int_type get_character() noexcept override { @@ -2259,8 +2437,8 @@ class input_adapter /// the actual adapter input_adapter_t ia = nullptr; }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/input/lexer.hpp> @@ -2277,6 +2455,8 @@ class input_adapter 
// #include <nlohmann/detail/input/input_adapters.hpp> +// #include <nlohmann/detail/input/position_t.hpp> + namespace nlohmann { @@ -2371,7 +2551,10 @@ class lexer // delete because of pointer members lexer(const lexer&) = delete; + lexer(lexer&&) = delete; lexer& operator=(lexer&) = delete; + lexer& operator=(lexer&&) = delete; + ~lexer() = default; private: ///////////////////// @@ -2660,44 +2843,199 @@ class lexer // invalid control characters case 0x00: + { + error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000"; + return token_type::parse_error; + } + case 0x01: + { + error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001"; + return token_type::parse_error; + } + case 0x02: + { + error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002"; + return token_type::parse_error; + } + case 0x03: + { + error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003"; + return token_type::parse_error; + } + case 0x04: + { + error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004"; + return token_type::parse_error; + } + case 0x05: + { + error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005"; + return token_type::parse_error; + } + case 0x06: + { + error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006"; + return token_type::parse_error; + } + case 0x07: + { + error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007"; + return token_type::parse_error; + } + case 0x08: + { + error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b"; + return token_type::parse_error; + } + case 0x09: + { + error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t"; + return token_type::parse_error; + } + case 0x0A: + { + error_message = 
"invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n"; + return token_type::parse_error; + } + case 0x0B: + { + error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B"; + return token_type::parse_error; + } + case 0x0C: + { + error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f"; + return token_type::parse_error; + } + case 0x0D: + { + error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r"; + return token_type::parse_error; + } + case 0x0E: + { + error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E"; + return token_type::parse_error; + } + case 0x0F: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1A: - case 0x1B: - case 0x1C: - case 0x1D: - case 0x1E: - case 0x1F: { - error_message = "invalid string: control character must be escaped"; + error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F"; return token_type::parse_error; } - // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) - case 0x20: + case 0x10: + { + error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010"; + return token_type::parse_error; + } + + case 0x11: + { + error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011"; + return token_type::parse_error; + } + + case 0x12: + { + error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012"; + return token_type::parse_error; + } + + case 0x13: + { + error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013"; + return token_type::parse_error; + } + + case 0x14: + { + error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014"; + return token_type::parse_error; + } + + case 
0x15: + { + error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015"; + return token_type::parse_error; + } + + case 0x16: + { + error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016"; + return token_type::parse_error; + } + + case 0x17: + { + error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017"; + return token_type::parse_error; + } + + case 0x18: + { + error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018"; + return token_type::parse_error; + } + + case 0x19: + { + error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019"; + return token_type::parse_error; + } + + case 0x1A: + { + error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A"; + return token_type::parse_error; + } + + case 0x1B: + { + error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B"; + return token_type::parse_error; + } + + case 0x1C: + { + error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C"; + return token_type::parse_error; + } + + case 0x1D: + { + error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D"; + return token_type::parse_error; + } + + case 0x1E: + { + error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E"; + return token_type::parse_error; + } + + case 0x1F: + { + error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F"; + return token_type::parse_error; + } + + // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) + case 0x20: case 0x21: case 0x23: case 0x24: @@ -2976,7 +3314,7 @@ class lexer locale's decimal point is used instead of `.` to work with the locale-dependent converters. 
*/ - token_type scan_number() + token_type scan_number() // lgtm [cpp/use-of-goto] { // reset token_buffer to store the number's bytes reset(); @@ -3349,7 +3687,9 @@ scan_number_done: */ std::char_traits<char>::int_type get() { - ++chars_read; + ++position.chars_read_total; + ++position.chars_read_current_line; + if (next_unget) { // just reset the next_unget variable and work with current @@ -3364,6 +3704,13 @@ scan_number_done: { token_string.push_back(std::char_traits<char>::to_char_type(current)); } + + if (current == '\n') + { + ++position.lines_read; + ++position.chars_read_current_line = 0; + } + return current; } @@ -3371,14 +3718,29 @@ scan_number_done: @brief unget current character (read it again on next get) We implement unget by setting variable next_unget to true. The input is not - changed - we just simulate ungetting by modifying chars_read and - token_string. The next call to get() will behave as if the unget character - is read again. + changed - we just simulate ungetting by modifying chars_read_total, + chars_read_current_line, and token_string. The next call to get() will + behave as if the unget character is read again. */ void unget() { next_unget = true; - --chars_read; + + --position.chars_read_total; + + // in case we "unget" a newline, we have to also decrement the lines_read + if (position.chars_read_current_line == 0) + { + if (position.lines_read > 0) + { + --position.lines_read; + } + } + else + { + --position.chars_read_current_line; + } + if (JSON_LIKELY(current != std::char_traits<char>::eof())) { assert(token_string.size() != 0); @@ -3426,9 +3788,9 @@ scan_number_done: ///////////////////// /// return position of last read token - constexpr std::size_t get_position() const noexcept + constexpr position_t get_position() const noexcept { - return chars_read; + return position; } /// return the last read token (for errors only). 
Will never contain EOF @@ -3475,30 +3837,20 @@ scan_number_done: { if (get() == 0xEF) { - if (get() == 0xBB and get() == 0xBF) - { - // we completely parsed the BOM - return true; - } - else - { - // after reading 0xEF, an unexpected character followed - return false; - } - } - else - { - // the first character is not the beginning of the BOM; unget it to - // process is later - unget(); - return true; + // check if we completely parse the BOM + return get() == 0xBB and get() == 0xBF; } + + // the first character is not the beginning of the BOM; unget it to + // process is later + unget(); + return true; } token_type scan() { // initially, skip the BOM - if (chars_read == 0 and not skip_bom()) + if (position.chars_read_total == 0 and not skip_bom()) { error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; return token_type::parse_error; @@ -3576,8 +3928,8 @@ scan_number_done: /// whether the next get() call should just return current bool next_unget = false; - /// the number of characters read - std::size_t chars_read = 0; + /// the start position of the current token + position_t position; /// raw input token string (for error messages) std::vector<char> token_string {}; @@ -3596,8 +3948,8 @@ scan_number_done: /// the decimal point const char decimal_point_char = '.'; }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/input/parser.hpp> @@ -3755,8 +4107,8 @@ struct is_sax_static_asserts "Missing/invalid function: bool parse_error(std::size_t, const " "std::string&, const exception&)"); }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/input/input_adapters.hpp> @@ -3878,7 +4230,7 @@ struct json_sax @brief a parse error occurred @param[in] position the position in the input where the error occurs @param[in] last_token the last read token - @param[in] error_msg a detailed error message + @param[in] ex an exception object describing the error @return whether parsing should proceed (must 
return false) */ virtual bool parse_error(std::size_t position, @@ -3946,7 +4298,7 @@ class json_sax_dom_parser return true; } - bool number_float(number_float_t val, const string_t&) + bool number_float(number_float_t val, const string_t& /*unused*/) { handle_value(val); return true; @@ -4003,7 +4355,7 @@ class json_sax_dom_parser return true; } - bool parse_error(std::size_t, const std::string&, + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& ex) { errored = true; @@ -4051,20 +4403,19 @@ class json_sax_dom_parser root = BasicJsonType(std::forward<Value>(v)); return &root; } + + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->emplace_back(std::forward<Value>(v)); + return &(ref_stack.back()->m_value.array->back()); + } else { - assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_value.array->emplace_back(std::forward<Value>(v)); - return &(ref_stack.back()->m_value.array->back()); - } - else - { - assert(object_element); - *object_element = BasicJsonType(std::forward<Value>(v)); - return object_element; - } + assert(object_element); + *object_element = BasicJsonType(std::forward<Value>(v)); + return object_element; } } @@ -4123,7 +4474,7 @@ class json_sax_dom_callback_parser return true; } - bool number_float(number_float_t val, const string_t&) + bool number_float(number_float_t val, const string_t& /*unused*/) { handle_value(val); return true; @@ -4261,7 +4612,7 @@ class json_sax_dom_callback_parser return true; } - bool parse_error(std::size_t, const std::string&, + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& ex) { errored = true; @@ -4339,37 +4690,37 @@ class json_sax_dom_callback_parser root = std::move(value); return {true, &root}; } + + // skip this value if we already decided 
to skip the parent + // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) + if (not ref_stack.back()) + { + return {false, nullptr}; + } + + // we now only expect arrays and objects + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->push_back(std::move(value)); + return {true, &(ref_stack.back()->m_value.array->back())}; + } else { - // skip this value if we already decided to skip the parent - // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) - if (not ref_stack.back()) - { - return {false, nullptr}; - } + // check if we should store an element for the current key + assert(not key_keep_stack.empty()); + const bool store_element = key_keep_stack.back(); + key_keep_stack.pop_back(); - assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); - if (ref_stack.back()->is_array()) + if (not store_element) { - ref_stack.back()->m_value.array->push_back(std::move(value)); - return {true, &(ref_stack.back()->m_value.array->back())}; + return {false, nullptr}; } - else - { - // check if we should store an element for the current key - assert(not key_keep_stack.empty()); - const bool store_element = key_keep_stack.back(); - key_keep_stack.pop_back(); - - if (not store_element) - { - return {false, nullptr}; - } - assert(object_element); - *object_element = std::move(value); - return {true, object_element}; - } + assert(object_element); + *object_element = std::move(value); + return {true, object_element}; } } @@ -4407,37 +4758,37 @@ class json_sax_acceptor return true; } - bool boolean(bool) + bool boolean(bool /*unused*/) { return true; } - bool number_integer(number_integer_t) + bool number_integer(number_integer_t /*unused*/) { return true; } - bool number_unsigned(number_unsigned_t) + bool number_unsigned(number_unsigned_t /*unused*/) { return true; } - bool number_float(number_float_t, const string_t&) + bool 
number_float(number_float_t /*unused*/, const string_t& /*unused*/) { return true; } - bool string(string_t&) + bool string(string_t& /*unused*/) { return true; } - bool start_object(std::size_t = std::size_t(-1)) + bool start_object(std::size_t /*unused*/ = std::size_t(-1)) { return true; } - bool key(string_t&) + bool key(string_t& /*unused*/) { return true; } @@ -4447,7 +4798,7 @@ class json_sax_acceptor return true; } - bool start_array(std::size_t = std::size_t(-1)) + bool start_array(std::size_t /*unused*/ = std::size_t(-1)) { return true; } @@ -4457,14 +4808,14 @@ class json_sax_acceptor return true; } - bool parse_error(std::size_t, const std::string&, const detail::exception&) + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) { return false; } }; -} +} // namespace detail -} +} // namespace nlohmann // #include <nlohmann/detail/input/lexer.hpp> @@ -4547,7 +4898,8 @@ class parser { sdp.parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input))); + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::end_of_input, "value"))); } // in case of an error, return discarded value @@ -4575,7 +4927,8 @@ class parser { sdp.parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input))); + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::end_of_input, "value"))); } // in case of an error, return discarded value @@ -4610,7 +4963,8 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input))); + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::end_of_input, "value"))); } return result; @@ -4620,7 
+4974,7 @@ class parser template <typename SAX> bool sax_parse_internal(SAX* sax) { - // stack to remember the hieararchy of structured values we are parsing + // stack to remember the hierarchy of structured values we are parsing // true = array; false = object std::vector<bool> states; // value to avoid a goto (see comment where set to true) @@ -4655,14 +5009,12 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::value_string, "object key"))); } - else + if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) { - if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) - { - return false; - } + return false; } // parse separator (:) @@ -4670,7 +5022,8 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::name_separator, "object separator"))); } // remember we are now inside an object @@ -4784,14 +5137,16 @@ class parser // using "uninitialized" to avoid "expected" message return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized))); + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::uninitialized, "value"))); } default: // the last token was unexpected { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value))); + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::literal_or_value, "value"))); } } } @@ -4803,7 +5158,7 @@ class parser // we 
reached this line after we successfully parsed a value if (states.empty()) { - // empty stack: we reached the end of the hieararchy: done + // empty stack: we reached the end of the hierarchy: done return true; } else @@ -4839,7 +5194,8 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::end_array, "array"))); } } else // object @@ -4852,7 +5208,8 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::value_string, "object key"))); } else { @@ -4867,7 +5224,8 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::name_separator, "object separator"))); } // parse values @@ -4896,7 +5254,8 @@ class parser { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::end_object, "object"))); } } } @@ -4909,9 +5268,17 @@ class parser return (last_token = m_lexer.scan()); } - std::string exception_message(const token_type expected) + std::string exception_message(const token_type expected, const std::string& context) { - std::string error_msg = "syntax error - "; + std::string error_msg = "syntax error "; + + if (not context.empty()) + { + error_msg += "while parsing " + context + " "; + } + + error_msg += "- "; + if (last_token 
== token_type::parse_error) { error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" + @@ -4940,8 +5307,8 @@ class parser /// whether to throw exceptions in case of errors const bool allow_exceptions = true; }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/iterators/primitive_iterator.hpp> @@ -5062,8 +5429,8 @@ class primitive_iterator_t return *this; } }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/iterators/internal_iterator.hpp> @@ -5090,8 +5457,8 @@ template<typename BasicJsonType> struct internal_iterator /// generic iterator for all other types primitive_iterator_t primitive_iterator {}; }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/iterators/iter_impl.hpp> @@ -5712,8 +6079,8 @@ class iter_impl /// the actual iterator of the associated instance internal_iterator<typename std::remove_const<BasicJsonType>::type> m_it; }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/iterators/iteration_proxy.hpp> @@ -5835,8 +6202,8 @@ class json_reverse_iterator : public std::reverse_iterator<Base> return it.operator * (); } }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/output/output_adapters.hpp> @@ -5871,7 +6238,9 @@ template<typename CharType> class output_vector_adapter : public output_adapter_protocol<CharType> { public: - explicit output_vector_adapter(std::vector<CharType>& vec) : v(vec) {} + explicit output_vector_adapter(std::vector<CharType>& vec) noexcept + : v(vec) + {} void write_character(CharType c) override { @@ -5892,7 +6261,9 @@ template<typename CharType> class output_stream_adapter : public output_adapter_protocol<CharType> { public: - explicit output_stream_adapter(std::basic_ostream<CharType>& s) : stream(s) {} + explicit output_stream_adapter(std::basic_ostream<CharType>& s) noexcept + : stream(s) + {} void write_character(CharType c) override { @@ 
-5913,7 +6284,9 @@ template<typename CharType, typename StringType = std::basic_string<CharType>> class output_string_adapter : public output_adapter_protocol<CharType> { public: - explicit output_string_adapter(StringType& s) : str(s) {} + explicit output_string_adapter(StringType& s) noexcept + : str(s) + {} void write_character(CharType c) override { @@ -5950,8 +6323,8 @@ class output_adapter private: output_adapter_t<CharType> oa = nullptr; }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/input/binary_reader.hpp> @@ -6030,6 +6403,10 @@ class binary_reader switch (format) { + case input_format_t::bson: + result = parse_bson_internal(); + break; + case input_format_t::cbor: result = parse_cbor_internal(); break; @@ -6062,7 +6439,8 @@ class binary_reader if (JSON_UNLIKELY(current != std::char_traits<char>::eof())) { - return sax->parse_error(chars_read, get_token_string(), parse_error::create(110, chars_read, "expected end of input")); + return sax->parse_error(chars_read, get_token_string(), + parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value"))); } } @@ -6082,70 +6460,286 @@ class binary_reader } private: - /*! - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead + ////////// + // BSON // + ////////// - @return whether a valid CBOR value was passed to the SAX parser + /*! + @brief Reads in a BSON-object and passes it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser */ - bool parse_cbor_internal(const bool get_char = true) + bool parse_bson_internal() { - switch (get_char ? 
get() : current) + std::int32_t document_size; + get_number<std::int32_t, true>(input_format_t::bson, document_size); + + if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) { - // EOF - case std::char_traits<char>::eof(): - return unexpect_eof(); + return false; + } - // Integer 0x00..0x17 (0..23) - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0A: - case 0x0B: - case 0x0C: - case 0x0D: - case 0x0E: - case 0x0F: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - return sax->number_unsigned(static_cast<number_unsigned_t>(current)); + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) + { + return false; + } - case 0x18: // Unsigned integer (one-byte uint8_t follows) + return sax->end_object(); + } + + /*! + @brief Parses a C-style string from the BSON input. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @return `true` if the \x00-byte indicating the end of the string was + encountered before the EOF; false` indicates an unexpected EOF. + */ + bool get_bson_cstr(string_t& result) + { + auto out = std::back_inserter(result); + while (true) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring"))) { - uint8_t number; - return get_number(number) and sax->number_unsigned(number); + return false; } - - case 0x19: // Unsigned integer (two-byte uint16_t follows) + if (current == 0x00) { - uint16_t number; - return get_number(number) and sax->number_unsigned(number); + return true; } + *out++ = static_cast<char>(current); + } - case 0x1A: // Unsigned integer (four-byte uint32_t follows) + return true; + } + + /*! + @brief Parses a zero-terminated string of length @a len from the BSON + input. + @param[in] len The length (including the zero-byte at the end) of the + string to be read. 
+ @param[in, out] result A reference to the string variable where the read + string is to be stored. + @tparam NumberType The type of the length @a len + @pre len >= 1 + @return `true` if the string was successfully parsed + */ + template<typename NumberType> + bool get_bson_string(const NumberType len, string_t& result) + { + if (JSON_UNLIKELY(len < 1)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string"))); + } + + return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof(); + } + + /*! + @brief Read a BSON document element of the given @a element_type. + @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param[in] element_type_parse_position The position in the input stream, + where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported + @a element_type will give rise to a parse_error.114: + Unsupported BSON record type 0x... 
+ @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_internal(const int element_type, + const std::size_t element_type_parse_position) + { + switch (element_type) + { + case 0x01: // double { - uint32_t number; - return get_number(number) and sax->number_unsigned(number); + double number; + return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), ""); } - case 0x1B: // Unsigned integer (eight-byte uint64_t follows) + case 0x02: // string { - uint64_t number; - return get_number(number) and sax->number_unsigned(number); + std::int32_t len; + string_t value; + return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); + } + + case 0x03: // object + { + return parse_bson_internal(); + } + + case 0x04: // array + { + return parse_bson_array(); + } + + case 0x08: // boolean + { + return sax->boolean(static_cast<bool>(get())); + } + + case 0x0A: // null + { + return sax->null(); + } + + case 0x10: // int32 + { + std::int32_t value; + return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(value); + } + + case 0x12: // int64 + { + std::int64_t value; + return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(value); + } + + default: // anything else not supported (yet) + { + char cr[3]; + snprintf(cr, sizeof(cr), "%.2hhX", static_cast<unsigned char>(element_type)); + return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr))); + } + } + } + + /*! + @brief Read a BSON element list (as specified in the BSON-spec) + + The same binary layout is used for objects and arrays, hence it must be + indicated with the argument @a is_array which one is expected + (true --> array, false --> object). 
+ + @param[in] is_array Determines if the element list being read is to be + treated as an object (@a is_array == false), or as an + array (@a is_array == true). + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_list(const bool is_array) + { + string_t key; + while (int element_type = get()) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) + { + return false; + } + + const std::size_t element_type_parse_position = chars_read; + if (JSON_UNLIKELY(not get_bson_cstr(key))) + { + return false; + } + + if (not is_array) + { + sax->key(key); + } + + if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position))) + { + return false; + } + + // get_bson_cstr only appends + key.clear(); + } + + return true; + } + + /*! + @brief Reads an array from the BSON input and passes it to the SAX-parser. + @return whether a valid BSON-array was passed to the SAX parser + */ + bool parse_bson_array() + { + std::int32_t document_size; + get_number<std::int32_t, true>(input_format_t::bson, document_size); + + if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true))) + { + return false; + } + + return sax->end_array(); + } + + ////////// + // CBOR // + ////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid CBOR value was passed to the SAX parser + */ + bool parse_cbor_internal(const bool get_char = true) + { + switch (get_char ? 
get() : current) + { + // EOF + case std::char_traits<char>::eof(): + return unexpect_eof(input_format_t::cbor, "value"); + + // Integer 0x00..0x17 (0..23) + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0A: + case 0x0B: + case 0x0C: + case 0x0D: + case 0x0E: + case 0x0F: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + return sax->number_unsigned(static_cast<number_unsigned_t>(current)); + + case 0x18: // Unsigned integer (one-byte uint8_t follows) + { + uint8_t number; + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); + } + + case 0x19: // Unsigned integer (two-byte uint16_t follows) + { + uint16_t number; + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); + } + + case 0x1A: // Unsigned integer (four-byte uint32_t follows) + { + uint32_t number; + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); + } + + case 0x1B: // Unsigned integer (eight-byte uint64_t follows) + { + uint64_t number; + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); } // Negative integer -1-0x00..-1-0x17 (-1..-24) @@ -6178,25 +6772,25 @@ class binary_reader case 0x38: // Negative integer (one-byte uint8_t follows) { uint8_t number; - return get_number(number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); } case 0x39: // Negative integer -1-n (two-byte uint16_t follows) { uint16_t number; - return get_number(number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); } case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) { uint32_t 
number; - return get_number(number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); } case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) { uint64_t number; - return get_number(number) and sax->number_integer(static_cast<number_integer_t>(-1) + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - static_cast<number_integer_t>(number)); } @@ -6265,25 +6859,25 @@ class binary_reader case 0x98: // array (one-byte uint8_t for n follows) { uint8_t len; - return get_number(len) and get_cbor_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); } case 0x99: // array (two-byte uint16_t for n follow) { uint16_t len; - return get_number(len) and get_cbor_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); } case 0x9A: // array (four-byte uint32_t for n follow) { uint32_t len; - return get_number(len) and get_cbor_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); } case 0x9B: // array (eight-byte uint64_t for n follow) { uint64_t len; - return get_number(len) and get_cbor_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); } case 0x9F: // array (indefinite length) @@ -6319,25 +6913,25 @@ class binary_reader case 0xB8: // map (one-byte uint8_t for n follows) { uint8_t len; - return get_number(len) and get_cbor_object(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); } case 0xB9: // map (two-byte uint16_t for n follow) { uint16_t len; - return get_number(len) and 
get_cbor_object(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); } case 0xBA: // map (four-byte uint32_t for n follow) { uint32_t len; - return get_number(len) and get_cbor_object(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); } case 0xBB: // map (eight-byte uint64_t for n follow) { uint64_t len; - return get_number(len) and get_cbor_object(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); } case 0xBF: // map (indefinite length) @@ -6354,17 +6948,20 @@ class binary_reader case 0xF9: // Half-Precision Float (two-byte IEEE 754) { - const int byte1 = get(); - if (JSON_UNLIKELY(not unexpect_eof())) + const int byte1_raw = get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number"))) { return false; } - const int byte2 = get(); - if (JSON_UNLIKELY(not unexpect_eof())) + const int byte2_raw = get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number"))) { return false; } + const auto byte1 = static_cast<unsigned char>(byte1_raw); + const auto byte2 = static_cast<unsigned char>(byte2_raw); + // code from RFC 7049, Appendix D, Figure 3: // As half-precision floating-point numbers were only added // to IEEE 754 in 2008, today's programming platforms often @@ -6400,131 +6997,44 @@ class binary_reader case 0xFA: // Single-Precision Float (four-byte IEEE 754) { float number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); + return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), ""); } case 0xFB: // Double-Precision Float (eight-byte IEEE 754) { double number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); + return get_number(input_format_t::cbor, number) and 
sax->number_float(static_cast<number_float_t>(number), ""); } default: // anything else (0xFF is handled inside the other types) { auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"))); } } } /*! - @return whether a valid MessagePack value was passed to the SAX parser + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. + + @param[out] result created string + + @return whether string creation completed */ - bool parse_msgpack_internal() + bool get_cbor_string(string_t& result) { - switch (get()) + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) { - // EOF - case std::char_traits<char>::eof(): - return unexpect_eof(); + return false; + } - // positive fixint - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0A: - case 0x0B: - case 0x0C: - case 0x0D: - case 0x0E: - case 0x0F: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1A: - case 0x1B: - case 0x1C: - case 0x1D: - case 0x1E: - case 0x1F: - case 0x20: - case 0x21: - case 0x22: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2A: - case 0x2B: - case 0x2C: - case 0x2D: - case 0x2E: - case 0x2F: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - case 0x38: - case 0x39: - case 0x3A: - case 0x3B: - case 0x3C: - case 0x3D: - case 0x3E: - case 0x3F: - 
case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - case 0x4A: - case 0x4B: - case 0x4C: - case 0x4D: - case 0x4E: - case 0x4F: - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - case 0x58: - case 0x59: - case 0x5A: - case 0x5B: - case 0x5C: - case 0x5D: - case 0x5E: - case 0x5F: + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) case 0x60: case 0x61: case 0x62: @@ -6549,368 +7059,254 @@ class binary_reader case 0x75: case 0x76: case 0x77: - case 0x78: - case 0x79: - case 0x7A: - case 0x7B: - case 0x7C: - case 0x7D: - case 0x7E: - case 0x7F: - return sax->number_unsigned(static_cast<number_unsigned_t>(current)); - - // fixmap - case 0x80: - case 0x81: - case 0x82: - case 0x83: - case 0x84: - case 0x85: - case 0x86: - case 0x87: - case 0x88: - case 0x89: - case 0x8A: - case 0x8B: - case 0x8C: - case 0x8D: - case 0x8E: - case 0x8F: - return get_msgpack_object(static_cast<std::size_t>(current & 0x0F)); - - // fixarray - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - case 0x98: - case 0x99: - case 0x9A: - case 0x9B: - case 0x9C: - case 0x9D: - case 0x9E: - case 0x9F: - return get_msgpack_array(static_cast<std::size_t>(current & 0x0F)); - - // fixstr - case 0xA0: - case 0xA1: - case 0xA2: - case 0xA3: - case 0xA4: - case 0xA5: - case 0xA6: - case 0xA7: - case 0xA8: - case 0xA9: - case 0xAA: - case 0xAB: - case 0xAC: - case 0xAD: - case 0xAE: - case 0xAF: - case 0xB0: - case 0xB1: - case 0xB2: - case 0xB3: - case 0xB4: - case 0xB5: - case 0xB6: - case 0xB7: - case 0xB8: - case 0xB9: - case 0xBA: - case 0xBB: - case 0xBC: - case 0xBD: - case 0xBE: - case 0xBF: - { - string_t s; - return get_msgpack_string(s) and sax->string(s); - } - - case 0xC0: // nil - return sax->null(); - - case 0xC2: // false - return sax->boolean(false); - - case 0xC3: // true - return 
sax->boolean(true); - - case 0xCA: // float 32 - { - float number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); - } - - case 0xCB: // float 64 - { - double number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); - } - - case 0xCC: // uint 8 - { - uint8_t number; - return get_number(number) and sax->number_unsigned(number); - } - - case 0xCD: // uint 16 - { - uint16_t number; - return get_number(number) and sax->number_unsigned(number); - } - - case 0xCE: // uint 32 - { - uint32_t number; - return get_number(number) and sax->number_unsigned(number); - } - - case 0xCF: // uint 64 - { - uint64_t number; - return get_number(number) and sax->number_unsigned(number); - } - - case 0xD0: // int 8 - { - int8_t number; - return get_number(number) and sax->number_integer(number); - } - - case 0xD1: // int 16 - { - int16_t number; - return get_number(number) and sax->number_integer(number); - } - - case 0xD2: // int 32 - { - int32_t number; - return get_number(number) and sax->number_integer(number); - } - - case 0xD3: // int 64 { - int64_t number; - return get_number(number) and sax->number_integer(number); + return get_string(input_format_t::cbor, current & 0x1F, result); } - case 0xD9: // str 8 - case 0xDA: // str 16 - case 0xDB: // str 32 + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) { - string_t s; - return get_msgpack_string(s) and sax->string(s); + uint8_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); } - case 0xDC: // array 16 + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) { uint16_t len; - return get_number(len) and get_msgpack_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); } - case 0xDD: // array 32 + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) { uint32_t len; - return get_number(len) 
and get_msgpack_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); } - case 0xDE: // map 16 + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) { - uint16_t len; - return get_number(len) and get_msgpack_object(static_cast<std::size_t>(len)); + uint64_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); } - case 0xDF: // map 32 + case 0x7F: // UTF-8 string (indefinite length) { - uint32_t len; - return get_number(len) and get_msgpack_object(static_cast<std::size_t>(len)); + while (get() != 0xFF) + { + string_t chunk; + if (not get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); + } + return true; } - // negative fixint - case 0xE0: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - case 0xF0: - case 0xF1: - case 0xF2: - case 0xF3: - case 0xF4: - case 0xF5: - case 0xF6: - case 0xF7: - case 0xF8: - case 0xF9: - case 0xFA: - case 0xFB: - case 0xFC: - case 0xFD: - case 0xFE: - case 0xFF: - return sax->number_integer(static_cast<int8_t>(current)); - - default: // anything else + default: { auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); } } } /*! 
- @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether a valid UBJSON value was passed to the SAX parser + @param[in] len the length of the array or std::size_t(-1) for an + array of indefinite size + @return whether array creation completed */ - bool parse_ubjson_internal(const bool get_char = true) + bool get_cbor_array(const std::size_t len) { - return get_ubjson_value(get_char ? get_ignore_noop() : current); - } - - /*! - @brief get next character from the input + if (JSON_UNLIKELY(not sax->start_array(len))) + { + return false; + } - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a -'ve valued - `std::char_traits<char>::eof()` in that case. + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not parse_cbor_internal(false))) + { + return false; + } + } + } - @return character read from the input - */ - int get() - { - ++chars_read; - return (current = ia->get_character()); + return sax->end_array(); } /*! 
- @return character read from the input after ignoring all 'N' entries + @param[in] len the length of the object or std::size_t(-1) for an + object of indefinite size + @return whether object creation completed */ - int get_ignore_noop() + bool get_cbor_object(const std::size_t len) { - do + if (not JSON_UNLIKELY(sax->start_object(len))) { - get(); + return false; } - while (current == 'N'); - - return current; - } - - /* - @brief read a number from the input - - @tparam NumberType the type of the number - @param[out] result number of type @a NumberType - - @return whether conversion completed - @note This function needs to respect the system's endianess, because - bytes in CBOR, MessagePack, and UBJSON are stored in network order - (big endian) and therefore need reordering on little endian systems. - */ - template<typename NumberType> - bool get_number(NumberType& result) - { - // step 1: read input into array with system's byte order - std::array<uint8_t, sizeof(NumberType)> vec; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) + string_t key; + if (len != std::size_t(-1)) { - get(); - if (JSON_UNLIKELY(not unexpect_eof())) + for (std::size_t i = 0; i < len; ++i) { - return false; - } + get(); + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } - // reverse byte order prior to conversion if necessary - if (is_little_endian) - { - vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current); + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); } - else + } + else + { + while (get() != 0xFF) { - vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); } } - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; + return 
sax->end_object(); } - /*! - @brief create a string by reading characters from the input - - @tparam NumberType the type of the number - @param[in] len number of characters to read - @param[out] string created by reading @a len bytes - - @return whether string creation completed - - @note We can not reserve @a len bytes for the result, because @a len - may be too large. Usually, @ref unexpect_eof() detects the end of - the input before we run out of string memory. - */ - template<typename NumberType> - bool get_string(const NumberType len, string_t& result) - { - bool success = true; - std::generate_n(std::back_inserter(result), len, [this, &success]() - { - get(); - if (JSON_UNLIKELY(not unexpect_eof())) - { - success = false; - } - return static_cast<char>(current); - }); - return success; - } + ///////////// + // MsgPack // + ///////////// /*! - @brief reads a CBOR string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - Additionally, CBOR's strings with indefinite lengths are supported. 
- - @param[out] result created string - - @return whether string creation completed + @return whether a valid MessagePack value was passed to the SAX parser */ - bool get_cbor_string(string_t& result) + bool parse_msgpack_internal() { - if (JSON_UNLIKELY(not unexpect_eof())) + switch (get()) { - return false; - } + // EOF + case std::char_traits<char>::eof(): + return unexpect_eof(input_format_t::msgpack, "value"); - switch (current) - { - // UTF-8 string (0x00..0x17 bytes follow) + // positive fixint + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0A: + case 0x0B: + case 0x0C: + case 0x0D: + case 0x0E: + case 0x0F: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1A: + case 0x1B: + case 0x1C: + case 0x1D: + case 0x1E: + case 0x1F: + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: + case 0x2C: + case 0x2D: + case 0x2E: + case 0x2F: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5C: + case 0x5D: + case 0x5E: + case 0x5F: case 0x60: case 0x61: case 0x62: @@ -6935,138 +7331,234 @@ class binary_reader case 0x75: case 0x76: case 0x77: + case 0x78: + case 0x79: + case 0x7A: + case 0x7B: + case 0x7C: + case 0x7D: + case 0x7E: + case 0x7F: + return 
sax->number_unsigned(static_cast<number_unsigned_t>(current)); + + // fixmap + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + return get_msgpack_object(static_cast<std::size_t>(current & 0x0F)); + + // fixarray + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + return get_msgpack_array(static_cast<std::size_t>(current & 0x0F)); + + // fixstr + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + { + string_t s; + return get_msgpack_string(s) and sax->string(s); + } + + case 0xC0: // nil + return sax->null(); + + case 0xC2: // false + return sax->boolean(false); + + case 0xC3: // true + return sax->boolean(true); + + case 0xCA: // float 32 + { + float number; + return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 0xCB: // float 64 + { + double number; + return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 0xCC: // uint 8 { - return get_string(current & 0x1F, result); + uint8_t number; + return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); } - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + case 0xCD: // uint 16 { - uint8_t len; - return get_number(len) and get_string(len, result); + uint16_t number; + 
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); } - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + case 0xCE: // uint 32 { - uint16_t len; - return get_number(len) and get_string(len, result); + uint32_t number; + return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); } - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + case 0xCF: // uint 64 { - uint32_t len; - return get_number(len) and get_string(len, result); + uint64_t number; + return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); } - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + case 0xD0: // int 8 { - uint64_t len; - return get_number(len) and get_string(len, result); + int8_t number; + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); } - case 0x7F: // UTF-8 string (indefinite length) + case 0xD1: // int 16 { - while (get() != 0xFF) - { - string_t chunk; - if (not get_cbor_string(chunk)) - { - return false; - } - result.append(chunk); - } - return true; + int16_t number; + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); } - default: + case 0xD2: // int 32 { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a CBOR string; last byte: 0x" + last_token)); + int32_t number; + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); } - } - } - - /*! 
- @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size - @return whether array creation completed - */ - bool get_cbor_array(const std::size_t len) - { - if (JSON_UNLIKELY(not sax->start_array(len))) - { - return false; - } - if (len != std::size_t(-1)) - for (std::size_t i = 0; i < len; ++i) + case 0xD3: // int 64 { - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } + int64_t number; + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); } - else - { - while (get() != 0xFF) + + case 0xD9: // str 8 + case 0xDA: // str 16 + case 0xDB: // str 32 { - if (JSON_UNLIKELY(not parse_cbor_internal(false))) - { - return false; - } + string_t s; + return get_msgpack_string(s) and sax->string(s); } - } - - return sax->end_array(); - } - /*! - @param[in] len the length of the object or std::size_t(-1) for an - object of indefinite size - @return whether object creation completed - */ - bool get_cbor_object(const std::size_t len) - { - if (not JSON_UNLIKELY(sax->start_object(len))) - { - return false; - } + case 0xDC: // array 16 + { + uint16_t len; + return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); + } - string_t key; - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) + case 0xDD: // array 32 { - get(); - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } + uint32_t len; + return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); + } - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); + case 0xDE: // map 16 + { + uint16_t len; + return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); } - } - else - { - while (get() != 0xFF) + + case 0xDF: // map 32 { - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } + uint32_t 
len; + return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); + } - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); + // negative fixint + case 0xE0: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + case 0xF0: + case 0xF1: + case 0xF2: + case 0xF3: + case 0xF4: + case 0xF5: + case 0xF6: + case 0xF7: + case 0xF8: + case 0xF9: + case 0xFA: + case 0xFB: + case 0xFC: + case 0xFD: + case 0xFE: + case 0xFF: + return sax->number_integer(static_cast<int8_t>(current)); + + default: // anything else + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value"))); } } - - return sax->end_object(); } /*! @@ -7081,7 +7573,7 @@ class binary_reader */ bool get_msgpack_string(string_t& result) { - if (JSON_UNLIKELY(not unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::msgpack, "string"))) { return false; } @@ -7122,31 +7614,31 @@ class binary_reader case 0xBE: case 0xBF: { - return get_string(current & 0x1F, result); + return get_string(input_format_t::msgpack, current & 0x1F, result); } case 0xD9: // str 8 { uint8_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); } case 0xDA: // str 16 { uint16_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); } case 0xDB: // str 32 { uint32_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); } default: { auto 
last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a MessagePack string; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string"))); } } } @@ -7203,6 +7695,22 @@ class binary_reader return sax->end_object(); } + //////////// + // UBJSON // + //////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser + */ + bool parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + /*! @brief reads a UBJSON string @@ -7224,7 +7732,7 @@ class binary_reader get(); // TODO: may we ignore N here? 
} - if (JSON_UNLIKELY(not unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value"))) { return false; } @@ -7234,36 +7742,36 @@ class binary_reader case 'U': { uint8_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); } case 'i': { int8_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); } case 'I': { int16_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); } case 'l': { int32_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); } case 'L': { int64_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); } default: auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a UBJSON string; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string"))); } } @@ -7278,7 +7786,7 @@ class binary_reader case 'U': { uint8_t number; - if (JSON_UNLIKELY(not get_number(number))) + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) { return false; } @@ -7289,7 +7797,7 @@ class binary_reader case 'i': { int8_t number; - if (JSON_UNLIKELY(not get_number(number))) + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) { return false; } @@ -7300,7 +7808,7 @@ class binary_reader case 'I': { int16_t number; - if 
(JSON_UNLIKELY(not get_number(number))) + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) { return false; } @@ -7311,7 +7819,7 @@ class binary_reader case 'l': { int32_t number; - if (JSON_UNLIKELY(not get_number(number))) + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) { return false; } @@ -7322,7 +7830,7 @@ class binary_reader case 'L': { int64_t number; - if (JSON_UNLIKELY(not get_number(number))) + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) { return false; } @@ -7333,7 +7841,7 @@ class binary_reader default: { auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "byte after '#' must denote a number type; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"))); } } } @@ -7358,7 +7866,7 @@ class binary_reader if (current == '$') { result.second = get(); // must not ignore 'N', because 'N' maybe the type - if (JSON_UNLIKELY(not unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "type"))) { return false; } @@ -7366,12 +7874,12 @@ class binary_reader get_ignore_noop(); if (JSON_UNLIKELY(current != '#')) { - if (JSON_UNLIKELY(not unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value"))) { return false; } auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "expected '#' after UBJSON type information; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size"))); } return get_ubjson_size_value(result.first); @@ -7392,7 +7900,7 @@ 
class binary_reader switch (prefix) { case std::char_traits<char>::eof(): // EOF - return unexpect_eof(); + return unexpect_eof(input_format_t::ubjson, "value"); case 'T': // true return sax->boolean(true); @@ -7405,56 +7913,56 @@ class binary_reader case 'U': { uint8_t number; - return get_number(number) and sax->number_unsigned(number); + return get_number(input_format_t::ubjson, number) and sax->number_unsigned(number); } case 'i': { int8_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); } case 'I': { int16_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); } case 'l': { int32_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); } case 'L': { int64_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); } case 'd': { float number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); + return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), ""); } case 'D': { double number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); + return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), ""); } case 'C': // char { get(); - if (JSON_UNLIKELY(not unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "char"))) { return false; } if (JSON_UNLIKELY(current > 127)) { auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token)); 
+ return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char"))); } string_t s(1, static_cast<char>(current)); return sax->string(s); @@ -7475,7 +7983,7 @@ class binary_reader default: // anything else { auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading UBJSON; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value"))); } } } @@ -7601,30 +8109,140 @@ class binary_reader while (current != '}') { - if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key))) - { - return false; - } - if (JSON_UNLIKELY(not parse_ubjson_internal())) - { - return false; - } - get_ignore_noop(); - key.clear(); + if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key))) + { + return false; + } + if (JSON_UNLIKELY(not parse_ubjson_internal())) + { + return false; + } + get_ignore_noop(); + key.clear(); + } + } + + return sax->end_object(); + } + + /////////////////////// + // Utility functions // + /////////////////////// + + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a -'ve valued + `std::char_traits<char>::eof()` in that case. + + @return character read from the input + */ + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + /*! 
+ @return character read from the input after ignoring all 'N' entries + */ + int get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + + /* + @brief read a number from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[out] result number of type @a NumberType + + @return whether conversion completed + + @note This function needs to respect the system's endianness, because + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. + */ + template<typename NumberType, bool InputIsLittleEndian = false> + bool get_number(const input_format_t format, NumberType& result) + { + // step 1: read input into array with system's byte order + std::array<uint8_t, sizeof(NumberType)> vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) + { + return false; + } + + // reverse byte order prior to conversion if necessary + if (is_little_endian && !InputIsLittleEndian) + { + vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current); + } + else + { + vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE } } - return sax->end_object(); + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return true; + } + + /*! + @brief create a string by reading characters from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of characters to read + @param[out] result string created by reading @a len bytes + + @return whether string creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of string memory.
+ */ + template<typename NumberType> + bool get_string(const input_format_t format, + const NumberType len, + string_t& result) + { + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success, &format]() + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) + { + success = false; + } + return static_cast<char>(current); + }); + return success; } /*! + @param[in] format the current format (for diagnostics) + @param[in] context further context information (for diagnostics) @return whether the last read character is not EOF */ - bool unexpect_eof() const + bool unexpect_eof(const input_format_t format, const char* context) const { if (JSON_UNLIKELY(current == std::char_traits<char>::eof())) { - return sax->parse_error(chars_read, "<end of file>", parse_error::create(110, chars_read, "unexpected end of input")); + return sax->parse_error(chars_read, "<end of file>", + parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context))); } return true; } @@ -7639,6 +8257,45 @@ class binary_reader return std::string{cr}; } + /*! 
+ @param[in] format the current format + @param[in] detail a detailed error message + @param[in] context further context information + @return a message string to use in the parse_error exceptions + */ + std::string exception_message(const input_format_t format, + const std::string& detail, + const std::string& context) const + { + std::string error_msg = "syntax error while parsing "; + + switch (format) + { + case input_format_t::cbor: + error_msg += "CBOR"; + break; + + case input_format_t::msgpack: + error_msg += "MessagePack"; + break; + + case input_format_t::ubjson: + error_msg += "UBJSON"; + break; + + case input_format_t::bson: + error_msg += "BSON"; + break; + + // LCOV_EXCL_START + default: + assert(false); + // LCOV_EXCL_STOP + } + + return error_msg + " " + context + ": " + detail; + } + private: /// input adapter input_adapter_t ia = nullptr; @@ -7655,8 +8312,8 @@ class binary_reader /// the SAX parser json_sax_t* sax = nullptr; }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/output/binary_writer.hpp> @@ -7686,6 +8343,8 @@ namespace detail template<typename BasicJsonType, typename CharType> class binary_writer { + using string_t = typename BasicJsonType::string_t; + public: /*! @brief create a binary writer @@ -7698,7 +8357,28 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + { + write_bson_object(*j.m_value.object); + break; + } + + default: + { + JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); + } + } + } + + /*!
+ @param[in] j JSON value to serialize */ void write_cbor(const BasicJsonType& j) { @@ -7706,15 +8386,15 @@ class binary_writer { case value_t::null: { - oa->write_character(static_cast<CharType>(0xF6)); + oa->write_character(to_char_type(0xF6)); break; } case value_t::boolean: { oa->write_character(j.m_value.boolean - ? static_cast<CharType>(0xF5) - : static_cast<CharType>(0xF4)); + ? to_char_type(0xF5) + : to_char_type(0xF4)); break; } @@ -7731,22 +8411,22 @@ class binary_writer } else if (j.m_value.number_integer <= (std::numeric_limits<uint8_t>::max)()) { - oa->write_character(static_cast<CharType>(0x18)); + oa->write_character(to_char_type(0x18)); write_number(static_cast<uint8_t>(j.m_value.number_integer)); } else if (j.m_value.number_integer <= (std::numeric_limits<uint16_t>::max)()) { - oa->write_character(static_cast<CharType>(0x19)); + oa->write_character(to_char_type(0x19)); write_number(static_cast<uint16_t>(j.m_value.number_integer)); } else if (j.m_value.number_integer <= (std::numeric_limits<uint32_t>::max)()) { - oa->write_character(static_cast<CharType>(0x1A)); + oa->write_character(to_char_type(0x1A)); write_number(static_cast<uint32_t>(j.m_value.number_integer)); } else { - oa->write_character(static_cast<CharType>(0x1B)); + oa->write_character(to_char_type(0x1B)); write_number(static_cast<uint64_t>(j.m_value.number_integer)); } } @@ -7761,22 +8441,22 @@ class binary_writer } else if (positive_number <= (std::numeric_limits<uint8_t>::max)()) { - oa->write_character(static_cast<CharType>(0x38)); + oa->write_character(to_char_type(0x38)); write_number(static_cast<uint8_t>(positive_number)); } else if (positive_number <= (std::numeric_limits<uint16_t>::max)()) { - oa->write_character(static_cast<CharType>(0x39)); + oa->write_character(to_char_type(0x39)); write_number(static_cast<uint16_t>(positive_number)); } else if (positive_number <= (std::numeric_limits<uint32_t>::max)()) { - oa->write_character(static_cast<CharType>(0x3A)); + 
oa->write_character(to_char_type(0x3A)); write_number(static_cast<uint32_t>(positive_number)); } else { - oa->write_character(static_cast<CharType>(0x3B)); + oa->write_character(to_char_type(0x3B)); write_number(static_cast<uint64_t>(positive_number)); } } @@ -7791,22 +8471,22 @@ class binary_writer } else if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)()) { - oa->write_character(static_cast<CharType>(0x18)); + oa->write_character(to_char_type(0x18)); write_number(static_cast<uint8_t>(j.m_value.number_unsigned)); } else if (j.m_value.number_unsigned <= (std::numeric_limits<uint16_t>::max)()) { - oa->write_character(static_cast<CharType>(0x19)); + oa->write_character(to_char_type(0x19)); write_number(static_cast<uint16_t>(j.m_value.number_unsigned)); } else if (j.m_value.number_unsigned <= (std::numeric_limits<uint32_t>::max)()) { - oa->write_character(static_cast<CharType>(0x1A)); + oa->write_character(to_char_type(0x1A)); write_number(static_cast<uint32_t>(j.m_value.number_unsigned)); } else { - oa->write_character(static_cast<CharType>(0x1B)); + oa->write_character(to_char_type(0x1B)); write_number(static_cast<uint64_t>(j.m_value.number_unsigned)); } break; @@ -7829,23 +8509,23 @@ class binary_writer } else if (N <= (std::numeric_limits<uint8_t>::max)()) { - oa->write_character(static_cast<CharType>(0x78)); + oa->write_character(to_char_type(0x78)); write_number(static_cast<uint8_t>(N)); } else if (N <= (std::numeric_limits<uint16_t>::max)()) { - oa->write_character(static_cast<CharType>(0x79)); + oa->write_character(to_char_type(0x79)); write_number(static_cast<uint16_t>(N)); } else if (N <= (std::numeric_limits<uint32_t>::max)()) { - oa->write_character(static_cast<CharType>(0x7A)); + oa->write_character(to_char_type(0x7A)); write_number(static_cast<uint32_t>(N)); } // LCOV_EXCL_START else if (N <= (std::numeric_limits<uint64_t>::max)()) { - oa->write_character(static_cast<CharType>(0x7B)); + oa->write_character(to_char_type(0x7B)); 
write_number(static_cast<uint64_t>(N)); } // LCOV_EXCL_STOP @@ -7867,23 +8547,23 @@ class binary_writer } else if (N <= (std::numeric_limits<uint8_t>::max)()) { - oa->write_character(static_cast<CharType>(0x98)); + oa->write_character(to_char_type(0x98)); write_number(static_cast<uint8_t>(N)); } else if (N <= (std::numeric_limits<uint16_t>::max)()) { - oa->write_character(static_cast<CharType>(0x99)); + oa->write_character(to_char_type(0x99)); write_number(static_cast<uint16_t>(N)); } else if (N <= (std::numeric_limits<uint32_t>::max)()) { - oa->write_character(static_cast<CharType>(0x9A)); + oa->write_character(to_char_type(0x9A)); write_number(static_cast<uint32_t>(N)); } // LCOV_EXCL_START else if (N <= (std::numeric_limits<uint64_t>::max)()) { - oa->write_character(static_cast<CharType>(0x9B)); + oa->write_character(to_char_type(0x9B)); write_number(static_cast<uint64_t>(N)); } // LCOV_EXCL_STOP @@ -7906,23 +8586,23 @@ class binary_writer } else if (N <= (std::numeric_limits<uint8_t>::max)()) { - oa->write_character(static_cast<CharType>(0xB8)); + oa->write_character(to_char_type(0xB8)); write_number(static_cast<uint8_t>(N)); } else if (N <= (std::numeric_limits<uint16_t>::max)()) { - oa->write_character(static_cast<CharType>(0xB9)); + oa->write_character(to_char_type(0xB9)); write_number(static_cast<uint16_t>(N)); } else if (N <= (std::numeric_limits<uint32_t>::max)()) { - oa->write_character(static_cast<CharType>(0xBA)); + oa->write_character(to_char_type(0xBA)); write_number(static_cast<uint32_t>(N)); } // LCOV_EXCL_START else if (N <= (std::numeric_limits<uint64_t>::max)()) { - oa->write_character(static_cast<CharType>(0xBB)); + oa->write_character(to_char_type(0xBB)); write_number(static_cast<uint64_t>(N)); } // LCOV_EXCL_STOP @@ -7942,7 +8622,7 @@ class binary_writer } /*! 
- @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize */ void write_msgpack(const BasicJsonType& j) { @@ -7950,15 +8630,15 @@ class binary_writer { case value_t::null: // nil { - oa->write_character(static_cast<CharType>(0xC0)); + oa->write_character(to_char_type(0xC0)); break; } case value_t::boolean: // true and false { oa->write_character(j.m_value.boolean - ? static_cast<CharType>(0xC3) - : static_cast<CharType>(0xC2)); + ? to_char_type(0xC3) + : to_char_type(0xC2)); break; } @@ -7977,25 +8657,25 @@ class binary_writer else if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)()) { // uint 8 - oa->write_character(static_cast<CharType>(0xCC)); + oa->write_character(to_char_type(0xCC)); write_number(static_cast<uint8_t>(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits<uint16_t>::max)()) { // uint 16 - oa->write_character(static_cast<CharType>(0xCD)); + oa->write_character(to_char_type(0xCD)); write_number(static_cast<uint16_t>(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits<uint32_t>::max)()) { // uint 32 - oa->write_character(static_cast<CharType>(0xCE)); + oa->write_character(to_char_type(0xCE)); write_number(static_cast<uint32_t>(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits<uint64_t>::max)()) { // uint 64 - oa->write_character(static_cast<CharType>(0xCF)); + oa->write_character(to_char_type(0xCF)); write_number(static_cast<uint64_t>(j.m_value.number_integer)); } } @@ -8010,28 +8690,28 @@ class binary_writer j.m_value.number_integer <= (std::numeric_limits<int8_t>::max)()) { // int 8 - oa->write_character(static_cast<CharType>(0xD0)); + oa->write_character(to_char_type(0xD0)); write_number(static_cast<int8_t>(j.m_value.number_integer)); } else if (j.m_value.number_integer >= (std::numeric_limits<int16_t>::min)() and j.m_value.number_integer <= (std::numeric_limits<int16_t>::max)()) { // int 16 - 
oa->write_character(static_cast<CharType>(0xD1)); + oa->write_character(to_char_type(0xD1)); write_number(static_cast<int16_t>(j.m_value.number_integer)); } else if (j.m_value.number_integer >= (std::numeric_limits<int32_t>::min)() and j.m_value.number_integer <= (std::numeric_limits<int32_t>::max)()) { // int 32 - oa->write_character(static_cast<CharType>(0xD2)); + oa->write_character(to_char_type(0xD2)); write_number(static_cast<int32_t>(j.m_value.number_integer)); } else if (j.m_value.number_integer >= (std::numeric_limits<int64_t>::min)() and j.m_value.number_integer <= (std::numeric_limits<int64_t>::max)()) { // int 64 - oa->write_character(static_cast<CharType>(0xD3)); + oa->write_character(to_char_type(0xD3)); write_number(static_cast<int64_t>(j.m_value.number_integer)); } } @@ -8048,25 +8728,25 @@ class binary_writer else if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)()) { // uint 8 - oa->write_character(static_cast<CharType>(0xCC)); + oa->write_character(to_char_type(0xCC)); write_number(static_cast<uint8_t>(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits<uint16_t>::max)()) { // uint 16 - oa->write_character(static_cast<CharType>(0xCD)); + oa->write_character(to_char_type(0xCD)); write_number(static_cast<uint16_t>(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits<uint32_t>::max)()) { // uint 32 - oa->write_character(static_cast<CharType>(0xCE)); + oa->write_character(to_char_type(0xCE)); write_number(static_cast<uint32_t>(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits<uint64_t>::max)()) { // uint 64 - oa->write_character(static_cast<CharType>(0xCF)); + oa->write_character(to_char_type(0xCF)); write_number(static_cast<uint64_t>(j.m_value.number_integer)); } break; @@ -8091,19 +8771,19 @@ class binary_writer else if (N <= (std::numeric_limits<uint8_t>::max)()) { // str 8 - 
oa->write_character(static_cast<CharType>(0xD9)); + oa->write_character(to_char_type(0xD9)); write_number(static_cast<uint8_t>(N)); } else if (N <= (std::numeric_limits<uint16_t>::max)()) { // str 16 - oa->write_character(static_cast<CharType>(0xDA)); + oa->write_character(to_char_type(0xDA)); write_number(static_cast<uint16_t>(N)); } else if (N <= (std::numeric_limits<uint32_t>::max)()) { // str 32 - oa->write_character(static_cast<CharType>(0xDB)); + oa->write_character(to_char_type(0xDB)); write_number(static_cast<uint32_t>(N)); } @@ -8126,13 +8806,13 @@ class binary_writer else if (N <= (std::numeric_limits<uint16_t>::max)()) { // array 16 - oa->write_character(static_cast<CharType>(0xDC)); + oa->write_character(to_char_type(0xDC)); write_number(static_cast<uint16_t>(N)); } else if (N <= (std::numeric_limits<uint32_t>::max)()) { // array 32 - oa->write_character(static_cast<CharType>(0xDD)); + oa->write_character(to_char_type(0xDD)); write_number(static_cast<uint32_t>(N)); } @@ -8156,13 +8836,13 @@ class binary_writer else if (N <= (std::numeric_limits<uint16_t>::max)()) { // map 16 - oa->write_character(static_cast<CharType>(0xDE)); + oa->write_character(to_char_type(0xDE)); write_number(static_cast<uint16_t>(N)); } else if (N <= (std::numeric_limits<uint32_t>::max)()) { // map 32 - oa->write_character(static_cast<CharType>(0xDF)); + oa->write_character(to_char_type(0xDF)); write_number(static_cast<uint32_t>(N)); } @@ -8195,7 +8875,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('Z')); + oa->write_character(to_char_type('Z')); } break; } @@ -8203,9 +8883,11 @@ class binary_writer case value_t::boolean: { if (add_prefix) + { oa->write_character(j.m_value.boolean - ? static_cast<CharType>('T') - : static_cast<CharType>('F')); + ? 
to_char_type('T') + : to_char_type('F')); + } break; } @@ -8231,7 +8913,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('S')); + oa->write_character(to_char_type('S')); } write_number_with_ubjson_prefix(j.m_value.string->size(), true); oa->write_characters( @@ -8244,7 +8926,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('[')); + oa->write_character(to_char_type('[')); } bool prefix_required = true; @@ -8261,14 +8943,14 @@ class binary_writer if (same_prefix) { prefix_required = false; - oa->write_character(static_cast<CharType>('$')); + oa->write_character(to_char_type('$')); oa->write_character(first_prefix); } } if (use_count) { - oa->write_character(static_cast<CharType>('#')); + oa->write_character(to_char_type('#')); write_number_with_ubjson_prefix(j.m_value.array->size(), true); } @@ -8279,7 +8961,7 @@ class binary_writer if (not use_count) { - oa->write_character(static_cast<CharType>(']')); + oa->write_character(to_char_type(']')); } break; @@ -8289,7 +8971,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('{')); + oa->write_character(to_char_type('{')); } bool prefix_required = true; @@ -8306,67 +8988,383 @@ class binary_writer if (same_prefix) { prefix_required = false; - oa->write_character(static_cast<CharType>('$')); + oa->write_character(to_char_type('$')); oa->write_character(first_prefix); } } if (use_count) { - oa->write_character(static_cast<CharType>('#')); + oa->write_character(to_char_type('#')); write_number_with_ubjson_prefix(j.m_value.object->size(), true); } - for (const auto& el : *j.m_value.object) - { - write_number_with_ubjson_prefix(el.first.size(), true); - oa->write_characters( - reinterpret_cast<const CharType*>(el.first.c_str()), - el.first.size()); - write_ubjson(el.second, use_count, use_type, prefix_required); - } + for (const auto& el : *j.m_value.object) + { + 
write_number_with_ubjson_prefix(el.first.size(), true); + oa->write_characters( + reinterpret_cast<const CharType*>(el.first.c_str()), + el.first.size()); + write_ubjson(el.second, use_count, use_type, prefix_required); + } + + if (not use_count) + { + oa->write_character(to_char_type('}')); + } + + break; + } + + default: + break; + } + } + + private: + ////////// + // BSON // + ////////// + + /*! + @return The size of a BSON document entry header, including the id marker + and the entry name size (and its null-terminator). + */ + static std::size_t calc_bson_entry_header_size(const string_t& name) + { + const auto it = name.find(static_cast<typename string_t::value_type>(0)); + if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos)) + { + JSON_THROW(out_of_range::create(409, + "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); + } + + return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; + } + + /*! + @brief Writes the given @a element_type and @a name to the output adapter + */ + void write_bson_entry_header(const string_t& name, + const std::uint8_t element_type) + { + oa->write_character(to_char_type(element_type)); // boolean + oa->write_characters( + reinterpret_cast<const CharType*>(name.c_str()), + name.size() + 1u); + } + + /*! + @brief Writes a BSON element with key @a name and boolean value @a value + */ + void write_bson_boolean(const string_t& name, + const bool value) + { + write_bson_entry_header(name, 0x08); + oa->write_character(value ? to_char_type(0x01) : to_char_type(0x00)); + } + + /*! + @brief Writes a BSON element with key @a name and double value @a value + */ + void write_bson_double(const string_t& name, + const double value) + { + write_bson_entry_header(name, 0x01); + write_number<double, true>(value); + } + + /*! 
+ @return The size of the BSON-encoded string in @a value + */ + static std::size_t calc_bson_string_size(const string_t& value) + { + return sizeof(std::int32_t) + value.size() + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and string value @a value + */ + void write_bson_string(const string_t& name, + const string_t& value) + { + write_bson_entry_header(name, 0x02); + + write_number<std::int32_t, true>(static_cast<std::int32_t>(value.size() + 1ul)); + oa->write_characters( + reinterpret_cast<const CharType*>(value.c_str()), + value.size() + 1); + } + + /*! + @brief Writes a BSON element with key @a name and null value + */ + void write_bson_null(const string_t& name) + { + write_bson_entry_header(name, 0x0A); + } + + /*! + @return The size of the BSON-encoded integer @a value + */ + static std::size_t calc_bson_integer_size(const std::int64_t value) + { + if ((std::numeric_limits<std::int32_t>::min)() <= value and value <= (std::numeric_limits<std::int32_t>::max)()) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } + } + + /*! + @brief Writes a BSON element with key @a name and integer @a value + */ + void write_bson_integer(const string_t& name, + const std::int64_t value) + { + if ((std::numeric_limits<std::int32_t>::min)() <= value and value <= (std::numeric_limits<std::int32_t>::max)()) + { + write_bson_entry_header(name, 0x10); // int32 + write_number<std::int32_t, true>(static_cast<std::int32_t>(value)); + } + else + { + write_bson_entry_header(name, 0x12); // int64 + write_number<std::int64_t, true>(static_cast<std::int64_t>(value)); + } + } + + /*! + @return The size of the BSON-encoded unsigned integer in @a value + */ + static constexpr std::size_t calc_bson_unsigned_size(const std::uint64_t value) noexcept + { + return (value <= static_cast<std::uint64_t>((std::numeric_limits<std::int32_t>::max)())) + ? sizeof(std::int32_t) + : sizeof(std::int64_t); + } + + /*!
+ @brief Writes a BSON element with key @a name and unsigned @a value + */ + void write_bson_unsigned(const string_t& name, + const std::uint64_t value) + { + if (value <= static_cast<std::uint64_t>((std::numeric_limits<std::int32_t>::max)())) + { + write_bson_entry_header(name, 0x10 /* int32 */); + write_number<std::int32_t, true>(static_cast<std::int32_t>(value)); + } + else if (value <= static_cast<std::uint64_t>((std::numeric_limits<std::int64_t>::max)())) + { + write_bson_entry_header(name, 0x12 /* int64 */); + write_number<std::int64_t, true>(static_cast<std::int64_t>(value)); + } + else + { + JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(value) + " cannot be represented by BSON as it does not fit int64")); + } + } + + /*! + @brief Writes a BSON element with key @a name and object @a value + */ + void write_bson_object_entry(const string_t& name, + const typename BasicJsonType::object_t& value) + { + write_bson_entry_header(name, 0x03); // object + write_bson_object(value); + } + + /*! + @return The size of the BSON-encoded array @a value + */ + static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) + { + std::size_t embedded_document_size = 0ul; + std::size_t array_index = 0ul; + + for (const auto& el : value) + { + embedded_document_size += calc_bson_element_size(std::to_string(array_index++), el); + } + + return sizeof(std::int32_t) + embedded_document_size + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and array @a value + */ + void write_bson_array(const string_t& name, + const typename BasicJsonType::array_t& value) + { + write_bson_entry_header(name, 0x04); // array + write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_array_size(value))); + + std::size_t array_index = 0ul; + + for (const auto& el : value) + { + write_bson_element(std::to_string(array_index++), el); + } + + oa->write_character(to_char_type(0x00)); + } + + /*! 
+ @brief Calculates the size necessary to serialize the JSON value @a j with its @a name + @return The calculated size for the BSON document entry for @a j with the given @a name. + */ + static std::size_t calc_bson_element_size(const string_t& name, + const BasicJsonType& j) + { + const auto header_size = calc_bson_entry_header_size(name); + switch (j.type()) + { + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + + case value_t::boolean: + return header_size + 1ul; + + case value_t::number_float: + return header_size + 8ul; + + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + + case value_t::null: + return header_size + 0ul; + + // LCOV_EXCL_START + default: + assert(false); + return 0ul; + // LCOV_EXCL_STOP + }; + } + + /*! + @brief Serializes the JSON value @a j to BSON and associates it with the + key @a name. 
+ @param name The name to associate with the JSON entity @a j within the + current BSON document + @return The size of the BSON entry + */ + void write_bson_element(const string_t& name, + const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + return write_bson_object_entry(name, *j.m_value.object); + + case value_t::array: + return write_bson_array(name, *j.m_value.array); + + case value_t::boolean: + return write_bson_boolean(name, j.m_value.boolean); + + case value_t::number_float: + return write_bson_double(name, j.m_value.number_float); + + case value_t::number_integer: + return write_bson_integer(name, j.m_value.number_integer); + + case value_t::number_unsigned: + return write_bson_unsigned(name, j.m_value.number_unsigned); - if (not use_count) - { - oa->write_character(static_cast<CharType>('}')); - } + case value_t::string: + return write_bson_string(name, *j.m_value.string); - break; - } + case value_t::null: + return write_bson_null(name); + // LCOV_EXCL_START default: - break; - } + assert(false); + return; + // LCOV_EXCL_STOP + }; } - private: - /* - @brief write a number to output input + /*! + @brief Calculates the size of the BSON serialization of the given + JSON-object @a j. + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value) + { + std::size_t document_size = std::accumulate(value.begin(), value.end(), 0ul, + [](size_t result, const typename BasicJsonType::object_t::value_type & el) + { + return result += calc_bson_element_size(el.first, el.second); + }); - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number + return sizeof(std::int32_t) + document_size + 1ul; + } - @note This function needs to respect the system's endianess, because bytes - in CBOR, MessagePack, and UBJSON are stored in network order (big - endian) and therefore need reordering on little endian systems. + /*! 
+ @param[in] j JSON value to serialize + @pre j.type() == value_t::object */ - template<typename NumberType> - void write_number(const NumberType n) + void write_bson_object(const typename BasicJsonType::object_t& value) { - // step 1: write number to array of length NumberType - std::array<CharType, sizeof(NumberType)> vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); + write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_object_size(value))); - // step 2: write array to output (with possible reordering) - if (is_little_endian) + for (const auto& el : value) { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); + write_bson_element(el.first, el.second); } - oa->write_characters(vec.data(), sizeof(NumberType)); + oa->write_character(to_char_type(0x00)); + } + + ////////// + // CBOR // + ////////// + + static constexpr CharType get_cbor_float_prefix(float /*unused*/) + { + return to_char_type(0xFA); // Single-Precision Float + } + + static constexpr CharType get_cbor_float_prefix(double /*unused*/) + { + return to_char_type(0xFB); // Double-Precision Float } + ///////////// + // MsgPack // + ///////////// + + static constexpr CharType get_msgpack_float_prefix(float /*unused*/) + { + return to_char_type(0xCA); // float 32 + } + + static constexpr CharType get_msgpack_float_prefix(double /*unused*/) + { + return to_char_type(0xCB); // float 64 + } + + //////////// + // UBJSON // + //////////// + // UBJSON: write number (floating point) template<typename NumberType, typename std::enable_if< std::is_floating_point<NumberType>::value, int>::type = 0> @@ -8390,7 +9388,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('i')); // int8 + oa->write_character(to_char_type('i')); // int8 } write_number(static_cast<uint8_t>(n)); } @@ -8398,7 +9396,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('U')); // uint8 + 
oa->write_character(to_char_type('U')); // uint8 } write_number(static_cast<uint8_t>(n)); } @@ -8406,7 +9404,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('I')); // int16 + oa->write_character(to_char_type('I')); // int16 } write_number(static_cast<int16_t>(n)); } @@ -8414,7 +9412,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('l')); // int32 + oa->write_character(to_char_type('l')); // int32 } write_number(static_cast<int32_t>(n)); } @@ -8422,13 +9420,13 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('L')); // int64 + oa->write_character(to_char_type('L')); // int64 } write_number(static_cast<int64_t>(n)); } else { - JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(n))); + JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(n) + " cannot be represented by UBJSON as it does not fit int64")); } } @@ -8443,7 +9441,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('i')); // int8 + oa->write_character(to_char_type('i')); // int8 } write_number(static_cast<int8_t>(n)); } @@ -8451,7 +9449,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('U')); // uint8 + oa->write_character(to_char_type('U')); // uint8 } write_number(static_cast<uint8_t>(n)); } @@ -8459,7 +9457,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('I')); // int16 + oa->write_character(to_char_type('I')); // int16 } write_number(static_cast<int16_t>(n)); } @@ -8467,7 +9465,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('l')); // int32 + oa->write_character(to_char_type('l')); // int32 } write_number(static_cast<int32_t>(n)); } @@ -8475,14 +9473,14 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast<CharType>('L')); // int64 + 
oa->write_character(to_char_type('L')); // int64 } write_number(static_cast<int64_t>(n)); } // LCOV_EXCL_START else { - JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(n))); + JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(n) + " cannot be represented by UBJSON as it does not fit int64")); } // LCOV_EXCL_STOP } @@ -8512,22 +9510,20 @@ class binary_writer { return 'i'; } - else if ((std::numeric_limits<uint8_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<uint8_t>::max)()) + if ((std::numeric_limits<uint8_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<uint8_t>::max)()) { return 'U'; } - else if ((std::numeric_limits<int16_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<int16_t>::max)()) + if ((std::numeric_limits<int16_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<int16_t>::max)()) { return 'I'; } - else if ((std::numeric_limits<int32_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<int32_t>::max)()) + if ((std::numeric_limits<int32_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<int32_t>::max)()) { return 'l'; } - else // no check and assume int64_t (see note above) - { - return 'L'; - } + // no check and assume int64_t (see note above) + return 'L'; } case value_t::number_unsigned: @@ -8536,22 +9532,20 @@ class binary_writer { return 'i'; } - else if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)()) + if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)()) { return 'U'; } - else if (j.m_value.number_unsigned <= (std::numeric_limits<int16_t>::max)()) + if (j.m_value.number_unsigned <= (std::numeric_limits<int16_t>::max)()) { return 'I'; } - else if (j.m_value.number_unsigned <= 
(std::numeric_limits<int32_t>::max)()) + if (j.m_value.number_unsigned <= (std::numeric_limits<int32_t>::max)()) { return 'l'; } - else // no check and assume int64_t (see note above) - { - return 'L'; - } + // no check and assume int64_t (see note above) + return 'L'; } case value_t::number_float: @@ -8571,34 +9565,87 @@ class binary_writer } } - static constexpr CharType get_cbor_float_prefix(float) + static constexpr CharType get_ubjson_float_prefix(float /*unused*/) + { + return 'd'; // float 32 + } + + static constexpr CharType get_ubjson_float_prefix(double /*unused*/) { - return static_cast<CharType>(0xFA); // Single-Precision Float + return 'D'; // float 64 } - static constexpr CharType get_cbor_float_prefix(double) + /////////////////////// + // Utility functions // + /////////////////////// + + /* + @brief write a number to output input + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian + + @note This function needs to respect the system's endianess, because bytes + in CBOR, MessagePack, and UBJSON are stored in network order (big + endian) and therefore need reordering on little endian systems. 
+ */ + template<typename NumberType, bool OutputIsLittleEndian = false> + void write_number(const NumberType n) { - return static_cast<CharType>(0xFB); // Double-Precision Float + // step 1: write number to array of length NumberType + std::array<CharType, sizeof(NumberType)> vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (is_little_endian and not OutputIsLittleEndian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); } - static constexpr CharType get_msgpack_float_prefix(float) + public: + // The following to_char_type functions are implement the conversion + // between uint8_t and CharType. In case CharType is not unsigned, + // such a conversion is required to allow values greater than 128. + // See <https://github.com/nlohmann/json/issues/1286> for a discussion. + template < typename C = CharType, + enable_if_t < std::is_signed<C>::value and std::is_signed<char>::value > * = nullptr > + static constexpr CharType to_char_type(std::uint8_t x) noexcept { - return static_cast<CharType>(0xCA); // float 32 + return *reinterpret_cast<char*>(&x); } - static constexpr CharType get_msgpack_float_prefix(double) + template < typename C = CharType, + enable_if_t < std::is_signed<C>::value and std::is_unsigned<char>::value > * = nullptr > + static CharType to_char_type(std::uint8_t x) noexcept { - return static_cast<CharType>(0xCB); // float 64 + static_assert(sizeof(std::uint8_t) == sizeof(CharType), "size of CharType must be equal to std::uint8_t"); + static_assert(std::is_pod<CharType>::value, "CharType must be POD"); + CharType result; + std::memcpy(&result, &x, sizeof(x)); + return result; } - static constexpr CharType get_ubjson_float_prefix(float) + template<typename C = CharType, + enable_if_t<std::is_unsigned<C>::value>* = nullptr> + static constexpr CharType 
to_char_type(std::uint8_t x) noexcept { - return 'd'; // float 32 + return x; } - static constexpr CharType get_ubjson_float_prefix(double) + template < typename InputCharType, typename C = CharType, + enable_if_t < + std::is_signed<C>::value and + std::is_signed<char>::value and + std::is_same<char, typename std::remove_cv<InputCharType>::type>::value + > * = nullptr > + static constexpr CharType to_char_type(InputCharType x) noexcept { - return 'D'; // float 64 + return x; } private: @@ -8608,8 +9655,8 @@ class binary_writer /// the output output_adapter_t<CharType> oa = nullptr; }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/output/serializer.hpp> @@ -8679,10 +9726,9 @@ struct diyfp // f * 2^e { static constexpr int kPrecision = 64; // = q - uint64_t f; - int e; + uint64_t f = 0; + int e = 0; - constexpr diyfp() noexcept : f(0), e(0) {} constexpr diyfp(uint64_t f_, int e_) noexcept : f(f_), e(e_) {} /*! @@ -8694,7 +9740,7 @@ struct diyfp // f * 2^e assert(x.e == y.e); assert(x.f >= y.f); - return diyfp(x.f - y.f, x.e); + return {x.f - y.f, x.e}; } /*! @@ -8759,7 +9805,7 @@ struct diyfp // f * 2^e const uint64_t h = p3 + p2_hi + p1_hi + (Q >> 32); - return diyfp(h, x.e + y.e + 64); + return {h, x.e + y.e + 64}; } /*! 
@@ -8790,7 +9836,7 @@ struct diyfp // f * 2^e assert(delta >= 0); assert(((x.f << delta) >> delta) == x.f); - return diyfp(x.f << delta, target_exponent); + return {x.f << delta, target_exponent}; } }; @@ -9093,7 +10139,7 @@ inline cached_power get_cached_power_for_binary_exponent(int e) assert(e >= -1500); assert(e <= 1500); const int f = kAlpha - e - 1; - const int k = (f * 78913) / (1 << 18) + (f > 0); + const int k = (f * 78913) / (1 << 18) + static_cast<int>(f > 0); const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) / kCachedPowersDecStep; assert(index >= 0); @@ -9241,7 +10287,7 @@ inline void grisu2_digit_gen(char* buffer, int& length, int& decimal_exponent, const diyfp one(uint64_t{1} << -M_plus.e, M_plus.e); - uint32_t p1 = static_cast<uint32_t>(M_plus.f >> -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.) + auto p1 = static_cast<uint32_t>(M_plus.f >> -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.) uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e // 1) @@ -9560,7 +10606,7 @@ inline char* append_exponent(char* buf, int e) *buf++ = '+'; } - uint32_t k = static_cast<uint32_t>(e); + auto k = static_cast<uint32_t>(e); if (k < 10) { // Always print at least two digits in the exponent. @@ -9678,7 +10724,7 @@ format. Returns an iterator pointing past-the-end of the decimal representation. @note The result is NOT null-terminated. 
*/ template <typename FloatType> -char* to_chars(char* first, char* last, FloatType value) +char* to_chars(char* first, const char* last, FloatType value) { static_cast<void>(last); // maybe unused - fix warning assert(std::isfinite(value)); @@ -9730,6 +10776,8 @@ char* to_chars(char* first, char* last, FloatType value) // #include <nlohmann/detail/meta/cpp_future.hpp> +// #include <nlohmann/detail/output/binary_writer.hpp> + // #include <nlohmann/detail/output/output_adapters.hpp> // #include <nlohmann/detail/value_t.hpp> @@ -9743,6 +10791,14 @@ namespace detail // serialization // /////////////////// +/// how to treat decoding errors +enum class error_handler_t +{ + strict, ///< throw a type_error exception in case of invalid UTF-8 + replace, ///< replace invalid UTF-8 sequences with U+FFFD + ignore ///< ignore invalid UTF-8 sequences +}; + template<typename BasicJsonType> class serializer { @@ -9757,17 +10813,25 @@ class serializer /*! @param[in] s output stream to serialize to @param[in] ichar indentation character to use - */ - serializer(output_adapter_t<char> s, const char ichar) - : o(std::move(s)), loc(std::localeconv()), - thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep)), - decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point)), - indent_char(ichar), indent_string(512, indent_char) + @param[in] error_handler_ how to react on decoding errors + */ + serializer(output_adapter_t<char> s, const char ichar, + error_handler_t error_handler_ = error_handler_t::strict) + : o(std::move(s)) + , loc(std::localeconv()) + , thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep)) + , decimal_point(loc->decimal_point == nullptr ? 
'\0' : * (loc->decimal_point)) + , indent_char(ichar) + , indent_string(512, indent_char) + , error_handler(error_handler_) {} // delete because of pointer members serializer(const serializer&) = delete; serializer& operator=(const serializer&) = delete; + serializer(serializer&&) = delete; + serializer& operator=(serializer&&) = delete; + ~serializer() = default; /*! @brief internal implementation of the serialization function @@ -9999,6 +11063,10 @@ class serializer uint8_t state = UTF8_ACCEPT; std::size_t bytes = 0; // number of bytes written to string_buffer + // number of bytes written at the point of the last valid byte + std::size_t bytes_after_last_accept = 0; + std::size_t undumped_chars = 0; + for (std::size_t i = 0; i < s.size(); ++i) { const auto byte = static_cast<uint8_t>(s[i]); @@ -10096,14 +11164,69 @@ class serializer o->write_characters(string_buffer.data(), bytes); bytes = 0; } + + // remember the byte position of this accept + bytes_after_last_accept = bytes; + undumped_chars = 0; break; } case UTF8_REJECT: // decode found invalid UTF-8 byte { - std::string sn(3, '\0'); - snprintf(&sn[0], sn.size(), "%.2X", byte); - JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn)); + switch (error_handler) + { + case error_handler_t::strict: + { + std::string sn(3, '\0'); + snprintf(&sn[0], sn.size(), "%.2X", byte); + JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn)); + } + + case error_handler_t::ignore: + case error_handler_t::replace: + { + // in case we saw this character the first time, we + // would like to read it again, because the byte + // may be OK for itself, but just not OK for the + // previous sequence + if (undumped_chars > 0) + { + --i; + } + + // reset length buffer to the last accepted index; + // thus removing/ignoring the invalid characters + bytes = bytes_after_last_accept; + + if (error_handler == error_handler_t::replace) + { + // add 
a replacement character + if (ensure_ascii) + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = 'u'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'd'; + } + else + { + string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xEF'); + string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBF'); + string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBD'); + } + bytes_after_last_accept = bytes; + } + + undumped_chars = 0; + + // continue processing the string + state = UTF8_ACCEPT; + break; + } + } + break; } default: // decode found yet incomplete multi-byte code point @@ -10113,11 +11236,13 @@ class serializer // code point will not be escaped - copy byte to buffer string_buffer[bytes++] = s[i]; } + ++undumped_chars; break; } } } + // we finished processing the string if (JSON_LIKELY(state == UTF8_ACCEPT)) { // write buffer @@ -10129,9 +11254,38 @@ class serializer else { // we finish reading, but do not accept: string was incomplete - std::string sn(3, '\0'); - snprintf(&sn[0], sn.size(), "%.2X", static_cast<uint8_t>(s.back())); - JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn)); + switch (error_handler) + { + case error_handler_t::strict: + { + std::string sn(3, '\0'); + snprintf(&sn[0], sn.size(), "%.2X", static_cast<uint8_t>(s.back())); + JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn)); + } + + case error_handler_t::ignore: + { + // write all accepted bytes + o->write_characters(string_buffer.data(), bytes_after_last_accept); + break; + } + + case error_handler_t::replace: + { + // write all accepted bytes + o->write_characters(string_buffer.data(), bytes_after_last_accept); + // add a replacement character + if (ensure_ascii) + { + o->write_characters("\\ufffd", 6); + } + else + { + o->write_characters("\xEF\xBF\xBD", 
3); + } + break; + } + } } } @@ -10157,7 +11311,7 @@ class serializer return; } - const bool is_negative = not (x >= 0); // see issue #755 + const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not (x >= 0); // see issue #755 std::size_t i = 0; while (x != 0) @@ -10341,9 +11495,12 @@ class serializer const char indent_char; /// the indentation string string_t indent_string; + + /// error_handler how to react on decoding errors + const error_handler_t error_handler; }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/json_ref.hpp> @@ -10351,6 +11508,9 @@ class serializer #include <initializer_list> #include <utility> +// #include <nlohmann/detail/meta/type_traits.hpp> + + namespace nlohmann { namespace detail @@ -10373,15 +11533,19 @@ class json_ref : owned_value(init), value_ref(&owned_value), is_rvalue(true) {} - template<class... Args> - json_ref(Args&& ... args) - : owned_value(std::forward<Args>(args)...), value_ref(&owned_value), is_rvalue(true) - {} + template < + class... Args, + enable_if_t<std::is_constructible<value_type, Args...>::value, int> = 0 > + json_ref(Args && ... 
args) + : owned_value(std::forward<Args>(args)...), value_ref(&owned_value), + is_rvalue(true) {} // class should be movable only json_ref(json_ref&&) = default; json_ref(const json_ref&) = delete; json_ref& operator=(const json_ref&) = delete; + json_ref& operator=(json_ref&&) = delete; + ~json_ref() = default; value_type moved_or_copied() const { @@ -10407,8 +11571,8 @@ class json_ref value_type* value_ref = nullptr; const bool is_rvalue; }; -} -} +} // namespace detail +} // namespace nlohmann // #include <nlohmann/detail/json_pointer.hpp> @@ -10475,7 +11639,7 @@ class json_pointer @since version 2.0.0 */ - std::string to_string() const noexcept + std::string to_string() const { return std::accumulate(reference_tokens.begin(), reference_tokens.end(), std::string{}, @@ -10530,7 +11694,7 @@ class json_pointer } /// return whether pointer points to the root document - bool is_root() const + bool is_root() const noexcept { return reference_tokens.empty(); } @@ -10922,11 +12086,11 @@ class json_pointer std::size_t slash = reference_string.find_first_of('/', 1), // set the beginning of the first reference token start = 1; - // we can stop if start == string::npos+1 = 0 + // we can stop if start == 0 (if slash == std::string::npos) start != 0; // set the beginning of the next reference token // (will eventually be 0 if slash == std::string::npos) - start = slash + 1, + start = (slash == std::string::npos) ? 
0 : slash + 1, // find next slash slash = reference_string.find_first_of('/', start)) { @@ -10982,7 +12146,7 @@ class json_pointer {} } - /// escape "~"" to "~0" and "/" to "~1" + /// escape "~" to "~0" and "/" to "~1" static std::string escape(std::string s) { replace_substring(s, "~", "~0"); @@ -11109,7 +12273,7 @@ class json_pointer /// the reference tokens std::vector<std::string> reference_tokens; }; -} +} // namespace nlohmann // #include <nlohmann/adl_serializer.hpp> @@ -11123,6 +12287,7 @@ class json_pointer namespace nlohmann { + template<typename, typename> struct adl_serializer { @@ -11132,14 +12297,13 @@ struct adl_serializer This function is usually called by the `get()` function of the @ref basic_json class (either explicit or via conversion operators). - @param[in] j JSON value to read from + @param[in] j JSON value to read from @param[in,out] val value to write to */ template<typename BasicJsonType, typename ValueType> static auto from_json(BasicJsonType&& j, ValueType& val) noexcept( - noexcept(::nlohmann::from_json(std::forward<BasicJsonType>(j), val))) -> decltype( - ::nlohmann::from_json(std::forward<BasicJsonType>(j), val), void() - ) + noexcept(::nlohmann::from_json(std::forward<BasicJsonType>(j), val))) + -> decltype(::nlohmann::from_json(std::forward<BasicJsonType>(j), val), void()) { ::nlohmann::from_json(std::forward<BasicJsonType>(j), val); } @@ -11151,18 +12315,18 @@ struct adl_serializer class. @param[in,out] j JSON value to write to - @param[in] val value to read from + @param[in] val value to read from */ template <typename BasicJsonType, typename ValueType> static auto to_json(BasicJsonType& j, ValueType&& val) noexcept( noexcept(::nlohmann::to_json(j, std::forward<ValueType>(val)))) - -> decltype(::nlohmann::to_json(j, std::forward<ValueType>(val)), - void()) + -> decltype(::nlohmann::to_json(j, std::forward<ValueType>(val)), void()) { ::nlohmann::to_json(j, std::forward<ValueType>(val)); } }; -} + +} // namespace nlohmann /*! 
@@ -11303,6 +12467,8 @@ class basic_json using json_pointer = ::nlohmann::json_pointer<basic_json>; template<typename T, typename SFINAE> using json_serializer = JSONSerializer<T, SFINAE>; + /// how to treat decoding errors + using error_handler_t = detail::error_handler_t; /// helper type for initializer lists of basic_json values using initializer_list_t = std::initializer_list<detail::json_ref<basic_json>>; @@ -12042,7 +13208,7 @@ class basic_json object = nullptr; // silence warning, see #821 if (JSON_UNLIKELY(t == value_t::null)) { - JSON_THROW(other_error::create(500, "961c151d2e87f2686a955a9be24d316f1362bf21 3.3.0")); // LCOV_EXCL_LINE + JSON_THROW(other_error::create(500, "961c151d2e87f2686a955a9be24d316f1362bf21 3.4.0")); // LCOV_EXCL_LINE } break; } @@ -12961,7 +14127,7 @@ class basic_json @since version 1.0.0 */ - reference& operator=(basic_json other) noexcept ( + basic_json& operator=(basic_json other) noexcept ( std::is_nothrow_move_constructible<value_t>::value and std::is_nothrow_move_assignable<value_t>::value and std::is_nothrow_move_constructible<json_value>::value and @@ -13027,6 +14193,10 @@ class basic_json @param[in] ensure_ascii If @a ensure_ascii is true, all non-ASCII characters in the output are escaped with `\uXXXX` sequences, and the result consists of ASCII characters only. + @param[in] error_handler how to react on decoding errors; there are three + possible values: `strict` (throws and exception in case a decoding error + occurs; default), `replace` (replace invalid UTF-8 sequences with U+FFFD), + and `ignore` (ignore invalid UTF-8 sequences during serialization). 
@return string containing the serialization of the JSON value @@ -13045,13 +14215,16 @@ class basic_json @see https://docs.python.org/2/library/json.html#json.dump @since version 1.0.0; indentation character @a indent_char, option - @a ensure_ascii and exceptions added in version 3.0.0 + @a ensure_ascii and exceptions added in version 3.0.0; error + handlers added in version 3.4.0. */ - string_t dump(const int indent = -1, const char indent_char = ' ', - const bool ensure_ascii = false) const + string_t dump(const int indent = -1, + const char indent_char = ' ', + const bool ensure_ascii = false, + const error_handler_t error_handler = error_handler_t::strict) const { string_t result; - serializer s(detail::output_adapter<char, string_t>(result), indent_char); + serializer s(detail::output_adapter<char, string_t>(result), indent_char, error_handler); if (indent >= 0) { @@ -14491,7 +15664,7 @@ class basic_json /*! @brief overload for a default value of type const char* - @copydoc basic_json::value(const typename object_t::key_type&, ValueType) const + @copydoc basic_json::value(const typename object_t::key_type&, const ValueType&) const */ string_t value(const typename object_t::key_type& key, const char* default_value) const { @@ -17094,6 +18267,8 @@ class basic_json @param[in] cb a parser callback function of type @ref parser_callback_t which is used to control the deserialization by filtering unwanted values (optional) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) @return result of the deserialization @@ -17489,7 +18664,7 @@ class basic_json vector in CBOR format.,to_cbor} @sa http://cbor.io - @sa @ref from_cbor(detail::input_adapter, const bool strict) for the + @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool) for the analogous deserialization @sa @ref to_msgpack(const basic_json&) for the related MessagePack format @sa @ref to_ubjson(const basic_json&, const bool, const bool) 
for the @@ -17586,8 +18761,7 @@ class basic_json vector in MessagePack format.,to_msgpack} @sa http://msgpack.org - @sa @ref from_msgpack(const std::vector<uint8_t>&, const size_t) for the - analogous deserialization + @sa @ref from_msgpack for the analogous deserialization @sa @ref to_cbor(const basic_json& for the related CBOR format @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the related UBJSON format @@ -17684,7 +18858,7 @@ class basic_json vector in UBJSON format.,to_ubjson} @sa http://ubjson.org - @sa @ref from_ubjson(detail::input_adapter, const bool strict) for the + @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the analogous deserialization @sa @ref to_cbor(const basic_json& for the related CBOR format @sa @ref to_msgpack(const basic_json&) for the related MessagePack format @@ -17712,6 +18886,91 @@ class basic_json binary_writer<char>(o).write_ubjson(j, use_size, use_type); } + + /*! + @brief Serializes the given JSON object `j` to BSON and returns a vector + containing the corresponding BSON-representation. + + BSON (Binary JSON) is a binary format in which zero or more ordered key/value pairs are + stored as a single entity (a so-called document). 
+ + The library uses the following mapping from JSON values types to BSON types: + + JSON value type | value/range | BSON type | marker + --------------- | --------------------------------- | ----------- | ------ + null | `null` | null | 0x0A + boolean | `true`, `false` | boolean | 0x08 + number_integer | -9223372036854775808..-2147483649 | int64 | 0x12 + number_integer | -2147483648..2147483647 | int32 | 0x10 + number_integer | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 0..2147483647 | int32 | 0x10 + number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 9223372036854775808..18446744073709551615| -- | -- + number_float | *any value* | double | 0x01 + string | *any value* | string | 0x02 + array | *any value* | document | 0x04 + object | *any value* | document | 0x03 + + @warning The mapping is **incomplete**, since only JSON-objects (and things + contained therein) can be serialized to BSON. + Also, integers larger than 9223372036854775807 cannot be serialized to BSON, + and the keys may not contain U+0000, since they are serialized a + zero-terminated c-strings. + + @throw out_of_range.407 if `j.is_number_unsigned() && j.get<std::uint64_t>() > 9223372036854775807` + @throw out_of_range.409 if a key in `j` contains a NULL (U+0000) + @throw type_error.317 if `!j.is_object()` + + @pre The input `j` is required to be an object: `j.is_object() == true`. + + @note Any BSON output created via @ref to_bson can be successfully parsed + by @ref from_bson. + + @param[in] j JSON value to serialize + @return BSON serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. 
+ + @liveexample{The example shows the serialization of a JSON value to a byte + vector in BSON format.,to_bson} + + @sa http://bsonspec.org/spec.html + @sa @ref from_bson(detail::input_adapter&&, const bool strict) for the + analogous deserialization + @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the + related UBJSON format + @sa @ref to_cbor(const basic_json&) for the related CBOR format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + */ + static std::vector<uint8_t> to_bson(const basic_json& j) + { + std::vector<uint8_t> result; + to_bson(j, result); + return result; + } + + /*! + @brief Serializes the given JSON object `j` to BSON and forwards the + corresponding BSON-representation to the given output_adapter `o`. + @param j The JSON object to convert to BSON. + @param o The output adapter that receives the binary BSON representation. + @pre The input `j` shall be an object: `j.is_object() == true` + @sa @ref to_bson(const basic_json&) + */ + static void to_bson(const basic_json& j, detail::output_adapter<uint8_t> o) + { + binary_writer<uint8_t>(o).write_bson(j); + } + + /*! + @copydoc to_bson(const basic_json&, detail::output_adapter<uint8_t>) + */ + static void to_bson(const basic_json& j, detail::output_adapter<char> o) + { + binary_writer<char>(o).write_bson(j); + } + + /*! 
@brief create a JSON value from an input in CBOR format @@ -17753,7 +19012,7 @@ class basic_json map | object | 0xBF False | `false` | 0xF4 True | `true` | 0xF5 - Nill | `null` | 0xF6 + Null | `null` | 0xF6 Half-Precision Float | number_float | 0xF9 Single-Precision Float | number_float | 0xFA Double-Precision Float | number_float | 0xFB @@ -17799,14 +19058,14 @@ class basic_json @sa http://cbor.io @sa @ref to_cbor(const basic_json&) for the analogous serialization - @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for the + @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for the related MessagePack format - @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for the + @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the related UBJSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added - @a strict parameter since 3.0.0; added @allow_exceptions parameter + @a strict parameter since 3.0.0; added @a allow_exceptions parameter since 3.2.0 */ static basic_json from_cbor(detail::input_adapter&& i, @@ -17820,7 +19079,7 @@ class basic_json } /*! 
- @copydoc from_cbor(detail::input_adapter, const bool, const bool) + @copydoc from_cbor(detail::input_adapter&&, const bool, const bool) */ template<typename A1, typename A2, detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0> @@ -17902,14 +19161,16 @@ class basic_json @sa http://msgpack.org @sa @ref to_msgpack(const basic_json&) for the analogous serialization - @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool) for the related CBOR format - @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for + @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the related UBJSON format + @sa @ref from_bson(detail::input_adapter&&, const bool, const bool) for + the related BSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added - @a strict parameter since 3.0.0; added @allow_exceptions parameter + @a strict parameter since 3.0.0; added @a allow_exceptions parameter since 3.2.0 */ static basic_json from_msgpack(detail::input_adapter&& i, @@ -17923,7 +19184,7 @@ class basic_json } /*! 
- @copydoc from_msgpack(detail::input_adapter, const bool, const bool) + @copydoc from_msgpack(detail::input_adapter&&, const bool, const bool) */ template<typename A1, typename A2, detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0> @@ -17987,12 +19248,14 @@ class basic_json @sa http://ubjson.org @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the analogous serialization - @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool) for the related CBOR format - @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for + @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for the related MessagePack format + @sa @ref from_bson(detail::input_adapter&&, const bool, const bool) for + the related BSON format - @since version 3.1.0; added @allow_exceptions parameter since 3.2.0 + @since version 3.1.0; added @a allow_exceptions parameter since 3.2.0 */ static basic_json from_ubjson(detail::input_adapter&& i, const bool strict = true, @@ -18005,7 +19268,7 @@ class basic_json } /*! - @copydoc from_ubjson(detail::input_adapter, const bool, const bool) + @copydoc from_ubjson(detail::input_adapter&&, const bool, const bool) */ template<typename A1, typename A2, detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0> @@ -18019,6 +19282,91 @@ class basic_json return res ? result : basic_json(value_t::discarded); } + /*! + @brief Create a JSON value from an input in BSON format + + Deserializes a given input @a i to a JSON value using the BSON (Binary JSON) + serialization format. 
+ + The library maps BSON record types to JSON value types as follows: + + BSON type | BSON marker byte | JSON value type + --------------- | ---------------- | --------------------------- + double | 0x01 | number_float + string | 0x02 | string + document | 0x03 | object + array | 0x04 | array + binary | 0x05 | still unsupported + undefined | 0x06 | still unsupported + ObjectId | 0x07 | still unsupported + boolean | 0x08 | boolean + UTC Date-Time | 0x09 | still unsupported + null | 0x0A | null + Regular Expr. | 0x0B | still unsupported + DB Pointer | 0x0C | still unsupported + JavaScript Code | 0x0D | still unsupported + Symbol | 0x0E | still unsupported + JavaScript Code w/ scope | 0x0F | still unsupported + int32 | 0x10 | number_integer + Timestamp | 0x11 | still unsupported + int64 | 0x12 | number_integer + 128-bit decimal float | 0x13 | still unsupported + Max Key | 0x7F | still unsupported + Min Key | 0xFF | still unsupported + + @warning The mapping is **incomplete**. The unsupported mappings + are indicated in the table above. + + @param[in] i an input in BSON format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value + + @throw parse_error.114 if an unsupported BSON record type is encountered + + @complexity Linear in the size of the input @a i.
+ + @liveexample{The example shows the deserialization of a byte vector in + BSON format to a JSON value.,from_bson} + + @sa http://bsonspec.org/spec.html + @sa @ref to_bson(const basic_json&) for the analogous serialization + @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool) for the + related CBOR format + @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for + the related MessagePack format + @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the + related UBJSON format + + @note If `sax_parse` reports failure (returns `false`), a value of type + value_t::discarded is returned instead of the parsed result. + + @since version 3.4.0 + */ + static basic_json from_bson(detail::input_adapter&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @copydoc from_bson(detail::input_adapter&&, const bool, const bool) + */ + template<typename A1, typename A2, + detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0> + static basic_json from_bson(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward<A1>(a1), std::forward<A2>(a2))).sax_parse(input_format_t::bson, &sdp, strict); + return res ?
result : basic_json(value_t::discarded); + } + + + /// @} ////////////////////////// @@ -18384,11 +19732,9 @@ class basic_json // avoid undefined behavior JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); } - else - { - // default case: insert add offset - parent.insert(parent.begin() + static_cast<difference_type>(idx), val); - } + + // default case: insert add offset + parent.insert(parent.begin() + static_cast<difference_type>(idx), val); } break; } diff --git a/include/nlohmann/json_fwd.hpp b/include/nlohmann/json_fwd.hpp index 5ff0d75373ab7559a3b0a3a5c71590736dd30dd2..32abba9130c8cc2277fca2a56758423d0b33d911 100644 --- a/include/nlohmann/json_fwd.hpp +++ b/include/nlohmann/json_fwd.hpp @@ -59,6 +59,6 @@ uses the standard template types. @since version 1.0.0 */ using json = basic_json<>; -} +} // namespace nlohmann #endif