🎨 clean up binary formats

parent 4d1eaace
......@@ -125,14 +125,38 @@ class binary_reader
}
private:
//////////
// BSON //
//////////
/*!
@brief Reads in a BSON-object and passes it to the SAX-parser.
@return whether a valid BSON-value was passed to the SAX parser
*/
bool parse_bson_internal()
{
std::int32_t documentSize;
get_number<std::int32_t, true>(input_format_t::bson, documentSize);
if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1))))
{
return false;
}
if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false)))
{
return false;
}
return sax->end_object();
}
/*!
@brief Parses a C-style string from the BSON input.
@param [out] result A reference to the string variable where the read string
is to be stored.
@return `true` if the \x00-byte indicating the end of the
string was encountered before the EOF.
`false` indicates an unexpected EOF.
@param[in, out] result A reference to the string variable where the read
string is to be stored.
@return `true` if the \x00-byte indicating the end of the string was
encountered before the EOF; false` indicates an unexpected EOF.
*/
bool get_bson_cstr(string_t& result)
{
......@@ -155,107 +179,107 @@ class binary_reader
}
/*!
@brief Parses a zero-terminated string of length @a len from the BSON input.
@param [in] len The length (including the zero-byte at the end) of the string to be read.
@param [out] result A reference to the string variable where the read string
is to be stored.
@brief Parses a zero-terminated string of length @a len from the BSON
input.
@param[in] len The length (including the zero-byte at the end) of the
string to be read.
@param[in, out] result A reference to the string variable where the read
string is to be stored.
@tparam NumberType The type of the length @a len
@pre len > 0
@return `true` if the string was successfully parsed
*/
template <typename NumberType>
template<typename NumberType>
bool get_bson_string(const NumberType len, string_t& result)
{
return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result)
&& get() != std::char_traits<char>::eof();
}
/*!
@return A hexadecimal string representation of the given @a byte
@param byte The byte to convert to a string
*/
static std::string byte_hexstring(unsigned char byte)
{
char cr[3];
snprintf(cr, sizeof(cr), "%02hhX", byte);
return std::string{cr};
return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof();
}
/*!
@brief Read a BSON document element of the given @a element_type.
@param element_type The BSON element type, c.f. http://bsonspec.org/spec.html
@param element_type_parse_position The position in the input stream, where the `element_type` was read.
@warning Not all BSON element types are supported yet. An unsupported @a element_type will
give rise to a parse_error.114: Unsupported BSON record type 0x...
@param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
@param[in] element_type_parse_position The position in the input stream,
where the `element_type` was read.
@warning Not all BSON element types are supported yet. An unsupported
@a element_type will give rise to a parse_error.114:
Unsupported BSON record type 0x...
@return whether a valid BSON-object/array was passed to the SAX parser
*/
bool parse_bson_element_internal(int element_type, std::size_t element_type_parse_position)
bool parse_bson_element_internal(const int element_type,
const std::size_t element_type_parse_position)
{
switch (element_type)
{
case 0x01: // double
{
double number;
return get_number<double, true>(input_format_t::bson, number)
&& sax->number_float(static_cast<number_float_t>(number), "");
return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), "");
}
case 0x02: // string
{
std::int32_t len;
string_t value;
return get_number<std::int32_t, true>(input_format_t::bson, len)
&& get_bson_string(len, value)
&& sax->string(value);
return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value);
}
case 0x08: // boolean
case 0x03: // object
{
return sax->boolean(static_cast<bool>(get()));
return parse_bson_internal();
}
case 0x10: // int32
case 0x04: // array
{
std::int32_t value;
return get_number<std::int32_t, true>(input_format_t::bson, value)
&& sax->number_integer(static_cast<std::int32_t>(value));
return parse_bson_array();
}
case 0x12: // int64
case 0x08: // boolean
{
std::int64_t value;
return get_number<std::int64_t, true>(input_format_t::bson, value)
&& sax->number_integer(static_cast<std::int64_t>(value));
return sax->boolean(static_cast<bool>(get()));
}
case 0x0A: // null
{
return sax->null();
}
case 0x03: // object
case 0x10: // int32
{
return parse_bson_internal();
std::int32_t value;
return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(static_cast<std::int32_t>(value));
}
case 0x04: // array
case 0x12: // int64
{
return parse_bson_array();
std::int64_t value;
return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(static_cast<std::int64_t>(value));
}
default: // anything else not supported (yet)
{
auto element_type_str = byte_hexstring(element_type);
return sax->parse_error(element_type_parse_position, element_type_str, parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + element_type_str));
char cr[3];
snprintf(cr, sizeof(cr), "%.2hhX", static_cast<unsigned char>(element_type));
return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr)));
}
}
}
/*!
@brief Read a BSON element list (as specified in the BSON-spec) from the input
and passes it to the SAX-parser.
The same binary layout is used for objects and arrays, hence it must
be indicated with the argument @a is_array which one is expected
(true --> array, false --> object).
@param is_array Determines if the element list being read is to be treated as
an object (@a is_array == false), or as an array (@a is_array == true).
@brief Read a BSON element list (as specified in the BSON-spec)
The same binary layout is used for objects and arrays, hence it must be
indicated with the argument @a is_array which one is expected
(true --> array, false --> object).
@param[in] is_array Determines if the element list being read is to be
treated as an object (@a is_array == false), or as an
array (@a is_array == true).
@return whether a valid BSON-object/array was passed to the SAX parser
*/
bool parse_bson_element_list(bool is_array)
bool parse_bson_element_list(const bool is_array)
{
while (auto element_type = get())
string_t key;
while (int element_type = get())
{
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list")))
{
......@@ -263,13 +287,12 @@ class binary_reader
}
const std::size_t element_type_parse_position = chars_read;
string_t key;
if (JSON_UNLIKELY(not get_bson_cstr(key)))
{
return false;
}
if (!is_array)
if (not is_array)
{
sax->key(key);
}
......@@ -278,7 +301,11 @@ class binary_reader
{
return false;
}
// get_bson_cstr only appends
key.clear();
}
return true;
}
......@@ -291,7 +318,7 @@ class binary_reader
std::int32_t documentSize;
get_number<std::int32_t, true>(input_format_t::bson, documentSize);
if (JSON_UNLIKELY(not sax->start_array(-1)))
if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1))))
{
return false;
}
......@@ -304,27 +331,9 @@ class binary_reader
return sax->end_array();
}
/*!
@brief Reads in a BSON-object and pass it to the SAX-parser.
@return whether a valid BSON-value was passed to the SAX parser
*/
bool parse_bson_internal()
{
std::int32_t documentSize;
get_number<std::int32_t, true>(input_format_t::bson, documentSize);
if (JSON_UNLIKELY(not sax->start_object(-1)))
{
return false;
}
if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false)))
{
return false;
}
return sax->end_object();
}
//////////
// CBOR //
//////////
/*!
@param[in] get_char whether a new character should be retrieved from the
......@@ -665,113 +674,26 @@ class binary_reader
}
/*!
@return whether a valid MessagePack value was passed to the SAX parser
@brief reads a CBOR string
This function first reads starting bytes to determine the expected
string length and then copies this number of bytes into a string.
Additionally, CBOR's strings with indefinite lengths are supported.
@param[out] result created string
@return whether string creation completed
*/
bool parse_msgpack_internal()
bool get_cbor_string(string_t& result)
{
switch (get())
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string")))
{
// EOF
case std::char_traits<char>::eof():
return unexpect_eof(input_format_t::msgpack, "value");
return false;
}
// positive fixint
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x08:
case 0x09:
case 0x0A:
case 0x0B:
case 0x0C:
case 0x0D:
case 0x0E:
case 0x0F:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1A:
case 0x1B:
case 0x1C:
case 0x1D:
case 0x1E:
case 0x1F:
case 0x20:
case 0x21:
case 0x22:
case 0x23:
case 0x24:
case 0x25:
case 0x26:
case 0x27:
case 0x28:
case 0x29:
case 0x2A:
case 0x2B:
case 0x2C:
case 0x2D:
case 0x2E:
case 0x2F:
case 0x30:
case 0x31:
case 0x32:
case 0x33:
case 0x34:
case 0x35:
case 0x36:
case 0x37:
case 0x38:
case 0x39:
case 0x3A:
case 0x3B:
case 0x3C:
case 0x3D:
case 0x3E:
case 0x3F:
case 0x40:
case 0x41:
case 0x42:
case 0x43:
case 0x44:
case 0x45:
case 0x46:
case 0x47:
case 0x48:
case 0x49:
case 0x4A:
case 0x4B:
case 0x4C:
case 0x4D:
case 0x4E:
case 0x4F:
case 0x50:
case 0x51:
case 0x52:
case 0x53:
case 0x54:
case 0x55:
case 0x56:
case 0x57:
case 0x58:
case 0x59:
case 0x5A:
case 0x5B:
case 0x5C:
case 0x5D:
case 0x5E:
case 0x5F:
switch (current)
{
// UTF-8 string (0x00..0x17 bytes follow)
case 0x60:
case 0x61:
case 0x62:
......@@ -796,371 +718,254 @@ class binary_reader
case 0x75:
case 0x76:
case 0x77:
case 0x78:
case 0x79:
case 0x7A:
case 0x7B:
case 0x7C:
case 0x7D:
case 0x7E:
case 0x7F:
return sax->number_unsigned(static_cast<number_unsigned_t>(current));
// fixmap
case 0x80:
case 0x81:
case 0x82:
case 0x83:
case 0x84:
case 0x85:
case 0x86:
case 0x87:
case 0x88:
case 0x89:
case 0x8A:
case 0x8B:
case 0x8C:
case 0x8D:
case 0x8E:
case 0x8F:
return get_msgpack_object(static_cast<std::size_t>(current & 0x0F));
{
return get_string(input_format_t::cbor, current & 0x1F, result);
}
// fixarray
case 0x90:
case 0x91:
case 0x92:
case 0x93:
case 0x94:
case 0x95:
case 0x96:
case 0x97:
case 0x98:
case 0x99:
case 0x9A:
case 0x9B:
case 0x9C:
case 0x9D:
case 0x9E:
case 0x9F:
return get_msgpack_array(static_cast<std::size_t>(current & 0x0F));
case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
{
uint8_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
// fixstr
case 0xA0:
case 0xA1:
case 0xA2:
case 0xA3:
case 0xA4:
case 0xA5:
case 0xA6:
case 0xA7:
case 0xA8:
case 0xA9:
case 0xAA:
case 0xAB:
case 0xAC:
case 0xAD:
case 0xAE:
case 0xAF:
case 0xB0:
case 0xB1:
case 0xB2:
case 0xB3:
case 0xB4:
case 0xB5:
case 0xB6:
case 0xB7:
case 0xB8:
case 0xB9:
case 0xBA:
case 0xBB:
case 0xBC:
case 0xBD:
case 0xBE:
case 0xBF:
{
string_t s;
return get_msgpack_string(s) and sax->string(s);
}
case 0xC0: // nil
return sax->null();
case 0xC2: // false
return sax->boolean(false);
case 0xC3: // true
return sax->boolean(true);
case 0xCA: // float 32
{
float number;
return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
}
case 0xCB: // float 64
{
double number;
return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
}
case 0xCC: // uint 8
{
uint8_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0xCD: // uint 16
{
uint16_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0xCE: // uint 32
{
uint32_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0xCF: // uint 64
{
uint64_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0xD0: // int 8
{
int8_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
case 0xD1: // int 16
{
int16_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
case 0xD2: // int 32
{
int32_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
case 0xD3: // int 64
{
int64_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
case 0xD9: // str 8
case 0xDA: // str 16
case 0xDB: // str 32
{
string_t s;
return get_msgpack_string(s) and sax->string(s);
}
case 0xDC: // array 16
case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
{
uint16_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0xDD: // array 32
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
{
uint32_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0xDE: // map 16
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
{
uint16_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
uint64_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0xDF: // map 32
case 0x7F: // UTF-8 string (indefinite length)
{
uint32_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
while (get() != 0xFF)
{
string_t chunk;
if (not get_cbor_string(chunk))
{
return false;
}
result.append(chunk);
}
return true;
}
// negative fixint
case 0xE0:
case 0xE1:
case 0xE2:
case 0xE3:
case 0xE4:
case 0xE5:
case 0xE6:
case 0xE7:
case 0xE8:
case 0xE9:
case 0xEA:
case 0xEB:
case 0xEC:
case 0xED:
case 0xEE:
case 0xEF:
case 0xF0:
case 0xF1:
case 0xF2:
case 0xF3:
case 0xF4:
case 0xF5:
case 0xF6:
case 0xF7:
case 0xF8:
case 0xF9:
case 0xFA:
case 0xFB:
case 0xFC:
case 0xFD:
case 0xFE:
case 0xFF:
return sax->number_integer(static_cast<int8_t>(current));
default: // anything else
default:
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value")));
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
}
}
}
/*!
@param[in] get_char whether a new character should be retrieved from the
input (true, default) or whether the last read
character should be considered instead
@return whether a valid UBJSON value was passed to the SAX parser
*/
bool parse_ubjson_internal(const bool get_char = true)
{
return get_ubjson_value(get_char ? get_ignore_noop() : current);
}
/*!
@brief get next character from the input
This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns a -'ve valued
`std::char_traits<char>::eof()` in that case.
@return character read from the input
*/
int get()
{
++chars_read;
return (current = ia->get_character());
}
/*!
@return character read from the input after ignoring all 'N' entries
@param[in] len the length of the array or std::size_t(-1) for an
array of indefinite size
@return whether array creation completed
*/
int get_ignore_noop()
bool get_cbor_array(const std::size_t len)
{
do
if (JSON_UNLIKELY(not sax->start_array(len)))
{
get();
return false;
}
while (current == 'N');
return current;
}
/*
@brief read a number from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[out] result number of type @a NumberType
@return whether conversion completed
@note This function needs to respect the system's endianess, because
bytes in CBOR, MessagePack, and UBJSON are stored in network order
(big endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool InputIsLittleEndian = false>
bool get_number(const input_format_t format, NumberType& result)
{
// step 1: read input into array with system's byte order
std::array<uint8_t, sizeof(NumberType)> vec;
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
if (len != std::size_t(-1))
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "number")))
{
return false;
}
// reverse byte order prior to conversion if necessary
if (is_little_endian && !InputIsLittleEndian)
for (std::size_t i = 0; i < len; ++i)
{
vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current);
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
}
else
}
else
{
while (get() != 0xFF)
{
vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE
if (JSON_UNLIKELY(not parse_cbor_internal(false)))
{
return false;
}
}
}
// step 2: convert array into number of type T and return
std::memcpy(&result, vec.data(), sizeof(NumberType));
return true;
return sax->end_array();
}
/*!
@brief create a string by reading characters from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[in] len number of characters to read
@param[out] result string created by reading @a len bytes
@return whether string creation completed
@note We can not reserve @a len bytes for the result, because @a len
may be too large. Usually, @ref unexpect_eof() detects the end of
the input before we run out of string memory.
@param[in] len the length of the object or std::size_t(-1) for an
object of indefinite size
@return whether object creation completed
*/
template<typename NumberType>
bool get_string(const input_format_t format, const NumberType len, string_t& result)
bool get_cbor_object(const std::size_t len)
{
bool success = true;
std::generate_n(std::back_inserter(result), len, [this, &success, &format]()
if (not JSON_UNLIKELY(sax->start_object(len)))
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "string")))
return false;
}
string_t key;
if (len != std::size_t(-1))
{
for (std::size_t i = 0; i < len; ++i)
{
success = false;
get();
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
}
return static_cast<char>(current);
});
return success;
}
}
else
{
while (get() != 0xFF)
{
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
/*!
@brief reads a CBOR string
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
}
}
This function first reads starting bytes to determine the expected
string length and then copies this number of bytes into a string.
Additionally, CBOR's strings with indefinite lengths are supported.
return sax->end_object();
}
@param[out] result created string
/////////////
// MsgPack //
/////////////
@return whether string creation completed
/*!
@return whether a valid MessagePack value was passed to the SAX parser
*/
bool get_cbor_string(string_t& result)
bool parse_msgpack_internal()
{
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string")))
{
return false;
}
switch (current)
switch (get())
{
// UTF-8 string (0x00..0x17 bytes follow)
// EOF
case std::char_traits<char>::eof():
return unexpect_eof(input_format_t::msgpack, "value");
// positive fixint
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x08:
case 0x09:
case 0x0A:
case 0x0B:
case 0x0C:
case 0x0D:
case 0x0E:
case 0x0F:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1A:
case 0x1B:
case 0x1C:
case 0x1D:
case 0x1E:
case 0x1F:
case 0x20:
case 0x21:
case 0x22:
case 0x23:
case 0x24:
case 0x25:
case 0x26:
case 0x27:
case 0x28:
case 0x29:
case 0x2A:
case 0x2B:
case 0x2C:
case 0x2D:
case 0x2E:
case 0x2F:
case 0x30:
case 0x31:
case 0x32:
case 0x33:
case 0x34:
case 0x35:
case 0x36:
case 0x37:
case 0x38:
case 0x39:
case 0x3A:
case 0x3B:
case 0x3C:
case 0x3D:
case 0x3E:
case 0x3F:
case 0x40:
case 0x41:
case 0x42:
case 0x43:
case 0x44:
case 0x45:
case 0x46:
case 0x47:
case 0x48:
case 0x49:
case 0x4A:
case 0x4B:
case 0x4C:
case 0x4D:
case 0x4E:
case 0x4F:
case 0x50:
case 0x51:
case 0x52:
case 0x53:
case 0x54:
case 0x55:
case 0x56:
case 0x57:
case 0x58:
case 0x59:
case 0x5A:
case 0x5B:
case 0x5C:
case 0x5D:
case 0x5E:
case 0x5F:
case 0x60:
case 0x61:
case 0x62:
......@@ -1185,140 +990,234 @@ class binary_reader
case 0x75:
case 0x76:
case 0x77:
case 0x78:
case 0x79:
case 0x7A:
case 0x7B:
case 0x7C:
case 0x7D:
case 0x7E:
case 0x7F:
return sax->number_unsigned(static_cast<number_unsigned_t>(current));
// fixmap
case 0x80:
case 0x81:
case 0x82:
case 0x83:
case 0x84:
case 0x85:
case 0x86:
case 0x87:
case 0x88:
case 0x89:
case 0x8A:
case 0x8B:
case 0x8C:
case 0x8D:
case 0x8E:
case 0x8F:
return get_msgpack_object(static_cast<std::size_t>(current & 0x0F));
// fixarray
case 0x90:
case 0x91:
case 0x92:
case 0x93:
case 0x94:
case 0x95:
case 0x96:
case 0x97:
case 0x98:
case 0x99:
case 0x9A:
case 0x9B:
case 0x9C:
case 0x9D:
case 0x9E:
case 0x9F:
return get_msgpack_array(static_cast<std::size_t>(current & 0x0F));
// fixstr
case 0xA0:
case 0xA1:
case 0xA2:
case 0xA3:
case 0xA4:
case 0xA5:
case 0xA6:
case 0xA7:
case 0xA8:
case 0xA9:
case 0xAA:
case 0xAB:
case 0xAC:
case 0xAD:
case 0xAE:
case 0xAF:
case 0xB0:
case 0xB1:
case 0xB2:
case 0xB3:
case 0xB4:
case 0xB5:
case 0xB6:
case 0xB7:
case 0xB8:
case 0xB9:
case 0xBA:
case 0xBB:
case 0xBC:
case 0xBD:
case 0xBE:
case 0xBF:
{
return get_string(input_format_t::cbor, current & 0x1F, result);
string_t s;
return get_msgpack_string(s) and sax->string(s);
}
case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
case 0xC0: // nil
return sax->null();
case 0xC2: // false
return sax->boolean(false);
case 0xC3: // true
return sax->boolean(true);
case 0xCA: // float 32
{
uint8_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
float number;
return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
}
case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
case 0xCB: // float 64
{
uint16_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
double number;
return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
}
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
case 0xCC: // uint 8
{
uint32_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
uint8_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
case 0xCD: // uint 16
{
uint64_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
uint16_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0x7F: // UTF-8 string (indefinite length)
case 0xCE: // uint 32
{
while (get() != 0xFF)
{
string_t chunk;
if (not get_cbor_string(chunk))
{
return false;
}
result.append(chunk);
}
return true;
uint32_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0xCF: // uint 64
{
uint64_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0xD0: // int 8
{
int8_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
case 0xD1: // int 16
{
int16_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
case 0xD2: // int 32
{
int32_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
case 0xD3: // int 64
{
int64_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
default:
case 0xD9: // str 8
case 0xDA: // str 16
case 0xDB: // str 32
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
string_t s;
return get_msgpack_string(s) and sax->string(s);
}
}
}
/*!
@param[in] len the length of the array or std::size_t(-1) for an
array of indefinite size
@return whether array creation completed
*/
bool get_cbor_array(const std::size_t len)
{
if (JSON_UNLIKELY(not sax->start_array(len)))
{
return false;
}
if (len != std::size_t(-1))
{
for (std::size_t i = 0; i < len; ++i)
case 0xDC: // array 16
{
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
uint16_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
}
}
else
{
while (get() != 0xFF)
case 0xDD: // array 32
{
if (JSON_UNLIKELY(not parse_cbor_internal(false)))
{
return false;
}
uint32_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
}
}
return sax->end_array();
}
/*!
@param[in] len the length of the object or std::size_t(-1) for an
object of indefinite size
@return whether object creation completed
*/
bool get_cbor_object(const std::size_t len)
{
if (not JSON_UNLIKELY(sax->start_object(len)))
{
return false;
}
string_t key;
if (len != std::size_t(-1))
{
for (std::size_t i = 0; i < len; ++i)
case 0xDE: // map 16
{
get();
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
uint16_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
}
}
else
{
while (get() != 0xFF)
case 0xDF: // map 32
{
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
uint32_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
}
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
// negative fixint
case 0xE0:
case 0xE1:
case 0xE2:
case 0xE3:
case 0xE4:
case 0xE5:
case 0xE6:
case 0xE7:
case 0xE8:
case 0xE9:
case 0xEA:
case 0xEB:
case 0xEC:
case 0xED:
case 0xEE:
case 0xEF:
case 0xF0:
case 0xF1:
case 0xF2:
case 0xF3:
case 0xF4:
case 0xF5:
case 0xF6:
case 0xF7:
case 0xF8:
case 0xF9:
case 0xFA:
case 0xFB:
case 0xFC:
case 0xFD:
case 0xFE:
case 0xFF:
return sax->number_integer(static_cast<int8_t>(current));
default: // anything else
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value")));
}
}
return sax->end_object();
}
/*!
......@@ -1455,6 +1354,22 @@ class binary_reader
return sax->end_object();
}
////////////
// UBJSON //
////////////
/*!
@param[in] get_char whether a new character should be retrieved from the
input (true, default) or whether the last read
character should be considered instead
@return whether a valid UBJSON value was passed to the SAX parser
*/
bool parse_ubjson_internal(const bool get_char = true)
{
return get_ubjson_value(get_char ? get_ignore_noop() : current);
}
/*!
@brief reads a UBJSON string
......@@ -1869,6 +1784,113 @@ class binary_reader
return sax->end_object();
}
///////////////////////
// Utility functions //
///////////////////////
/*!
@brief get next character from the input
This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns a -'ve valued
`std::char_traits<char>::eof()` in that case.
@return character read from the input
*/
int get()
{
++chars_read;
return (current = ia->get_character());
}
/*!
@return character read from the input after ignoring all 'N' entries
*/
int get_ignore_noop()
{
do
{
get();
}
while (current == 'N');
return current;
}
/*
@brief read a number from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[out] result number of type @a NumberType
@return whether conversion completed
@note This function needs to respect the system's endianess, because
bytes in CBOR, MessagePack, and UBJSON are stored in network order
(big endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool InputIsLittleEndian = false>
bool get_number(const input_format_t format, NumberType& result)
{
// step 1: read input into array with system's byte order
std::array<uint8_t, sizeof(NumberType)> vec;
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "number")))
{
return false;
}
// reverse byte order prior to conversion if necessary
if (is_little_endian && !InputIsLittleEndian)
{
vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current);
}
else
{
vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE
}
}
// step 2: convert array into number of type T and return
std::memcpy(&result, vec.data(), sizeof(NumberType));
return true;
}
/*!
@brief create a string by reading characters from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[in] len number of characters to read
@param[out] result string created by reading @a len bytes
@return whether string creation completed
@note We can not reserve @a len bytes for the result, because @a len
may be too large. Usually, @ref unexpect_eof() detects the end of
the input before we run out of string memory.
*/
template<typename NumberType>
bool get_string(const input_format_t format,
const NumberType len,
string_t& result)
{
bool success = true;
std::generate_n(std::back_inserter(result), len, [this, &success, &format]()
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "string")))
{
success = false;
}
return static_cast<char>(current);
});
return success;
}
/*!
@param[in] format the current format (for diagnostics)
@param[in] context further context information (for diagnostics)
......@@ -1894,7 +1916,6 @@ class binary_reader
return std::string{cr};
}
private:
/*!
@param[in] format the current format
@param[in] detail a detailed error message
......@@ -1934,6 +1955,7 @@ class binary_reader
return error_msg + " " + context + ": " + detail;
}
private:
/// input adapter
input_adapter_t ia = nullptr;
......
......@@ -35,7 +35,34 @@ class binary_writer
}
/*!
@brief[in] j JSON value to serialize
@param[in] j JSON value to serialize
@pre j.type() == value_t::object
*/
void write_bson(const BasicJsonType& j)
{
switch (j.type())
{
case value_t::object:
{
write_bson_object(*j.m_value.object);
break;
}
case value_t::discarded:
{
break;
}
default:
{
JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name())));
break;
}
}
}
/*!
@param[in] j JSON value to serialize
*/
void write_cbor(const BasicJsonType& j)
{
......@@ -279,7 +306,7 @@ class binary_writer
}
/*!
@brief[in] j JSON value to serialize
@param[in] j JSON value to serialize
*/
void write_msgpack(const BasicJsonType& j)
{
......@@ -678,13 +705,19 @@ class binary_writer
}
}
private:
//////////
// BSON //
//////////
/*!
@return The size of a BSON document entry header, including the id marker and the entry name size (and its null-terminator).
@return The size of a BSON document entry header, including the id marker
and the entry name size (and its null-terminator).
*/
static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name)
{
const auto it = name.find(static_cast<typename BasicJsonType::string_t::value_type>(0));
if (it != BasicJsonType::string_t::npos)
if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos))
{
JSON_THROW(out_of_range::create(409,
"BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")"));
......@@ -696,7 +729,8 @@ class binary_writer
/*!
@brief Writes the given @a element_type and @a name to the output adapter
*/
void write_bson_entry_header(const typename BasicJsonType::string_t& name, std::uint8_t element_type)
void write_bson_entry_header(const typename BasicJsonType::string_t& name,
std::uint8_t element_type)
{
oa->write_character(static_cast<CharType>(element_type)); // boolean
oa->write_characters(
......@@ -707,7 +741,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and boolean value @a value
*/
void write_bson_boolean(const typename BasicJsonType::string_t& name, const bool value)
void write_bson_boolean(const typename BasicJsonType::string_t& name,
const bool value)
{
write_bson_entry_header(name, 0x08);
oa->write_character(value ? static_cast<CharType>(0x01) : static_cast<CharType>(0x00));
......@@ -716,7 +751,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and double value @a value
*/
void write_bson_double(const typename BasicJsonType::string_t& name, const double value)
void write_bson_double(const typename BasicJsonType::string_t& name,
const double value)
{
write_bson_entry_header(name, 0x01);
write_number<double, true>(value);
......@@ -733,7 +769,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and string value @a value
*/
void write_bson_string(const typename BasicJsonType::string_t& name, const typename BasicJsonType::string_t& value)
void write_bson_string(const typename BasicJsonType::string_t& name,
const typename BasicJsonType::string_t& value)
{
write_bson_entry_header(name, 0x02);
......@@ -769,7 +806,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and integer @a value
*/
void write_bson_integer(const typename BasicJsonType::string_t& name, const std::int64_t value)
void write_bson_integer(const typename BasicJsonType::string_t& name,
const std::int64_t value)
{
if ((std::numeric_limits<std::int32_t>::min)() <= value and value <= (std::numeric_limits<std::int32_t>::max)())
{
......@@ -783,7 +821,6 @@ class binary_writer
}
}
/*!
@return The size of the BSON-encoded unsigned integer in @a j
*/
......@@ -802,7 +839,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and unsigned @a value
*/
void write_bson_unsigned(const typename BasicJsonType::string_t& name, const std::uint64_t value)
void write_bson_unsigned(const typename BasicJsonType::string_t& name,
const std::uint64_t value)
{
if (value <= static_cast<std::uint64_t>((std::numeric_limits<std::int32_t>::max)()))
{
......@@ -824,13 +862,13 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and object @a value
*/
void write_bson_object_entry(const typename BasicJsonType::string_t& name, const typename BasicJsonType::object_t& value)
void write_bson_object_entry(const typename BasicJsonType::string_t& name,
const typename BasicJsonType::object_t& value)
{
write_bson_entry_header(name, 0x03); // object
write_bson_object(value);
}
/*!
@return The size of the BSON-encoded array @a value
*/
......@@ -849,10 +887,11 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and array @a value
*/
void write_bson_array(const typename BasicJsonType::string_t& name, const typename BasicJsonType::array_t& value)
void write_bson_array(const typename BasicJsonType::string_t& name,
const typename BasicJsonType::array_t& value)
{
write_bson_entry_header(name, 0x04); // array
write_number<std::int32_t, true>(calc_bson_array_size(value));
write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_array_size(value)));
for (const auto& el : value)
{
......@@ -862,75 +901,95 @@ class binary_writer
oa->write_character(static_cast<CharType>(0x00));
}
/*!
@brief Calculates the size necessary to serialize the JSON value @a j with its @a name
@return The calculated size for the BSON document entry for @a j with the given @a name.
*/
static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, const BasicJsonType& j)
static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name,
const BasicJsonType& j)
{
const auto header_size = calc_bson_entry_header_size(name);
switch (j.type())
{
// LCOV_EXCL_START
default:
assert(false);
return 0ul;
// LCOV_EXCL_STOP
case value_t::discarded:
return 0ul;
case value_t::object:
return header_size + calc_bson_object_size(*j.m_value.object);
case value_t::array:
return header_size + calc_bson_array_size(*j.m_value.array);
case value_t::boolean:
return header_size + 1ul;
case value_t::number_float:
return header_size + 8ul;
case value_t::number_integer:
return header_size + calc_bson_integer_size(j.m_value.number_integer);
case value_t::number_unsigned:
return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned);
case value_t::string:
return header_size + calc_bson_string_size(*j.m_value.string);
case value_t::null:
return header_size + 0ul;
// LCOV_EXCL_START
default:
assert(false);
return 0ul;
// LCOV_EXCL_STOP
};
}
/*!
@brief Serializes the JSON value @a j to BSON and associates it with the key @a name.
@param name The name to associate with the JSON entity @a j within the current BSON document
@return The size of the bson entry
@brief Serializes the JSON value @a j to BSON and associates it with the
key @a name.
@param name The name to associate with the JSON entity @a j within the
current BSON document
@return The size of the BSON entry
*/
void write_bson_element(const typename BasicJsonType::string_t& name, const BasicJsonType& j)
void write_bson_element(const typename BasicJsonType::string_t& name,
const BasicJsonType& j)
{
switch (j.type())
{
// LCOV_EXCL_START
default:
assert(false);
return;
// LCOV_EXCL_STOP
case value_t::discarded:
return;
case value_t::object:
return write_bson_object_entry(name, *j.m_value.object);
case value_t::array:
return write_bson_array(name, *j.m_value.array);
case value_t::boolean:
return write_bson_boolean(name, j.m_value.boolean);
case value_t::number_float:
return write_bson_double(name, j.m_value.number_float);
case value_t::number_integer:
return write_bson_integer(name, j.m_value.number_integer);
case value_t::number_unsigned:
return write_bson_unsigned(name, j.m_value.number_unsigned);
case value_t::string:
return write_bson_string(name, *j.m_value.string);
case value_t::null:
return write_bson_null(name);
// LCOV_EXCL_START
default:
assert(false);
return;
// LCOV_EXCL_STOP
};
}
......@@ -958,7 +1017,7 @@ class binary_writer
*/
void write_bson_object(const typename BasicJsonType::object_t& value)
{
write_number<std::int32_t, true>(calc_bson_object_size(value));
write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_object_size(value)));
for (const auto& el : value)
{
......@@ -968,64 +1027,38 @@ class binary_writer
oa->write_character(static_cast<CharType>(0x00));
}
/*!
@param[in] j JSON value to serialize
@pre j.type() == value_t::object
*/
void write_bson(const BasicJsonType& j)
{
switch (j.type())
{
case value_t::object:
{
write_bson_object(*j.m_value.object);
break;
}
case value_t::discarded:
{
break;
}
//////////
// CBOR //
//////////
default:
{
JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name())));
break;
}
}
static constexpr CharType get_cbor_float_prefix(float /*unused*/)
{
return static_cast<CharType>(0xFA); // Single-Precision Float
}
static constexpr CharType get_cbor_float_prefix(double /*unused*/)
{
return static_cast<CharType>(0xFB); // Double-Precision Float
}
private:
/*
@brief write a number to output input
@param[in] n number of type @a NumberType
@tparam NumberType the type of the number
@tparam OutputIsLittleEndian Set to true if output data is
required to be little endian
/////////////
// MsgPack //
/////////////
@note This function needs to respect the system's endianess, because bytes
in CBOR, MessagePack, and UBJSON are stored in network order (big
endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool OutputIsLittleEndian = false>
void write_number(const NumberType n)
static constexpr CharType get_msgpack_float_prefix(float /*unused*/)
{
// step 1: write number to array of length NumberType
std::array<CharType, sizeof(NumberType)> vec;
std::memcpy(vec.data(), &n, sizeof(NumberType));
// step 2: write array to output (with possible reordering)
if (is_little_endian and not OutputIsLittleEndian)
{
// reverse byte order prior to conversion if necessary
std::reverse(vec.begin(), vec.end());
}
return static_cast<CharType>(0xCA); // float 32
}
oa->write_characters(vec.data(), sizeof(NumberType));
static constexpr CharType get_msgpack_float_prefix(double /*unused*/)
{
return static_cast<CharType>(0xCB); // float 64
}
////////////
// UBJSON //
////////////
// UBJSON: write number (floating point)
template<typename NumberType, typename std::enable_if<
std::is_floating_point<NumberType>::value, int>::type = 0>
......@@ -1226,34 +1259,47 @@ class binary_writer
}
}
static constexpr CharType get_cbor_float_prefix(float /*unused*/)
static constexpr CharType get_ubjson_float_prefix(float /*unused*/)
{
return static_cast<CharType>(0xFA); // Single-Precision Float
return 'd'; // float 32
}
static constexpr CharType get_cbor_float_prefix(double /*unused*/)
static constexpr CharType get_ubjson_float_prefix(double /*unused*/)
{
return static_cast<CharType>(0xFB); // Double-Precision Float
return 'D'; // float 64
}
static constexpr CharType get_msgpack_float_prefix(float /*unused*/)
{
return static_cast<CharType>(0xCA); // float 32
}
///////////////////////
// Utility functions //
///////////////////////
static constexpr CharType get_msgpack_float_prefix(double /*unused*/)
{
return static_cast<CharType>(0xCB); // float 64
}
/*
@brief write a number to output input
static constexpr CharType get_ubjson_float_prefix(float /*unused*/)
{
return 'd'; // float 32
}
@param[in] n number of type @a NumberType
@tparam NumberType the type of the number
@tparam OutputIsLittleEndian Set to true if output data is
required to be little endian
static constexpr CharType get_ubjson_float_prefix(double /*unused*/)
@note This function needs to respect the system's endianess, because bytes
in CBOR, MessagePack, and UBJSON are stored in network order (big
endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool OutputIsLittleEndian = false>
void write_number(const NumberType n)
{
return 'D'; // float 64
// step 1: write number to array of length NumberType
std::array<CharType, sizeof(NumberType)> vec;
std::memcpy(vec.data(), &n, sizeof(NumberType));
// step 2: write array to output (with possible reordering)
if (is_little_endian and not OutputIsLittleEndian)
{
// reverse byte order prior to conversion if necessary
std::reverse(vec.begin(), vec.end());
}
oa->write_characters(vec.data(), sizeof(NumberType));
}
private:
......
......@@ -6426,14 +6426,38 @@ class binary_reader
}
private:
//////////
// BSON //
//////////
/*!
@brief Reads in a BSON-object and passes it to the SAX-parser.
@return whether a valid BSON-value was passed to the SAX parser
*/
bool parse_bson_internal()
{
std::int32_t documentSize;
get_number<std::int32_t, true>(input_format_t::bson, documentSize);
if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1))))
{
return false;
}
if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false)))
{
return false;
}
return sax->end_object();
}
/*!
@brief Parses a C-style string from the BSON input.
@param [out] result A reference to the string variable where the read string
is to be stored.
@return `true` if the \x00-byte indicating the end of the
string was encountered before the EOF.
`false` indicates an unexpected EOF.
@param[in, out] result A reference to the string variable where the read
string is to be stored.
@return `true` if the \x00-byte indicating the end of the string was
encountered before the EOF; false` indicates an unexpected EOF.
*/
bool get_bson_cstr(string_t& result)
{
......@@ -6456,107 +6480,107 @@ class binary_reader
}
/*!
@brief Parses a zero-terminated string of length @a len from the BSON input.
@param [in] len The length (including the zero-byte at the end) of the string to be read.
@param [out] result A reference to the string variable where the read string
is to be stored.
@brief Parses a zero-terminated string of length @a len from the BSON
input.
@param[in] len The length (including the zero-byte at the end) of the
string to be read.
@param[in, out] result A reference to the string variable where the read
string is to be stored.
@tparam NumberType The type of the length @a len
@pre len > 0
@return `true` if the string was successfully parsed
*/
template <typename NumberType>
template<typename NumberType>
bool get_bson_string(const NumberType len, string_t& result)
{
return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result)
&& get() != std::char_traits<char>::eof();
}
/*!
@return A hexadecimal string representation of the given @a byte
@param byte The byte to convert to a string
*/
static std::string byte_hexstring(unsigned char byte)
{
char cr[3];
snprintf(cr, sizeof(cr), "%02hhX", byte);
return std::string{cr};
return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof();
}
/*!
@brief Read a BSON document element of the given @a element_type.
@param element_type The BSON element type, c.f. http://bsonspec.org/spec.html
@param element_type_parse_position The position in the input stream, where the `element_type` was read.
@warning Not all BSON element types are supported yet. An unsupported @a element_type will
give rise to a parse_error.114: Unsupported BSON record type 0x...
@param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
@param[in] element_type_parse_position The position in the input stream,
where the `element_type` was read.
@warning Not all BSON element types are supported yet. An unsupported
@a element_type will give rise to a parse_error.114:
Unsupported BSON record type 0x...
@return whether a valid BSON-object/array was passed to the SAX parser
*/
bool parse_bson_element_internal(int element_type, std::size_t element_type_parse_position)
bool parse_bson_element_internal(const int element_type,
const std::size_t element_type_parse_position)
{
switch (element_type)
{
case 0x01: // double
{
double number;
return get_number<double, true>(input_format_t::bson, number)
&& sax->number_float(static_cast<number_float_t>(number), "");
return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), "");
}
case 0x02: // string
{
std::int32_t len;
string_t value;
return get_number<std::int32_t, true>(input_format_t::bson, len)
&& get_bson_string(len, value)
&& sax->string(value);
return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value);
}
case 0x08: // boolean
case 0x03: // object
{
return sax->boolean(static_cast<bool>(get()));
return parse_bson_internal();
}
case 0x10: // int32
case 0x04: // array
{
std::int32_t value;
return get_number<std::int32_t, true>(input_format_t::bson, value)
&& sax->number_integer(static_cast<std::int32_t>(value));
return parse_bson_array();
}
case 0x12: // int64
case 0x08: // boolean
{
std::int64_t value;
return get_number<std::int64_t, true>(input_format_t::bson, value)
&& sax->number_integer(static_cast<std::int64_t>(value));
return sax->boolean(static_cast<bool>(get()));
}
case 0x0A: // null
{
return sax->null();
}
case 0x03: // object
case 0x10: // int32
{
return parse_bson_internal();
std::int32_t value;
return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(static_cast<std::int32_t>(value));
}
case 0x04: // array
case 0x12: // int64
{
return parse_bson_array();
std::int64_t value;
return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(static_cast<std::int64_t>(value));
}
default: // anything else not supported (yet)
{
auto element_type_str = byte_hexstring(element_type);
return sax->parse_error(element_type_parse_position, element_type_str, parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + element_type_str));
char cr[3];
snprintf(cr, sizeof(cr), "%.2hhX", static_cast<unsigned char>(element_type));
return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr)));
}
}
}
/*!
@brief Read a BSON element list (as specified in the BSON-spec) from the input
and passes it to the SAX-parser.
The same binary layout is used for objects and arrays, hence it must
be indicated with the argument @a is_array which one is expected
(true --> array, false --> object).
@param is_array Determines if the element list being read is to be treated as
an object (@a is_array == false), or as an array (@a is_array == true).
@brief Read a BSON element list (as specified in the BSON-spec)
The same binary layout is used for objects and arrays, hence it must be
indicated with the argument @a is_array which one is expected
(true --> array, false --> object).
@param[in] is_array Determines if the element list being read is to be
treated as an object (@a is_array == false), or as an
array (@a is_array == true).
@return whether a valid BSON-object/array was passed to the SAX parser
*/
bool parse_bson_element_list(bool is_array)
bool parse_bson_element_list(const bool is_array)
{
while (auto element_type = get())
string_t key;
while (int element_type = get())
{
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list")))
{
......@@ -6564,13 +6588,12 @@ class binary_reader
}
const std::size_t element_type_parse_position = chars_read;
string_t key;
if (JSON_UNLIKELY(not get_bson_cstr(key)))
{
return false;
}
if (!is_array)
if (not is_array)
{
sax->key(key);
}
......@@ -6579,7 +6602,11 @@ class binary_reader
{
return false;
}
// get_bson_cstr only appends
key.clear();
}
return true;
}
......@@ -6592,7 +6619,7 @@ class binary_reader
std::int32_t documentSize;
get_number<std::int32_t, true>(input_format_t::bson, documentSize);
if (JSON_UNLIKELY(not sax->start_array(-1)))
if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1))))
{
return false;
}
......@@ -6605,27 +6632,9 @@ class binary_reader
return sax->end_array();
}
/*!
@brief Reads in a BSON-object and pass it to the SAX-parser.
@return whether a valid BSON-value was passed to the SAX parser
*/
bool parse_bson_internal()
{
std::int32_t documentSize;
get_number<std::int32_t, true>(input_format_t::bson, documentSize);
if (JSON_UNLIKELY(not sax->start_object(-1)))
{
return false;
}
if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false)))
{
return false;
}
return sax->end_object();
}
//////////
// CBOR //
//////////
/*!
@param[in] get_char whether a new character should be retrieved from the
......@@ -6966,113 +6975,26 @@ class binary_reader
}
/*!
@return whether a valid MessagePack value was passed to the SAX parser
@brief reads a CBOR string
This function first reads starting bytes to determine the expected
string length and then copies this number of bytes into a string.
Additionally, CBOR's strings with indefinite lengths are supported.
@param[out] result created string
@return whether string creation completed
*/
bool parse_msgpack_internal()
bool get_cbor_string(string_t& result)
{
switch (get())
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string")))
{
// EOF
case std::char_traits<char>::eof():
return unexpect_eof(input_format_t::msgpack, "value");
return false;
}
// positive fixint
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x08:
case 0x09:
case 0x0A:
case 0x0B:
case 0x0C:
case 0x0D:
case 0x0E:
case 0x0F:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1A:
case 0x1B:
case 0x1C:
case 0x1D:
case 0x1E:
case 0x1F:
case 0x20:
case 0x21:
case 0x22:
case 0x23:
case 0x24:
case 0x25:
case 0x26:
case 0x27:
case 0x28:
case 0x29:
case 0x2A:
case 0x2B:
case 0x2C:
case 0x2D:
case 0x2E:
case 0x2F:
case 0x30:
case 0x31:
case 0x32:
case 0x33:
case 0x34:
case 0x35:
case 0x36:
case 0x37:
case 0x38:
case 0x39:
case 0x3A:
case 0x3B:
case 0x3C:
case 0x3D:
case 0x3E:
case 0x3F:
case 0x40:
case 0x41:
case 0x42:
case 0x43:
case 0x44:
case 0x45:
case 0x46:
case 0x47:
case 0x48:
case 0x49:
case 0x4A:
case 0x4B:
case 0x4C:
case 0x4D:
case 0x4E:
case 0x4F:
case 0x50:
case 0x51:
case 0x52:
case 0x53:
case 0x54:
case 0x55:
case 0x56:
case 0x57:
case 0x58:
case 0x59:
case 0x5A:
case 0x5B:
case 0x5C:
case 0x5D:
case 0x5E:
case 0x5F:
switch (current)
{
// UTF-8 string (0x00..0x17 bytes follow)
case 0x60:
case 0x61:
case 0x62:
......@@ -7097,371 +7019,254 @@ class binary_reader
case 0x75:
case 0x76:
case 0x77:
case 0x78:
case 0x79:
case 0x7A:
case 0x7B:
case 0x7C:
case 0x7D:
case 0x7E:
case 0x7F:
return sax->number_unsigned(static_cast<number_unsigned_t>(current));
// fixmap
case 0x80:
case 0x81:
case 0x82:
case 0x83:
case 0x84:
case 0x85:
case 0x86:
case 0x87:
case 0x88:
case 0x89:
case 0x8A:
case 0x8B:
case 0x8C:
case 0x8D:
case 0x8E:
case 0x8F:
return get_msgpack_object(static_cast<std::size_t>(current & 0x0F));
{
return get_string(input_format_t::cbor, current & 0x1F, result);
}
// fixarray
case 0x90:
case 0x91:
case 0x92:
case 0x93:
case 0x94:
case 0x95:
case 0x96:
case 0x97:
case 0x98:
case 0x99:
case 0x9A:
case 0x9B:
case 0x9C:
case 0x9D:
case 0x9E:
case 0x9F:
return get_msgpack_array(static_cast<std::size_t>(current & 0x0F));
case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
{
uint8_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
// fixstr
case 0xA0:
case 0xA1:
case 0xA2:
case 0xA3:
case 0xA4:
case 0xA5:
case 0xA6:
case 0xA7:
case 0xA8:
case 0xA9:
case 0xAA:
case 0xAB:
case 0xAC:
case 0xAD:
case 0xAE:
case 0xAF:
case 0xB0:
case 0xB1:
case 0xB2:
case 0xB3:
case 0xB4:
case 0xB5:
case 0xB6:
case 0xB7:
case 0xB8:
case 0xB9:
case 0xBA:
case 0xBB:
case 0xBC:
case 0xBD:
case 0xBE:
case 0xBF:
{
string_t s;
return get_msgpack_string(s) and sax->string(s);
}
case 0xC0: // nil
return sax->null();
case 0xC2: // false
return sax->boolean(false);
case 0xC3: // true
return sax->boolean(true);
case 0xCA: // float 32
{
float number;
return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
}
case 0xCB: // float 64
{
double number;
return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
}
case 0xCC: // uint 8
case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
{
uint8_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
uint16_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0xCD: // uint 16
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
{
uint16_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
uint32_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0xCE: // uint 32
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
{
uint32_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
uint64_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0xCF: // uint 64
case 0x7F: // UTF-8 string (indefinite length)
{
uint64_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
while (get() != 0xFF)
{
string_t chunk;
if (not get_cbor_string(chunk))
{
return false;
}
result.append(chunk);
}
return true;
}
case 0xD0: // int 8
default:
{
int8_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
}
}
}
case 0xD1: // int 16
{
int16_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
/*!
@param[in] len the length of the array or std::size_t(-1) for an
array of indefinite size
@return whether array creation completed
*/
bool get_cbor_array(const std::size_t len)
{
if (JSON_UNLIKELY(not sax->start_array(len)))
{
return false;
}
case 0xD2: // int 32
if (len != std::size_t(-1))
{
for (std::size_t i = 0; i < len; ++i)
{
int32_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
}
case 0xD3: // int 64
}
else
{
while (get() != 0xFF)
{
int64_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
if (JSON_UNLIKELY(not parse_cbor_internal(false)))
{
return false;
}
}
}
case 0xD9: // str 8
case 0xDA: // str 16
case 0xDB: // str 32
{
string_t s;
return get_msgpack_string(s) and sax->string(s);
}
return sax->end_array();
}
case 0xDC: // array 16
{
uint16_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
}
/*!
@param[in] len the length of the object or std::size_t(-1) for an
object of indefinite size
@return whether object creation completed
*/
bool get_cbor_object(const std::size_t len)
{
if (not JSON_UNLIKELY(sax->start_object(len)))
{
return false;
}
case 0xDD: // array 32
string_t key;
if (len != std::size_t(-1))
{
for (std::size_t i = 0; i < len; ++i)
{
uint32_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
}
get();
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
case 0xDE: // map 16
{
uint16_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
}
case 0xDF: // map 32
}
else
{
while (get() != 0xFF)
{
uint32_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
}
// negative fixint
case 0xE0:
case 0xE1:
case 0xE2:
case 0xE3:
case 0xE4:
case 0xE5:
case 0xE6:
case 0xE7:
case 0xE8:
case 0xE9:
case 0xEA:
case 0xEB:
case 0xEC:
case 0xED:
case 0xEE:
case 0xEF:
case 0xF0:
case 0xF1:
case 0xF2:
case 0xF3:
case 0xF4:
case 0xF5:
case 0xF6:
case 0xF7:
case 0xF8:
case 0xF9:
case 0xFA:
case 0xFB:
case 0xFC:
case 0xFD:
case 0xFE:
case 0xFF:
return sax->number_integer(static_cast<int8_t>(current));
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
default: // anything else
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value")));
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
}
}
return sax->end_object();
}
/*!
@param[in] get_char whether a new character should be retrieved from the
input (true, default) or whether the last read
character should be considered instead
/////////////
// MsgPack //
/////////////
@return whether a valid UBJSON value was passed to the SAX parser
/*!
@return whether a valid MessagePack value was passed to the SAX parser
*/
bool parse_ubjson_internal(const bool get_char = true)
bool parse_msgpack_internal()
{
return get_ubjson_value(get_char ? get_ignore_noop() : current);
}
/*!
@brief get next character from the input
This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns a -'ve valued
`std::char_traits<char>::eof()` in that case.
@return character read from the input
*/
int get()
{
++chars_read;
return (current = ia->get_character());
}
/*!
@return character read from the input after ignoring all 'N' entries
*/
int get_ignore_noop()
{
do
{
get();
}
while (current == 'N');
return current;
}
/*
@brief read a number from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[out] result number of type @a NumberType
@return whether conversion completed
@note This function needs to respect the system's endianess, because
bytes in CBOR, MessagePack, and UBJSON are stored in network order
(big endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool InputIsLittleEndian = false>
bool get_number(const input_format_t format, NumberType& result)
{
// step 1: read input into array with system's byte order
std::array<uint8_t, sizeof(NumberType)> vec;
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "number")))
{
return false;
}
// reverse byte order prior to conversion if necessary
if (is_little_endian && !InputIsLittleEndian)
{
vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current);
}
else
{
vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE
}
}
// step 2: convert array into number of type T and return
std::memcpy(&result, vec.data(), sizeof(NumberType));
return true;
}
/*!
@brief create a string by reading characters from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[in] len number of characters to read
@param[out] result string created by reading @a len bytes
@return whether string creation completed
@note We can not reserve @a len bytes for the result, because @a len
may be too large. Usually, @ref unexpect_eof() detects the end of
the input before we run out of string memory.
*/
template<typename NumberType>
bool get_string(const input_format_t format, const NumberType len, string_t& result)
{
bool success = true;
std::generate_n(std::back_inserter(result), len, [this, &success, &format]()
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "string")))
{
success = false;
}
return static_cast<char>(current);
});
return success;
}
/*!
@brief reads a CBOR string
This function first reads starting bytes to determine the expected
string length and then copies this number of bytes into a string.
Additionally, CBOR's strings with indefinite lengths are supported.
@param[out] result created string
@return whether string creation completed
*/
bool get_cbor_string(string_t& result)
{
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string")))
switch (get())
{
return false;
}
// EOF
case std::char_traits<char>::eof():
return unexpect_eof(input_format_t::msgpack, "value");
switch (current)
{
// UTF-8 string (0x00..0x17 bytes follow)
// positive fixint
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x08:
case 0x09:
case 0x0A:
case 0x0B:
case 0x0C:
case 0x0D:
case 0x0E:
case 0x0F:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1A:
case 0x1B:
case 0x1C:
case 0x1D:
case 0x1E:
case 0x1F:
case 0x20:
case 0x21:
case 0x22:
case 0x23:
case 0x24:
case 0x25:
case 0x26:
case 0x27:
case 0x28:
case 0x29:
case 0x2A:
case 0x2B:
case 0x2C:
case 0x2D:
case 0x2E:
case 0x2F:
case 0x30:
case 0x31:
case 0x32:
case 0x33:
case 0x34:
case 0x35:
case 0x36:
case 0x37:
case 0x38:
case 0x39:
case 0x3A:
case 0x3B:
case 0x3C:
case 0x3D:
case 0x3E:
case 0x3F:
case 0x40:
case 0x41:
case 0x42:
case 0x43:
case 0x44:
case 0x45:
case 0x46:
case 0x47:
case 0x48:
case 0x49:
case 0x4A:
case 0x4B:
case 0x4C:
case 0x4D:
case 0x4E:
case 0x4F:
case 0x50:
case 0x51:
case 0x52:
case 0x53:
case 0x54:
case 0x55:
case 0x56:
case 0x57:
case 0x58:
case 0x59:
case 0x5A:
case 0x5B:
case 0x5C:
case 0x5D:
case 0x5E:
case 0x5F:
case 0x60:
case 0x61:
case 0x62:
......@@ -7486,140 +7291,234 @@ class binary_reader
case 0x75:
case 0x76:
case 0x77:
case 0x78:
case 0x79:
case 0x7A:
case 0x7B:
case 0x7C:
case 0x7D:
case 0x7E:
case 0x7F:
return sax->number_unsigned(static_cast<number_unsigned_t>(current));
// fixmap
case 0x80:
case 0x81:
case 0x82:
case 0x83:
case 0x84:
case 0x85:
case 0x86:
case 0x87:
case 0x88:
case 0x89:
case 0x8A:
case 0x8B:
case 0x8C:
case 0x8D:
case 0x8E:
case 0x8F:
return get_msgpack_object(static_cast<std::size_t>(current & 0x0F));
// fixarray
case 0x90:
case 0x91:
case 0x92:
case 0x93:
case 0x94:
case 0x95:
case 0x96:
case 0x97:
case 0x98:
case 0x99:
case 0x9A:
case 0x9B:
case 0x9C:
case 0x9D:
case 0x9E:
case 0x9F:
return get_msgpack_array(static_cast<std::size_t>(current & 0x0F));
// fixstr
case 0xA0:
case 0xA1:
case 0xA2:
case 0xA3:
case 0xA4:
case 0xA5:
case 0xA6:
case 0xA7:
case 0xA8:
case 0xA9:
case 0xAA:
case 0xAB:
case 0xAC:
case 0xAD:
case 0xAE:
case 0xAF:
case 0xB0:
case 0xB1:
case 0xB2:
case 0xB3:
case 0xB4:
case 0xB5:
case 0xB6:
case 0xB7:
case 0xB8:
case 0xB9:
case 0xBA:
case 0xBB:
case 0xBC:
case 0xBD:
case 0xBE:
case 0xBF:
{
string_t s;
return get_msgpack_string(s) and sax->string(s);
}
case 0xC0: // nil
return sax->null();
case 0xC2: // false
return sax->boolean(false);
case 0xC3: // true
return sax->boolean(true);
case 0xCA: // float 32
{
float number;
return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
}
case 0xCB: // float 64
{
double number;
return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
}
case 0xCC: // uint 8
{
uint8_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0xCD: // uint 16
{
return get_string(input_format_t::cbor, current & 0x1F, result);
uint16_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
case 0xCE: // uint 32
{
uint8_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
uint32_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
case 0xCF: // uint 64
{
uint16_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
uint64_t number;
return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
}
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
case 0xD0: // int 8
{
uint32_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
int8_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
case 0xD1: // int 16
{
uint64_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
int16_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
case 0x7F: // UTF-8 string (indefinite length)
case 0xD2: // int 32
{
while (get() != 0xFF)
{
string_t chunk;
if (not get_cbor_string(chunk))
{
return false;
}
result.append(chunk);
}
return true;
int32_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
default:
case 0xD3: // int 64
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
int64_t number;
return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
}
}
}
/*!
@param[in] len the length of the array or std::size_t(-1) for an
array of indefinite size
@return whether array creation completed
*/
bool get_cbor_array(const std::size_t len)
{
if (JSON_UNLIKELY(not sax->start_array(len)))
{
return false;
}
if (len != std::size_t(-1))
{
for (std::size_t i = 0; i < len; ++i)
case 0xD9: // str 8
case 0xDA: // str 16
case 0xDB: // str 32
{
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
string_t s;
return get_msgpack_string(s) and sax->string(s);
}
}
else
{
while (get() != 0xFF)
case 0xDC: // array 16
{
if (JSON_UNLIKELY(not parse_cbor_internal(false)))
{
return false;
}
uint16_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
}
}
return sax->end_array();
}
/*!
@param[in] len the length of the object or std::size_t(-1) for an
object of indefinite size
@return whether object creation completed
*/
bool get_cbor_object(const std::size_t len)
{
if (not JSON_UNLIKELY(sax->start_object(len)))
{
return false;
}
string_t key;
if (len != std::size_t(-1))
{
for (std::size_t i = 0; i < len; ++i)
case 0xDD: // array 32
{
get();
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
uint32_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
}
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
case 0xDE: // map 16
{
uint16_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
}
}
else
{
while (get() != 0xFF)
case 0xDF: // map 32
{
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
uint32_t len;
return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
}
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
// negative fixint
case 0xE0:
case 0xE1:
case 0xE2:
case 0xE3:
case 0xE4:
case 0xE5:
case 0xE6:
case 0xE7:
case 0xE8:
case 0xE9:
case 0xEA:
case 0xEB:
case 0xEC:
case 0xED:
case 0xEE:
case 0xEF:
case 0xF0:
case 0xF1:
case 0xF2:
case 0xF3:
case 0xF4:
case 0xF5:
case 0xF6:
case 0xF7:
case 0xF8:
case 0xF9:
case 0xFA:
case 0xFB:
case 0xFC:
case 0xFD:
case 0xFE:
case 0xFF:
return sax->number_integer(static_cast<int8_t>(current));
default: // anything else
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value")));
}
}
return sax->end_object();
}
/*!
......@@ -7756,6 +7655,22 @@ class binary_reader
return sax->end_object();
}
////////////
// UBJSON //
////////////
/*!
@param[in] get_char whether a new character should be retrieved from the
input (true, default) or whether the last read
character should be considered instead
@return whether a valid UBJSON value was passed to the SAX parser
*/
bool parse_ubjson_internal(const bool get_char = true)
{
return get_ubjson_value(get_char ? get_ignore_noop() : current);
}
/*!
@brief reads a UBJSON string
......@@ -8147,27 +8062,134 @@ class binary_reader
}
else
{
if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1))))
if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1))))
{
return false;
}
while (current != '}')
{
if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key)))
{
return false;
}
if (JSON_UNLIKELY(not parse_ubjson_internal()))
{
return false;
}
get_ignore_noop();
key.clear();
}
}
return sax->end_object();
}
///////////////////////
// Utility functions //
///////////////////////
/*!
@brief get next character from the input
This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns a -'ve valued
`std::char_traits<char>::eof()` in that case.
@return character read from the input
*/
int get()
{
++chars_read;
return (current = ia->get_character());
}
/*!
@return character read from the input after ignoring all 'N' entries
*/
int get_ignore_noop()
{
do
{
get();
}
while (current == 'N');
return current;
}
/*
@brief read a number from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[out] result number of type @a NumberType
@return whether conversion completed
@note This function needs to respect the system's endianess, because
bytes in CBOR, MessagePack, and UBJSON are stored in network order
(big endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool InputIsLittleEndian = false>
bool get_number(const input_format_t format, NumberType& result)
{
// step 1: read input into array with system's byte order
std::array<uint8_t, sizeof(NumberType)> vec;
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "number")))
{
return false;
}
while (current != '}')
// reverse byte order prior to conversion if necessary
if (is_little_endian && !InputIsLittleEndian)
{
if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key)))
{
return false;
}
if (JSON_UNLIKELY(not parse_ubjson_internal()))
{
return false;
}
get_ignore_noop();
key.clear();
vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current);
}
else
{
vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE
}
}
return sax->end_object();
// step 2: convert array into number of type T and return
std::memcpy(&result, vec.data(), sizeof(NumberType));
return true;
}
/*!
@brief create a string by reading characters from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[in] len number of characters to read
@param[out] result string created by reading @a len bytes
@return whether string creation completed
@note We can not reserve @a len bytes for the result, because @a len
may be too large. Usually, @ref unexpect_eof() detects the end of
the input before we run out of string memory.
*/
template<typename NumberType>
bool get_string(const input_format_t format,
const NumberType len,
string_t& result)
{
bool success = true;
std::generate_n(std::back_inserter(result), len, [this, &success, &format]()
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "string")))
{
success = false;
}
return static_cast<char>(current);
});
return success;
}
/*!
......@@ -8195,7 +8217,6 @@ class binary_reader
return std::string{cr};
}
private:
/*!
@param[in] format the current format
@param[in] detail a detailed error message
......@@ -8235,6 +8256,7 @@ class binary_reader
return error_msg + " " + context + ": " + detail;
}
private:
/// input adapter
input_adapter_t ia = nullptr;
......@@ -8293,7 +8315,34 @@ class binary_writer
}
/*!
@brief[in] j JSON value to serialize
@param[in] j JSON value to serialize
@pre j.type() == value_t::object
*/
void write_bson(const BasicJsonType& j)
{
switch (j.type())
{
case value_t::object:
{
write_bson_object(*j.m_value.object);
break;
}
case value_t::discarded:
{
break;
}
default:
{
JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name())));
break;
}
}
}
/*!
@param[in] j JSON value to serialize
*/
void write_cbor(const BasicJsonType& j)
{
......@@ -8537,7 +8586,7 @@ class binary_writer
}
/*!
@brief[in] j JSON value to serialize
@param[in] j JSON value to serialize
*/
void write_msgpack(const BasicJsonType& j)
{
......@@ -8936,13 +8985,19 @@ class binary_writer
}
}
private:
//////////
// BSON //
//////////
/*!
@return The size of a BSON document entry header, including the id marker and the entry name size (and its null-terminator).
@return The size of a BSON document entry header, including the id marker
and the entry name size (and its null-terminator).
*/
static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name)
{
const auto it = name.find(static_cast<typename BasicJsonType::string_t::value_type>(0));
if (it != BasicJsonType::string_t::npos)
if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos))
{
JSON_THROW(out_of_range::create(409,
"BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")"));
......@@ -8954,7 +9009,8 @@ class binary_writer
/*!
@brief Writes the given @a element_type and @a name to the output adapter
*/
void write_bson_entry_header(const typename BasicJsonType::string_t& name, std::uint8_t element_type)
void write_bson_entry_header(const typename BasicJsonType::string_t& name,
std::uint8_t element_type)
{
oa->write_character(static_cast<CharType>(element_type)); // boolean
oa->write_characters(
......@@ -8965,7 +9021,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and boolean value @a value
*/
void write_bson_boolean(const typename BasicJsonType::string_t& name, const bool value)
void write_bson_boolean(const typename BasicJsonType::string_t& name,
const bool value)
{
write_bson_entry_header(name, 0x08);
oa->write_character(value ? static_cast<CharType>(0x01) : static_cast<CharType>(0x00));
......@@ -8974,7 +9031,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and double value @a value
*/
void write_bson_double(const typename BasicJsonType::string_t& name, const double value)
void write_bson_double(const typename BasicJsonType::string_t& name,
const double value)
{
write_bson_entry_header(name, 0x01);
write_number<double, true>(value);
......@@ -8991,7 +9049,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and string value @a value
*/
void write_bson_string(const typename BasicJsonType::string_t& name, const typename BasicJsonType::string_t& value)
void write_bson_string(const typename BasicJsonType::string_t& name,
const typename BasicJsonType::string_t& value)
{
write_bson_entry_header(name, 0x02);
......@@ -9027,7 +9086,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and integer @a value
*/
void write_bson_integer(const typename BasicJsonType::string_t& name, const std::int64_t value)
void write_bson_integer(const typename BasicJsonType::string_t& name,
const std::int64_t value)
{
if ((std::numeric_limits<std::int32_t>::min)() <= value and value <= (std::numeric_limits<std::int32_t>::max)())
{
......@@ -9041,7 +9101,6 @@ class binary_writer
}
}
/*!
@return The size of the BSON-encoded unsigned integer in @a j
*/
......@@ -9060,7 +9119,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and unsigned @a value
*/
void write_bson_unsigned(const typename BasicJsonType::string_t& name, const std::uint64_t value)
void write_bson_unsigned(const typename BasicJsonType::string_t& name,
const std::uint64_t value)
{
if (value <= static_cast<std::uint64_t>((std::numeric_limits<std::int32_t>::max)()))
{
......@@ -9082,13 +9142,13 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and object @a value
*/
void write_bson_object_entry(const typename BasicJsonType::string_t& name, const typename BasicJsonType::object_t& value)
void write_bson_object_entry(const typename BasicJsonType::string_t& name,
const typename BasicJsonType::object_t& value)
{
write_bson_entry_header(name, 0x03); // object
write_bson_object(value);
}
/*!
@return The size of the BSON-encoded array @a value
*/
......@@ -9107,10 +9167,11 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and array @a value
*/
void write_bson_array(const typename BasicJsonType::string_t& name, const typename BasicJsonType::array_t& value)
void write_bson_array(const typename BasicJsonType::string_t& name,
const typename BasicJsonType::array_t& value)
{
write_bson_entry_header(name, 0x04); // array
write_number<std::int32_t, true>(calc_bson_array_size(value));
write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_array_size(value)));
for (const auto& el : value)
{
......@@ -9120,75 +9181,95 @@ class binary_writer
oa->write_character(static_cast<CharType>(0x00));
}
/*!
@brief Calculates the size necessary to serialize the JSON value @a j with its @a name
@return The calculated size for the BSON document entry for @a j with the given @a name.
*/
static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, const BasicJsonType& j)
static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name,
const BasicJsonType& j)
{
const auto header_size = calc_bson_entry_header_size(name);
switch (j.type())
{
// LCOV_EXCL_START
default:
assert(false);
return 0ul;
// LCOV_EXCL_STOP
case value_t::discarded:
return 0ul;
case value_t::object:
return header_size + calc_bson_object_size(*j.m_value.object);
case value_t::array:
return header_size + calc_bson_array_size(*j.m_value.array);
case value_t::boolean:
return header_size + 1ul;
case value_t::number_float:
return header_size + 8ul;
case value_t::number_integer:
return header_size + calc_bson_integer_size(j.m_value.number_integer);
case value_t::number_unsigned:
return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned);
case value_t::string:
return header_size + calc_bson_string_size(*j.m_value.string);
case value_t::null:
return header_size + 0ul;
// LCOV_EXCL_START
default:
assert(false);
return 0ul;
// LCOV_EXCL_STOP
};
}
/*!
@brief Serializes the JSON value @a j to BSON and associates it with the key @a name.
@param name The name to associate with the JSON entity @a j within the current BSON document
@return The size of the bson entry
@brief Serializes the JSON value @a j to BSON and associates it with the
key @a name.
@param name The name to associate with the JSON entity @a j within the
current BSON document
@return The size of the BSON entry
*/
void write_bson_element(const typename BasicJsonType::string_t& name, const BasicJsonType& j)
void write_bson_element(const typename BasicJsonType::string_t& name,
const BasicJsonType& j)
{
switch (j.type())
{
// LCOV_EXCL_START
default:
assert(false);
return;
// LCOV_EXCL_STOP
case value_t::discarded:
return;
case value_t::object:
return write_bson_object_entry(name, *j.m_value.object);
case value_t::array:
return write_bson_array(name, *j.m_value.array);
case value_t::boolean:
return write_bson_boolean(name, j.m_value.boolean);
case value_t::number_float:
return write_bson_double(name, j.m_value.number_float);
case value_t::number_integer:
return write_bson_integer(name, j.m_value.number_integer);
case value_t::number_unsigned:
return write_bson_unsigned(name, j.m_value.number_unsigned);
case value_t::string:
return write_bson_string(name, *j.m_value.string);
case value_t::null:
return write_bson_null(name);
// LCOV_EXCL_START
default:
assert(false);
return;
// LCOV_EXCL_STOP
};
}
......@@ -9216,7 +9297,7 @@ class binary_writer
*/
void write_bson_object(const typename BasicJsonType::object_t& value)
{
write_number<std::int32_t, true>(calc_bson_object_size(value));
write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_object_size(value)));
for (const auto& el : value)
{
......@@ -9226,64 +9307,38 @@ class binary_writer
oa->write_character(static_cast<CharType>(0x00));
}
/*!
@param[in] j JSON value to serialize
@pre j.type() == value_t::object
*/
void write_bson(const BasicJsonType& j)
{
switch (j.type())
{
case value_t::object:
{
write_bson_object(*j.m_value.object);
break;
}
case value_t::discarded:
{
break;
}
//////////
// CBOR //
//////////
default:
{
JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name())));
break;
}
}
static constexpr CharType get_cbor_float_prefix(float /*unused*/)
{
return static_cast<CharType>(0xFA); // Single-Precision Float
}
static constexpr CharType get_cbor_float_prefix(double /*unused*/)
{
return static_cast<CharType>(0xFB); // Double-Precision Float
}
private:
/*
@brief write a number to output input
@param[in] n number of type @a NumberType
@tparam NumberType the type of the number
@tparam OutputIsLittleEndian Set to true if output data is
required to be little endian
/////////////
// MsgPack //
/////////////
@note This function needs to respect the system's endianess, because bytes
in CBOR, MessagePack, and UBJSON are stored in network order (big
endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool OutputIsLittleEndian = false>
void write_number(const NumberType n)
static constexpr CharType get_msgpack_float_prefix(float /*unused*/)
{
// step 1: write number to array of length NumberType
std::array<CharType, sizeof(NumberType)> vec;
std::memcpy(vec.data(), &n, sizeof(NumberType));
// step 2: write array to output (with possible reordering)
if (is_little_endian and not OutputIsLittleEndian)
{
// reverse byte order prior to conversion if necessary
std::reverse(vec.begin(), vec.end());
}
return static_cast<CharType>(0xCA); // float 32
}
oa->write_characters(vec.data(), sizeof(NumberType));
static constexpr CharType get_msgpack_float_prefix(double /*unused*/)
{
return static_cast<CharType>(0xCB); // float 64
}
////////////
// UBJSON //
////////////
// UBJSON: write number (floating point)
template<typename NumberType, typename std::enable_if<
std::is_floating_point<NumberType>::value, int>::type = 0>
......@@ -9484,34 +9539,47 @@ class binary_writer
}
}
static constexpr CharType get_cbor_float_prefix(float /*unused*/)
static constexpr CharType get_ubjson_float_prefix(float /*unused*/)
{
return static_cast<CharType>(0xFA); // Single-Precision Float
return 'd'; // float 32
}
static constexpr CharType get_cbor_float_prefix(double /*unused*/)
static constexpr CharType get_ubjson_float_prefix(double /*unused*/)
{
return static_cast<CharType>(0xFB); // Double-Precision Float
return 'D'; // float 64
}
static constexpr CharType get_msgpack_float_prefix(float /*unused*/)
{
return static_cast<CharType>(0xCA); // float 32
}
///////////////////////
// Utility functions //
///////////////////////
static constexpr CharType get_msgpack_float_prefix(double /*unused*/)
{
return static_cast<CharType>(0xCB); // float 64
}
/*
@brief write a number to output input
static constexpr CharType get_ubjson_float_prefix(float /*unused*/)
{
return 'd'; // float 32
}
@param[in] n number of type @a NumberType
@tparam NumberType the type of the number
@tparam OutputIsLittleEndian Set to true if output data is
required to be little endian
static constexpr CharType get_ubjson_float_prefix(double /*unused*/)
@note This function needs to respect the system's endianess, because bytes
in CBOR, MessagePack, and UBJSON are stored in network order (big
endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool OutputIsLittleEndian = false>
void write_number(const NumberType n)
{
return 'D'; // float 64
// step 1: write number to array of length NumberType
std::array<CharType, sizeof(NumberType)> vec;
std::memcpy(vec.data(), &n, sizeof(NumberType));
// step 2: write array to output (with possible reordering)
if (is_little_endian and not OutputIsLittleEndian)
{
// reverse byte order prior to conversion if necessary
std::reverse(vec.begin(), vec.end());
}
oa->write_characters(vec.data(), sizeof(NumberType));
}
private:
......
......@@ -48,7 +48,7 @@ TEST_CASE("BSON")
SECTION("null")
{
json j = nullptr;
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is null");
}
......@@ -57,14 +57,14 @@ TEST_CASE("BSON")
SECTION("true")
{
json j = true;
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean");
}
SECTION("false")
{
json j = false;
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean");
}
}
......@@ -72,29 +72,29 @@ TEST_CASE("BSON")
SECTION("number")
{
json j = 42;
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number");
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number");
}
SECTION("float")
{
json j = 4.2;
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number");
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number");
}
SECTION("string")
{
json j = "not supported";
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is string");
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is string");
}
SECTION("array")
{
json j = std::vector<int> {1, 2, 3, 4, 5, 6, 7};
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is array");
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is array");
}
}
......@@ -104,7 +104,7 @@ TEST_CASE("BSON")
{
{ std::string("en\0try", 6), true }
};
REQUIRE_THROWS_AS(json::to_bson(j), json::out_of_range&);
CHECK_THROWS_AS(json::to_bson(j), json::out_of_range&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.409] BSON key cannot contain code point U+0000 (at byte 2)");
}
......@@ -541,6 +541,30 @@ TEST_CASE("BSON")
CHECK(json::from_bson(result, true, false) == j);
}
}
SECTION("Examples from http://bsonspec.org/faq.html")
{
SECTION("Example 1")
{
std::vector<std::uint8_t> input = {0x16, 0x00, 0x00, 0x00, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x06, 0x00, 0x00, 0x00, 'w', 'o', 'r', 'l', 'd', 0x00, 0x00};
json parsed = json::from_bson(input);
json expected = {{"hello", "world"}};
CHECK(parsed == expected);
auto dumped = json::to_bson(parsed);
CHECK(dumped == input);
}
SECTION("Example 2")
{
std::vector<std::uint8_t> input = {0x31, 0x00, 0x00, 0x00, 0x04, 'B', 'S', 'O', 'N', 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x30, 0x00, 0x08, 0x00, 0x00, 0x00, 'a', 'w', 'e', 's', 'o', 'm', 'e', 0x00, 0x01, 0x31, 0x00, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x14, 0x40, 0x10, 0x32, 0x00, 0xc2, 0x07, 0x00, 0x00, 0x00, 0x00};
json parsed = json::from_bson(input);
json expected = {{"BSON", {"awesome", 5.05, 1986}}};
CHECK(parsed == expected);
auto dumped = json::to_bson(parsed);
//CHECK(dumped == input); // see https://github.com/nlohmann/json/pull/1254#issuecomment-432831216
CHECK(json::from_bson(dumped) == expected);
}
}
}
TEST_CASE("BSON input/output_adapters")
......@@ -601,10 +625,6 @@ TEST_CASE("BSON input/output_adapters")
}
}
class SaxCountdown
{
public:
......@@ -675,86 +695,84 @@ class SaxCountdown
int events_left = 0;
};
TEST_CASE("Incomplete BSON INPUT")
TEST_CASE("Incomplete BSON Input")
{
std::vector<uint8_t> incomplete_bson =
SECTION("Incomplete BSON Input 1")
{
0x0D, 0x00, 0x00, 0x00, // size (little endian)
0x08, // entry: boolean
'e', 'n', 't' // unexpected EOF
};
std::vector<uint8_t> incomplete_bson =
{
0x0D, 0x00, 0x00, 0x00, // size (little endian)
0x08, // entry: boolean
'e', 'n', 't' // unexpected EOF
};
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 9: syntax error while parsing BSON cstring: unexpected end of input");
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 9: syntax error while parsing BSON cstring: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
TEST_CASE("Incomplete BSON INPUT 2")
{
std::vector<uint8_t> incomplete_bson =
SECTION("Incomplete BSON Input 2")
{
0x0D, 0x00, 0x00, 0x00, // size (little endian)
0x08, // entry: boolean, unexpected EOF
};
std::vector<uint8_t> incomplete_bson =
{
0x0D, 0x00, 0x00, 0x00, // size (little endian)
0x08, // entry: boolean, unexpected EOF
};
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 6: syntax error while parsing BSON cstring: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 6: syntax error while parsing BSON cstring: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
TEST_CASE("Incomplete BSON INPUT 3")
{
std::vector<uint8_t> incomplete_bson =
SECTION("Incomplete BSON Input 3")
{
0x41, 0x00, 0x00, 0x00, // size (little endian)
0x04, /// entry: embedded document
'e', 'n', 't', 'r', 'y', '\x00',
0x35, 0x00, 0x00, 0x00, // size (little endian)
0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x10, 0x00, 0x02, 0x00, 0x00, 0x00
// missing input data...
};
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 28: syntax error while parsing BSON element list: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
SaxCountdown scp(1);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
std::vector<uint8_t> incomplete_bson =
{
0x41, 0x00, 0x00, 0x00, // size (little endian)
0x04, /// entry: embedded document
'e', 'n', 't', 'r', 'y', '\x00',
0x35, 0x00, 0x00, 0x00, // size (little endian)
0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x10, 0x00, 0x02, 0x00, 0x00, 0x00
// missing input data...
};
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 28: syntax error while parsing BSON element list: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
SaxCountdown scp(1);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
TEST_CASE("Incomplete BSON INPUT 4")
{
std::vector<uint8_t> incomplete_bson =
SECTION("Incomplete BSON Input 4")
{
0x0D, 0x00, // size (incomplete), unexpected EOF
};
std::vector<uint8_t> incomplete_bson =
{
0x0D, 0x00, // size (incomplete), unexpected EOF
};
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 3: syntax error while parsing BSON number: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 3: syntax error while parsing BSON number: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
}
TEST_CASE("Unsupported BSON input")
{
std::vector<uint8_t> bson =
......@@ -774,8 +792,6 @@ TEST_CASE("Unsupported BSON input")
CHECK(not json::sax_parse(bson, &scp, json::input_format_t::bson));
}
TEST_CASE("BSON numerical data")
{
SECTION("number")
......@@ -1205,12 +1221,12 @@ TEST_CASE("BSON roundtrips", "[hide]")
(std::istreambuf_iterator<char>(f_bson)),
std::istreambuf_iterator<char>());
SECTION(filename + ": output adapters: std::vector<uint8_t>")
{
std::vector<uint8_t> vec;
json::to_bson(j1, vec);
CHECK(vec == packed);
}
SECTION(filename + ": output adapters: std::vector<uint8_t>")
{
std::vector<uint8_t> vec;
json::to_bson(j1, vec);
CHECK(vec == packed);
}
}
}
}
......
......@@ -139,10 +139,10 @@ bool operator==(Data const& lhs, Data const& rhs)
return lhs.a == rhs.a && lhs.b == rhs.b;
}
bool operator!=(Data const& lhs, Data const& rhs)
{
return !(lhs == rhs);
}
//bool operator!=(Data const& lhs, Data const& rhs)
//{
// return !(lhs == rhs);
//}
}
/////////////////////////////////////////////////////////////////////
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment