Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
J
json
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Chen Yisong
json
Commits
a3473fda
Unverified
Commit
a3473fda
authored
Aug 14, 2017
by
Théo DELRIEU
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add detail/serializer.hpp
parent
c117515e
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
797 additions
and
769 deletions
+797
-769
Makefile
Makefile
+2
-1
serializer.hpp
src/detail/serializer.hpp
+794
-0
json.hpp
src/json.hpp
+1
-768
No files found.
Makefile
View file @
a3473fda
...
@@ -20,7 +20,8 @@ SRCS = ${SRCDIR}/json.hpp \
...
@@ -20,7 +20,8 @@ SRCS = ${SRCDIR}/json.hpp \
${
SRCDIR
}
/detail/iterators/json_reverse_iterator.hpp
\
${
SRCDIR
}
/detail/iterators/json_reverse_iterator.hpp
\
${
SRCDIR
}
/detail/parsing/output_adapters.hpp
\
${
SRCDIR
}
/detail/parsing/output_adapters.hpp
\
${
SRCDIR
}
/detail/parsing/binary_reader.hpp
\
${
SRCDIR
}
/detail/parsing/binary_reader.hpp
\
${
SRCDIR
}
/detail/parsing/binary_writer.hpp
${
SRCDIR
}
/detail/parsing/binary_writer.hpp
\
${
SRCDIR
}
/detail/serializer.hpp
# main target
# main target
all
:
all
:
...
...
src/detail/serializer.hpp
0 → 100644
View file @
a3473fda
#ifndef NLOHMANN_JSON_DETAIL_SERIALIZER_HPP
#define NLOHMANN_JSON_DETAIL_SERIALIZER_HPP
#include <algorithm>
#include <array>
#include <clocale>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <limits>
#include <string>
#include <type_traits>
#include "detail/macro_scope.hpp"
#include "detail/meta.hpp"
#include "detail/parsing/output_adapters.hpp"
namespace
nlohmann
{
namespace
detail
{
///////////////////
// serialization //
///////////////////
template
<
typename
BasicJsonType
>
class
serializer
{
using
string_t
=
typename
BasicJsonType
::
string_t
;
using
number_float_t
=
typename
BasicJsonType
::
number_float_t
;
using
number_integer_t
=
typename
BasicJsonType
::
number_integer_t
;
using
number_unsigned_t
=
typename
BasicJsonType
::
number_unsigned_t
;
public
:
/*!
@brief set up a serializer that writes to a given output adapter

The C locale's numeric formatting data is queried once via
std::localeconv() so that dump_float() can later undo locale-specific
thousands separators and decimal points.

@param[in] s      output stream to serialize to
@param[in] ichar  indentation character to use
*/
serializer(output_adapter_t<char> s, const char ichar)
    : o(std::move(s)),
      loc(std::localeconv()),
      // first character of the locale's separator strings; '\0' means "none"
      thousands_sep(loc->thousands_sep != nullptr ? *(loc->thousands_sep) : '\0'),
      decimal_point(loc->decimal_point != nullptr ? *(loc->decimal_point) : '\0'),
      indent_char(ichar),
      // pre-fill the indentation buffer with 512 indentation characters
      indent_string(512, ichar)
{
}

// copying is disabled: the object holds a pointer to the locale data
serializer(const serializer&) = delete;
serializer& operator=(const serializer&) = delete;
/*!
@brief internal implementation of the serialization function
This function is called by the public member function dump and organizes
the serialization internally. The indentation level is propagated as
additional parameter. In case of arrays and objects, the function is
called recursively.
- strings and object keys are escaped using `escape_string()`
- integer numbers are converted implicitly via `operator<<`
- floating-point numbers are converted to a string using `"%g"` format
@param[in] val value to serialize
@param[in] pretty_print whether the output shall be pretty-printed
@param[in] indent_step the indent level
@param[in] current_indent the current indent level (only used internally)
*/
void
dump
(
const
BasicJsonType
&
val
,
const
bool
pretty_print
,
const
bool
ensure_ascii
,
const
unsigned
int
indent_step
,
const
unsigned
int
current_indent
=
0
)
{
switch
(
val
.
m_type
)
{
case
value_t
:
:
object
:
{
if
(
val
.
m_value
.
object
->
empty
())
{
o
->
write_characters
(
"{}"
,
2
);
return
;
}
if
(
pretty_print
)
{
o
->
write_characters
(
"{
\n
"
,
2
);
// variable to hold indentation for recursive calls
const
auto
new_indent
=
current_indent
+
indent_step
;
if
(
JSON_UNLIKELY
(
indent_string
.
size
()
<
new_indent
))
{
indent_string
.
resize
(
indent_string
.
size
()
*
2
,
' '
);
}
// first n-1 elements
auto
i
=
val
.
m_value
.
object
->
cbegin
();
for
(
std
::
size_t
cnt
=
0
;
cnt
<
val
.
m_value
.
object
->
size
()
-
1
;
++
cnt
,
++
i
)
{
o
->
write_characters
(
indent_string
.
c_str
(),
new_indent
);
o
->
write_character
(
'\"'
);
dump_escaped
(
i
->
first
,
ensure_ascii
);
o
->
write_characters
(
"
\"
: "
,
3
);
dump
(
i
->
second
,
true
,
ensure_ascii
,
indent_step
,
new_indent
);
o
->
write_characters
(
",
\n
"
,
2
);
}
// last element
assert
(
i
!=
val
.
m_value
.
object
->
cend
());
assert
(
std
::
next
(
i
)
==
val
.
m_value
.
object
->
cend
());
o
->
write_characters
(
indent_string
.
c_str
(),
new_indent
);
o
->
write_character
(
'\"'
);
dump_escaped
(
i
->
first
,
ensure_ascii
);
o
->
write_characters
(
"
\"
: "
,
3
);
dump
(
i
->
second
,
true
,
ensure_ascii
,
indent_step
,
new_indent
);
o
->
write_character
(
'\n'
);
o
->
write_characters
(
indent_string
.
c_str
(),
current_indent
);
o
->
write_character
(
'}'
);
}
else
{
o
->
write_character
(
'{'
);
// first n-1 elements
auto
i
=
val
.
m_value
.
object
->
cbegin
();
for
(
std
::
size_t
cnt
=
0
;
cnt
<
val
.
m_value
.
object
->
size
()
-
1
;
++
cnt
,
++
i
)
{
o
->
write_character
(
'\"'
);
dump_escaped
(
i
->
first
,
ensure_ascii
);
o
->
write_characters
(
"
\"
:"
,
2
);
dump
(
i
->
second
,
false
,
ensure_ascii
,
indent_step
,
current_indent
);
o
->
write_character
(
','
);
}
// last element
assert
(
i
!=
val
.
m_value
.
object
->
cend
());
assert
(
std
::
next
(
i
)
==
val
.
m_value
.
object
->
cend
());
o
->
write_character
(
'\"'
);
dump_escaped
(
i
->
first
,
ensure_ascii
);
o
->
write_characters
(
"
\"
:"
,
2
);
dump
(
i
->
second
,
false
,
ensure_ascii
,
indent_step
,
current_indent
);
o
->
write_character
(
'}'
);
}
return
;
}
case
value_t
:
:
array
:
{
if
(
val
.
m_value
.
array
->
empty
())
{
o
->
write_characters
(
"[]"
,
2
);
return
;
}
if
(
pretty_print
)
{
o
->
write_characters
(
"[
\n
"
,
2
);
// variable to hold indentation for recursive calls
const
auto
new_indent
=
current_indent
+
indent_step
;
if
(
JSON_UNLIKELY
(
indent_string
.
size
()
<
new_indent
))
{
indent_string
.
resize
(
indent_string
.
size
()
*
2
,
' '
);
}
// first n-1 elements
for
(
auto
i
=
val
.
m_value
.
array
->
cbegin
();
i
!=
val
.
m_value
.
array
->
cend
()
-
1
;
++
i
)
{
o
->
write_characters
(
indent_string
.
c_str
(),
new_indent
);
dump
(
*
i
,
true
,
ensure_ascii
,
indent_step
,
new_indent
);
o
->
write_characters
(
",
\n
"
,
2
);
}
// last element
assert
(
not
val
.
m_value
.
array
->
empty
());
o
->
write_characters
(
indent_string
.
c_str
(),
new_indent
);
dump
(
val
.
m_value
.
array
->
back
(),
true
,
ensure_ascii
,
indent_step
,
new_indent
);
o
->
write_character
(
'\n'
);
o
->
write_characters
(
indent_string
.
c_str
(),
current_indent
);
o
->
write_character
(
']'
);
}
else
{
o
->
write_character
(
'['
);
// first n-1 elements
for
(
auto
i
=
val
.
m_value
.
array
->
cbegin
();
i
!=
val
.
m_value
.
array
->
cend
()
-
1
;
++
i
)
{
dump
(
*
i
,
false
,
ensure_ascii
,
indent_step
,
current_indent
);
o
->
write_character
(
','
);
}
// last element
assert
(
not
val
.
m_value
.
array
->
empty
());
dump
(
val
.
m_value
.
array
->
back
(),
false
,
ensure_ascii
,
indent_step
,
current_indent
);
o
->
write_character
(
']'
);
}
return
;
}
case
value_t
:
:
string
:
{
o
->
write_character
(
'\"'
);
dump_escaped
(
*
val
.
m_value
.
string
,
ensure_ascii
);
o
->
write_character
(
'\"'
);
return
;
}
case
value_t
:
:
boolean
:
{
if
(
val
.
m_value
.
boolean
)
{
o
->
write_characters
(
"true"
,
4
);
}
else
{
o
->
write_characters
(
"false"
,
5
);
}
return
;
}
case
value_t
:
:
number_integer
:
{
dump_integer
(
val
.
m_value
.
number_integer
);
return
;
}
case
value_t
:
:
number_unsigned
:
{
dump_integer
(
val
.
m_value
.
number_unsigned
);
return
;
}
case
value_t
:
:
number_float
:
{
dump_float
(
val
.
m_value
.
number_float
);
return
;
}
case
value_t
:
:
discarded
:
{
o
->
write_characters
(
"<discarded>"
,
11
);
return
;
}
case
value_t
:
:
null
:
{
o
->
write_characters
(
"null"
,
4
);
return
;
}
}
}
private
:
/*!
@brief returns the number of expected bytes following in UTF-8 string

The lead byte is classified by range: 0x00..0x7F stands alone,
0xC0..0xDF announces one more byte, 0xE0..0xEF two, and 0xF0..0xF7 three.
Every other value yields std::string::npos.

@param[in] u  the first byte of a UTF-8 string
@return the number of expected bytes following, or std::string::npos
*/
static constexpr std::size_t bytes_following(const uint8_t u)
{
    // kept as one return expression so it stays a valid C++11 constexpr
    return (u < 0x80) ? 0
           : (0xC0 <= u and u < 0xE0) ? 1
           : (0xE0 <= u and u < 0xF0) ? 2
           : (0xF0 <= u and u < 0xF8) ? 3
           : std::string::npos;
}
/*!
@brief calculates the extra space to escape a JSON string

Counts how many characters the escaped form of @a s needs beyond
`s.size()`:
- quotation mark, backslash, \\b, \\f, \\n, \\r and \\t become a two-character
  escape (one extra character each),
- the remaining control characters 0x00..0x1F become \uxxxx (five extra),
- if @a ensure_ascii is set, 0x7F and every multi-byte UTF-8 sequence
  become \uxxxx, or \uxxxx\uxxxx for four-byte sequences.

@param[in] s  the string to escape
@param[in] ensure_ascii  whether to escape non-ASCII characters with
                         \uXXXX sequences
@return the number of characters required to escape string @a s

@complexity Linear in the length of string @a s.
*/
static std::size_t extra_space(const string_t& s,
                               const bool ensure_ascii) noexcept
{
    std::size_t additional = 0;

    for (std::size_t idx = 0; idx < s.size(); ++idx)
    {
        const auto c = s[idx];

        // characters with a short backslash escape: c (1 byte) -> \x (2 bytes)
        if (c == '"' or c == '\\' or c == '\b' or c == '\f' or
                c == '\n' or c == '\r' or c == '\t')
        {
            additional += 1;
            continue;
        }

        // any other control character: c (1 byte) -> \uxxxx (6 bytes)
        if (0x00 <= c and c <= 0x1F)
        {
            additional += 5;
            continue;
        }

        // everything else is copied verbatim unless ASCII output is requested
        const bool needs_unicode_escape =
            ensure_ascii and ((c & 0x80) or c == 0x7F);
        if (not needs_unicode_escape)
        {
            continue;
        }

        const auto bytes = bytes_following(static_cast<uint8_t>(c));

        // invalid characters will be detected by throw_if_invalid_utf8
        assert(bytes != std::string::npos);

        // a 4-byte sequence (3 following bytes) needs a surrogate pair
        // \uxxxx\uxxxx (12 bytes); shorter ones need a single \uxxxx (6 bytes)
        additional += ((bytes == 3) ? 12 : 6) - bytes - 1;

        // skip the additional bytes
        idx += bytes;
    }

    return additional;
}
/*!
@brief write the \uxxxx escape of a code point into a pre-filled buffer

On entry, `result[pos]` must already hold the backslash that starts the
escape. The function writes `u` plus four lowercase hex digits after it;
code points above U+FFFF are written as a surrogate pair
`\uxxxx\uxxxx`, relying on a second backslash already being present in
@a result. On return, @a pos is the index right after the escape.

@param[in] codepoint   code point to escape (0x00..0x10FFFF)
@param[in,out] result  buffer that receives the escape sequence
@param[in,out] pos     index of the leading backslash; advanced past the
                       written escape
*/
static void escape_codepoint(int codepoint, string_t& result, std::size_t& pos)
{
    // expecting a proper codepoint
    assert(0x00 <= codepoint and codepoint <= 0x10FFFF);

    // the last written character was the backslash before the 'u'
    assert(result[pos] == '\\');

    // convert a number 0..15 to its hex representation (0..f)
    static const std::array<char, 16> hexify =
    {
        {
            '0', '1', '2', '3', '4', '5', '6', '7',
            '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
        }
    };

    // writes "uxxxx" for one 16-bit unit behind the backslash at result[pos]
    const auto write_unit = [&result, &pos](const int unit)
    {
        result[++pos] = 'u';
        for (int shift = 12; shift >= 0; shift -= 4)
        {
            result[++pos] = hexify[(unit >> shift) & 0x0F];
        }
    };

    if (codepoint < 0x10000)
    {
        // codepoints U+0000..U+FFFF can be represented as \uxxxx.
        write_unit(codepoint);
    }
    else
    {
        // codepoints U+10000..U+10FFFF need a surrogate pair to be
        // represented as \uxxxx\uxxxx.
        // http://www.unicode.org/faq/utf_bom.html#utf16-4
        codepoint -= 0x10000;
        const int high_surrogate = 0xD800 | ((codepoint >> 10) & 0x3FF);
        const int low_surrogate = 0xDC00 | (codepoint & 0x3FF);

        write_unit(high_surrogate);

        // step onto the second backslash, which is already in the output
        ++pos;

        write_unit(low_surrogate);
    }

    // leave pos right behind the last written hex digit
    ++pos;
}
/*!
@brief dump escaped string

Escape a string by replacing certain special characters by a sequence of an
escape character (backslash) and another character and other control
characters by a sequence of "\u" followed by a four-digit hex
representation. The escaped string is written to output stream @a o.

The string is first validated with throw_if_invalid_utf8(). If
extra_space() reports that nothing needs escaping, it is written
unchanged.

@param[in] s  the string to escape
@param[in] ensure_ascii  whether to escape non-ASCII characters with
                         \uXXXX sequences

@complexity Linear in the length of string @a s.
*/
void dump_escaped(const string_t& s, const bool ensure_ascii) const
{
    throw_if_invalid_utf8(s);

    const auto space = extra_space(s, ensure_ascii);
    if (space == 0)
    {
        // nothing to escape: emit the string as-is
        o->write_characters(s.c_str(), s.size());
        return;
    }

    // create a result string of necessary size; it is pre-filled with
    // backslashes so that only the character after each backslash has to
    // be written
    string_t result(s.size() + space, '\\');
    std::size_t pos = 0;

    for (std::size_t i = 0; i < s.size(); ++i)
    {
        const auto c = s[i];

        // second character of a two-character escape, or '\0' if none
        char shorthand = '\0';
        switch (c)
        {
            case '"':  // quotation mark (0x22)
                shorthand = '"';
                break;
            case '\\': // reverse solidus (0x5C)
                shorthand = '\\';
                break;
            case '\b': // backspace (0x08)
                shorthand = 'b';
                break;
            case '\f': // formfeed (0x0C)
                shorthand = 'f';
                break;
            case '\n': // newline (0x0A)
                shorthand = 'n';
                break;
            case '\r': // carriage return (0x0D)
                shorthand = 'r';
                break;
            case '\t': // horizontal tab (0x09)
                shorthand = 't';
                break;
            default:
                break;
        }

        if (shorthand != '\0')
        {
            // result[pos] already holds the backslash
            result[pos + 1] = shorthand;
            pos += 2;
            continue;
        }

        // escape control characters (0x00..0x1F) or, if
        // ensure_ascii parameter is used, non-ASCII characters
        const bool is_control = (0x00 <= c and c <= 0x1F);
        const bool is_non_ascii = ensure_ascii and ((c & 0x80) or c == 0x7F);

        if (not (is_control or is_non_ascii))
        {
            // all other characters are added as-is
            result[pos++] = c;
            continue;
        }

        const auto bytes = bytes_following(static_cast<uint8_t>(c));

        // invalid characters will be detected by throw_if_invalid_utf8
        assert(bytes != std::string::npos);

        // check that the additional bytes are present
        assert(i + bytes < s.size());

        // bytes is unsigned type:
        assert(bytes <= 3);

        // to use \uxxxx escaping, we first need to calculate
        // the codepoint from the UTF-8 bytes
        int codepoint = 0;
        if (bytes == 0)
        {
            codepoint = s[i] & 0xFF;
        }
        else if (bytes == 1)
        {
            codepoint = ((s[i] & 0x3F) << 6)
                        + (s[i + 1] & 0x7F);
        }
        else if (bytes == 2)
        {
            codepoint = ((s[i] & 0x1F) << 12)
                        + ((s[i + 1] & 0x7F) << 6)
                        + (s[i + 2] & 0x7F);
        }
        else if (bytes == 3)
        {
            codepoint = ((s[i] & 0xF) << 18)
                        + ((s[i + 1] & 0x7F) << 12)
                        + ((s[i + 2] & 0x7F) << 6)
                        + (s[i + 3] & 0x7F);
        }

        escape_codepoint(codepoint, result, pos);

        // the continuation bytes have been consumed
        i += bytes;
    }

    // the buffer must have been filled exactly
    assert(pos == result.size());
    o->write_characters(result.c_str(), result.size());
}
/*!
@brief dump an integer

Dump a given integer to output stream @a o. Works internally with
@a number_buffer: the decimal digits are produced least-significant first,
a minus sign is appended for negative values, and the buffer prefix is
reversed before it is written.

@param[in] x  integer number (signed or unsigned) to dump
@tparam NumberType either @a number_integer_t or @a number_unsigned_t
*/
template<typename NumberType, detail::enable_if_t<
             std::is_same<NumberType, number_unsigned_t>::value or
             std::is_same<NumberType, number_integer_t>::value,
             int> = 0>
void dump_integer(NumberType x)
{
    // special case for "0"
    if (x == 0)
    {
        o->write_character('0');
        return;
    }

    // x is non-zero here, so "not positive" means negative; written without
    // `x < 0` so unsigned types do not trigger a warning (see issue #755)
    const bool negative = not (x > 0);

    std::size_t length = 0;

    // x != 0 is guaranteed on entry, so at least one digit is produced
    do
    {
        // spare 1 byte for '\0'
        assert(length < number_buffer.size() - 1);

        // the remainder may be negative for negative x; take its magnitude
        const auto digit = std::labs(static_cast<long>(x % 10));
        number_buffer[length++] = static_cast<char>('0' + digit);
        x /= 10;
    }
    while (x != 0);

    if (negative)
    {
        // make sure there is capacity for the '-'
        assert(length < number_buffer.size() - 2);
        number_buffer[length++] = '-';
    }

    // digits were generated in reverse order
    const auto first = number_buffer.begin();
    std::reverse(first, first + length);
    o->write_characters(number_buffer.data(), length);
}
/*!
@brief dump a floating-point number

Dump a given floating-point number to output stream @a o. Works internally
with @a number_buffer.

Non-finite values (NaN, infinities) are written as `null`. Finite values
are formatted with a `"%.*g"` conversion using `digits10` significant
digits; locale-specific thousands separators are removed and a
locale-specific decimal point is replaced by `.`. If the result contains
neither `.` nor `e`, the suffix `.0` is appended.

@param[in] x  floating-point number to dump
*/
void dump_float(number_float_t x)
{
    // NaN / inf (std::isfinite is false for NaN as well)
    if (not std::isfinite(x))
    {
        o->write_characters("null", 4);
        return;
    }

    // number of significant digits passed as precision to the conversion
    static constexpr auto d = std::numeric_limits<number_float_t>::digits10;

    // the actual conversion; <cstdio> only guarantees the std:: name, and the
    // parentheses keep a function-like snprintf macro from being expanded
    std::ptrdiff_t len = (std::snprintf)(number_buffer.data(), number_buffer.size(), "%.*g", d, x);

    // negative value indicates an error
    assert(len > 0);
    // check if buffer was large enough
    assert(static_cast<std::size_t>(len) < number_buffer.size());

    // erase thousands separator
    if (thousands_sep != '\0')
    {
        const auto end = std::remove(number_buffer.begin(),
                                     number_buffer.begin() + len, thousands_sep);
        std::fill(end, number_buffer.end(), '\0');
        assert((end - number_buffer.begin()) <= len);
        len = (end - number_buffer.begin());
    }

    // only the first len characters belong to the current number
    const auto written_end = number_buffer.begin() + len;

    // convert decimal point to '.'
    if (decimal_point != '\0' and decimal_point != '.')
    {
        const auto dec_pos = std::find(number_buffer.begin(), written_end, decimal_point);
        if (dec_pos != written_end)
        {
            *dec_pos = '.';
        }
    }

    o->write_characters(number_buffer.data(), static_cast<std::size_t>(len));

    // determine if need to append ".0"
    const bool value_is_int_like =
        std::none_of(number_buffer.begin(), written_end,
                     [](char c)
    {
        return (c == '.' or c == 'e');
    });

    if (value_is_int_like)
    {
        o->write_characters(".0", 2);
    }
}
/*!
@brief check whether a string is UTF-8 encoded

The function checks each byte of a string whether it is UTF-8 encoded. The
result of the check is stored in the @a state parameter. The function must
be called initially with state 0 (accept). State 1 means the string must
be rejected, because the current byte is not allowed. If the string is
completely processed, but the state is non-zero, the string ended
prematurely; that is, the last byte indicated more bytes should have
followed.

The table has two parts: entries 0..255 map a byte to a character class,
entries 256..399 hold the transitions, 16 entries per state, indexed by
that class.

@param[in,out] state  the state of the decoding
@param[in] byte       next byte to decode

@note The function has been edited: a std::array is used and the code
      point is not calculated.

@copyright Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
@sa http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
*/
static void decode(uint8_t& state, const uint8_t byte)
{
    static const std::array<uint8_t, 400> utf8d =
    {
        {
            // byte -> character class
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..0F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10..1F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..2F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 30..3F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..4F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50..5F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..6F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70..7F
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80..8F
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 90..9F
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // A0..AF
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // B0..BF
            8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0..CF
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF
            0xA, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // E0..EF
            0xB, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // F0..FF
            // (state, character class) -> next state
            0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // s1
            1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s2
            1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, // s3
            1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s4
            1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, // s5
            1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s6
            1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s7
            1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  // s8
        }
    };

    // look up the class of the byte, then the transition for (state, class)
    const std::size_t char_class = utf8d[byte];
    const std::size_t transition = 256u + state * 16u + char_class;
    state = utf8d[transition];
}
/*!
@brief throw an exception if a string is not UTF-8 encoded

Every byte of @a str is fed to decode(). An exception is raised as soon as
the decoder reaches the reject state (1), or after the last byte if the
decoder is not back in the accept state (0), i.e. the string ends in the
middle of a multi-byte sequence.

@param[in] str  UTF-8 string to check
@throw type_error.316 if passed string is not UTF-8 encoded

@since version 3.0.0
*/
static void throw_if_invalid_utf8(const std::string& str)
{
    // renders a byte as exactly two uppercase hex digits (e.g. "0A", "FF");
    // done by table lookup so no stream headers are needed
    const auto hex_byte = [](const uint8_t b) -> std::string
    {
        static const char digits[] = "0123456789ABCDEF";
        std::string hex(2, '0');
        hex[0] = digits[(b >> 4) & 0x0F];
        hex[1] = digits[b & 0x0F];
        return hex;
    };

    // start with state 0 (= accept)
    uint8_t state = 0;

    for (std::size_t i = 0; i < str.size(); ++i)
    {
        const auto byte = static_cast<uint8_t>(str[i]);
        decode(state, byte);
        if (state == 1)
        {
            // state 1 means reject
            JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + hex_byte(byte)));
        }
    }

    if (state != 0)
    {
        // we finish reading, but do not accept: string was incomplete
        // (a non-zero state implies at least one byte was read, so back()
        // is safe here)
        JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + hex_byte(static_cast<uint8_t>(str.back()))));
    }
}
private:
/// the output of the serializer; all characters are written through it
output_adapter_t<char> o = nullptr;

/// scratch buffer in which dump_integer() and dump_float() build the text
/// of a number before writing it (a hopefully large enough 64 characters)
std::array<char, 64> number_buffer{{}};

/// the locale data, obtained from std::localeconv() in the constructor
const std::lconv* loc = nullptr;
/// the locale's thousand separator character ('\0' if there is none)
const char thousands_sep = '\0';
/// the locale's decimal point character ('\0' if there is none)
const char decimal_point = '\0';

/// the indentation character
const char indent_char;

/// the indentation string: a buffer of indentation characters from which
/// dump() writes a prefix of the required length, enlarging it on demand
string_t indent_string;
};
}
}
#endif
src/json.hpp
View file @
a3473fda
...
@@ -68,6 +68,7 @@ SOFTWARE.
...
@@ -68,6 +68,7 @@ SOFTWARE.
#include "detail/parsing/output_adapters.hpp"
#include "detail/parsing/output_adapters.hpp"
#include "detail/parsing/binary_reader.hpp"
#include "detail/parsing/binary_reader.hpp"
#include "detail/parsing/binary_writer.hpp"
#include "detail/parsing/binary_writer.hpp"
#include "detail/serializer.hpp"
/*!
/*!
@brief namespace for Niels Lohmann
@brief namespace for Niels Lohmann
...
@@ -78,774 +79,6 @@ namespace nlohmann
...
@@ -78,774 +79,6 @@ namespace nlohmann
{
{
namespace
detail
namespace
detail
{
{
///////////////////
// serialization //
///////////////////
template
<
typename
BasicJsonType
>
class
serializer
{
using
string_t
=
typename
BasicJsonType
::
string_t
;
using
number_float_t
=
typename
BasicJsonType
::
number_float_t
;
using
number_integer_t
=
typename
BasicJsonType
::
number_integer_t
;
using
number_unsigned_t
=
typename
BasicJsonType
::
number_unsigned_t
;
public
:
/*!
@param[in] s output stream to serialize to
@param[in] ichar indentation character to use
*/
serializer
(
output_adapter_t
<
char
>
s
,
const
char
ichar
)
:
o
(
std
::
move
(
s
)),
loc
(
std
::
localeconv
()),
thousands_sep
(
loc
->
thousands_sep
==
nullptr
?
'\0'
:
*
(
loc
->
thousands_sep
)),
decimal_point
(
loc
->
decimal_point
==
nullptr
?
'\0'
:
*
(
loc
->
decimal_point
)),
indent_char
(
ichar
),
indent_string
(
512
,
indent_char
)
{}
// delete because of pointer members
serializer
(
const
serializer
&
)
=
delete
;
serializer
&
operator
=
(
const
serializer
&
)
=
delete
;
/*!
@brief internal implementation of the serialization function
This function is called by the public member function dump and organizes
the serialization internally. The indentation level is propagated as
additional parameter. In case of arrays and objects, the function is
called recursively.
- strings and object keys are escaped using `escape_string()`
- integer numbers are converted implicitly via `operator<<`
- floating-point numbers are converted to a string using `"%g"` format
@param[in] val value to serialize
@param[in] pretty_print whether the output shall be pretty-printed
@param[in] indent_step the indent level
@param[in] current_indent the current indent level (only used internally)
*/
void
dump
(
const
BasicJsonType
&
val
,
const
bool
pretty_print
,
const
bool
ensure_ascii
,
const
unsigned
int
indent_step
,
const
unsigned
int
current_indent
=
0
)
{
switch
(
val
.
m_type
)
{
case
value_t
:
:
object
:
{
if
(
val
.
m_value
.
object
->
empty
())
{
o
->
write_characters
(
"{}"
,
2
);
return
;
}
if
(
pretty_print
)
{
o
->
write_characters
(
"{
\n
"
,
2
);
// variable to hold indentation for recursive calls
const
auto
new_indent
=
current_indent
+
indent_step
;
if
(
JSON_UNLIKELY
(
indent_string
.
size
()
<
new_indent
))
{
indent_string
.
resize
(
indent_string
.
size
()
*
2
,
' '
);
}
// first n-1 elements
auto
i
=
val
.
m_value
.
object
->
cbegin
();
for
(
std
::
size_t
cnt
=
0
;
cnt
<
val
.
m_value
.
object
->
size
()
-
1
;
++
cnt
,
++
i
)
{
o
->
write_characters
(
indent_string
.
c_str
(),
new_indent
);
o
->
write_character
(
'\"'
);
dump_escaped
(
i
->
first
,
ensure_ascii
);
o
->
write_characters
(
"
\"
: "
,
3
);
dump
(
i
->
second
,
true
,
ensure_ascii
,
indent_step
,
new_indent
);
o
->
write_characters
(
",
\n
"
,
2
);
}
// last element
assert
(
i
!=
val
.
m_value
.
object
->
cend
());
assert
(
std
::
next
(
i
)
==
val
.
m_value
.
object
->
cend
());
o
->
write_characters
(
indent_string
.
c_str
(),
new_indent
);
o
->
write_character
(
'\"'
);
dump_escaped
(
i
->
first
,
ensure_ascii
);
o
->
write_characters
(
"
\"
: "
,
3
);
dump
(
i
->
second
,
true
,
ensure_ascii
,
indent_step
,
new_indent
);
o
->
write_character
(
'\n'
);
o
->
write_characters
(
indent_string
.
c_str
(),
current_indent
);
o
->
write_character
(
'}'
);
}
else
{
o
->
write_character
(
'{'
);
// first n-1 elements
auto
i
=
val
.
m_value
.
object
->
cbegin
();
for
(
std
::
size_t
cnt
=
0
;
cnt
<
val
.
m_value
.
object
->
size
()
-
1
;
++
cnt
,
++
i
)
{
o
->
write_character
(
'\"'
);
dump_escaped
(
i
->
first
,
ensure_ascii
);
o
->
write_characters
(
"
\"
:"
,
2
);
dump
(
i
->
second
,
false
,
ensure_ascii
,
indent_step
,
current_indent
);
o
->
write_character
(
','
);
}
// last element
assert
(
i
!=
val
.
m_value
.
object
->
cend
());
assert
(
std
::
next
(
i
)
==
val
.
m_value
.
object
->
cend
());
o
->
write_character
(
'\"'
);
dump_escaped
(
i
->
first
,
ensure_ascii
);
o
->
write_characters
(
"
\"
:"
,
2
);
dump
(
i
->
second
,
false
,
ensure_ascii
,
indent_step
,
current_indent
);
o
->
write_character
(
'}'
);
}
return
;
}
case
value_t
:
:
array
:
{
if
(
val
.
m_value
.
array
->
empty
())
{
o
->
write_characters
(
"[]"
,
2
);
return
;
}
if
(
pretty_print
)
{
o
->
write_characters
(
"[
\n
"
,
2
);
// variable to hold indentation for recursive calls
const
auto
new_indent
=
current_indent
+
indent_step
;
if
(
JSON_UNLIKELY
(
indent_string
.
size
()
<
new_indent
))
{
indent_string
.
resize
(
indent_string
.
size
()
*
2
,
' '
);
}
// first n-1 elements
for
(
auto
i
=
val
.
m_value
.
array
->
cbegin
();
i
!=
val
.
m_value
.
array
->
cend
()
-
1
;
++
i
)
{
o
->
write_characters
(
indent_string
.
c_str
(),
new_indent
);
dump
(
*
i
,
true
,
ensure_ascii
,
indent_step
,
new_indent
);
o
->
write_characters
(
",
\n
"
,
2
);
}
// last element
assert
(
not
val
.
m_value
.
array
->
empty
());
o
->
write_characters
(
indent_string
.
c_str
(),
new_indent
);
dump
(
val
.
m_value
.
array
->
back
(),
true
,
ensure_ascii
,
indent_step
,
new_indent
);
o
->
write_character
(
'\n'
);
o
->
write_characters
(
indent_string
.
c_str
(),
current_indent
);
o
->
write_character
(
']'
);
}
else
{
o
->
write_character
(
'['
);
// first n-1 elements
for
(
auto
i
=
val
.
m_value
.
array
->
cbegin
();
i
!=
val
.
m_value
.
array
->
cend
()
-
1
;
++
i
)
{
dump
(
*
i
,
false
,
ensure_ascii
,
indent_step
,
current_indent
);
o
->
write_character
(
','
);
}
// last element
assert
(
not
val
.
m_value
.
array
->
empty
());
dump
(
val
.
m_value
.
array
->
back
(),
false
,
ensure_ascii
,
indent_step
,
current_indent
);
o
->
write_character
(
']'
);
}
return
;
}
case
value_t
:
:
string
:
{
o
->
write_character
(
'\"'
);
dump_escaped
(
*
val
.
m_value
.
string
,
ensure_ascii
);
o
->
write_character
(
'\"'
);
return
;
}
case
value_t
:
:
boolean
:
{
if
(
val
.
m_value
.
boolean
)
{
o
->
write_characters
(
"true"
,
4
);
}
else
{
o
->
write_characters
(
"false"
,
5
);
}
return
;
}
case
value_t
:
:
number_integer
:
{
dump_integer
(
val
.
m_value
.
number_integer
);
return
;
}
case
value_t
:
:
number_unsigned
:
{
dump_integer
(
val
.
m_value
.
number_unsigned
);
return
;
}
case
value_t
:
:
number_float
:
{
dump_float
(
val
.
m_value
.
number_float
);
return
;
}
case
value_t
:
:
discarded
:
{
o
->
write_characters
(
"<discarded>"
,
11
);
return
;
}
case
value_t
:
:
null
:
{
o
->
write_characters
(
"null"
,
4
);
return
;
}
}
}
private
:
/*!
@brief returns the number of expected bytes following in UTF-8 string
@param[in] u the first byte of a UTF-8 string
@return the number of expected bytes following
*/
static
constexpr
std
::
size_t
bytes_following
(
const
uint8_t
u
)
{
return
((
u
<=
127
)
?
0
:
((
192
<=
u
and
u
<=
223
)
?
1
:
((
224
<=
u
and
u
<=
239
)
?
2
:
((
240
<=
u
and
u
<=
247
)
?
3
:
std
::
string
::
npos
))));
}
/*!
@brief calculates the extra space to escape a JSON string
@param[in] s the string to escape
@param[in] ensure_ascii whether to escape non-ASCII characters with
\uXXXX sequences
@return the number of characters required to escape string @a s
@complexity Linear in the length of string @a s.
*/
static
std
::
size_t
extra_space
(
const
string_t
&
s
,
const
bool
ensure_ascii
)
noexcept
{
std
::
size_t
res
=
0
;
for
(
std
::
size_t
i
=
0
;
i
<
s
.
size
();
++
i
)
{
switch
(
s
[
i
])
{
// control characters that can be escaped with a backslash
case
'"'
:
case
'\\'
:
case
'\b'
:
case
'\f'
:
case
'\n'
:
case
'\r'
:
case
'\t'
:
{
// from c (1 byte) to \x (2 bytes)
res
+=
1
;
break
;
}
// control characters that need \uxxxx escaping
case
0x00
:
case
0x01
:
case
0x02
:
case
0x03
:
case
0x04
:
case
0x05
:
case
0x06
:
case
0x07
:
case
0x0B
:
case
0x0E
:
case
0x0F
:
case
0x10
:
case
0x11
:
case
0x12
:
case
0x13
:
case
0x14
:
case
0x15
:
case
0x16
:
case
0x17
:
case
0x18
:
case
0x19
:
case
0x1A
:
case
0x1B
:
case
0x1C
:
case
0x1D
:
case
0x1E
:
case
0x1F
:
{
// from c (1 byte) to \uxxxx (6 bytes)
res
+=
5
;
break
;
}
default
:
{
if
(
ensure_ascii
and
(
s
[
i
]
&
0x80
or
s
[
i
]
==
0x7F
))
{
const
auto
bytes
=
bytes_following
(
static_cast
<
uint8_t
>
(
s
[
i
]));
// invalid characters will be detected by throw_if_invalid_utf8
assert
(
bytes
!=
std
::
string
::
npos
);
if
(
bytes
==
3
)
{
// codepoints that need 4 bytes (i.e., 3 additional
// bytes) in UTF-8 need a surrogate pair when \u
// escaping is used: from 4 bytes to \uxxxx\uxxxx
// (12 bytes)
res
+=
(
12
-
bytes
-
1
);
}
else
{
// from x bytes to \uxxxx (6 bytes)
res
+=
(
6
-
bytes
-
1
);
}
// skip the additional bytes
i
+=
bytes
;
}
break
;
}
}
}
return
res
;
}
/*!
@brief write the hex part of a \uxxxx escape (or of a surrogate pair)

On entry, @a pos must index a backslash that is already present in @a result;
the function relies on @a result having been pre-filled with backslashes and
only writes the 'u' and the hex digits. On exit, @a pos indexes the first
character after the escape sequence.

@param[in] codepoint  code point to escape, in the range 0x00..0x10FFFF
@param[in,out] result buffer the escape sequence is written into
@param[in,out] pos    write position inside @a result
*/
static void escape_codepoint(int codepoint, string_t& result, std::size_t& pos)
{
    // expecting a proper codepoint
    assert(0x00 <= codepoint and codepoint <= 0x10FFFF);

    // the last written character was the backslash before the 'u'
    assert(result[pos] == '\\');

    // convert a number 0..15 to its hex representation (0..f)
    static const std::array<char, 16> hexify =
    {
        {
            '0', '1', '2', '3', '4', '5', '6', '7',
            '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
        }
    };

    // writes 'u' plus the four hex digits of a 16-bit unit, most significant
    // nibble first; pos ends up on the last digit written
    const auto write_unit = [&result, &pos](const int unit)
    {
        result[++pos] = 'u';
        for (int shift = 12; shift >= 0; shift -= 4)
        {
            result[++pos] = hexify[(unit >> shift) & 0x0F];
        }
    };

    if (codepoint < 0x10000)
    {
        // codepoints U+0000..U+FFFF can be represented as \uxxxx.
        write_unit(codepoint);
    }
    else
    {
        // codepoints U+10000..U+10FFFF need a surrogate pair to be
        // represented as \uxxxx\uxxxx.
        // http://www.unicode.org/faq/utf_bom.html#utf16-4
        codepoint -= 0x10000;
        const int lead_unit = 0xD800 | ((codepoint >> 10) & 0x3FF);
        const int trail_unit = 0xDC00 | (codepoint & 0x3FF);

        write_unit(lead_unit);
        // step onto the backslash of the second escape; it is already in
        // the output
        ++pos;
        write_unit(trail_unit);
    }

    // leave pos on the first character after the escape sequence
    ++pos;
}
/*!
@brief dump escaped string
Escape a string by replacing certain special characters by a sequence of an
escape character (backslash) and another character and other control
characters by a sequence of "\u" followed by a four-digit hex
representation. The escaped string is written to output stream @a o.
@param[in] s the string to escape
@param[in] ensure_ascii whether to escape non-ASCII characters with
\uXXXX sequences
@complexity Linear in the length of string @a s.
*/
void
dump_escaped
(
const
string_t
&
s
,
const
bool
ensure_ascii
)
const
{
throw_if_invalid_utf8
(
s
);
const
auto
space
=
extra_space
(
s
,
ensure_ascii
);
if
(
space
==
0
)
{
o
->
write_characters
(
s
.
c_str
(),
s
.
size
());
return
;
}
// create a result string of necessary size
string_t
result
(
s
.
size
()
+
space
,
'\\'
);
std
::
size_t
pos
=
0
;
for
(
std
::
size_t
i
=
0
;
i
<
s
.
size
();
++
i
)
{
switch
(
s
[
i
])
{
case
'"'
:
// quotation mark (0x22)
{
result
[
pos
+
1
]
=
'"'
;
pos
+=
2
;
break
;
}
case
'\\'
:
// reverse solidus (0x5C)
{
// nothing to change
pos
+=
2
;
break
;
}
case
'\b'
:
// backspace (0x08)
{
result
[
pos
+
1
]
=
'b'
;
pos
+=
2
;
break
;
}
case
'\f'
:
// formfeed (0x0C)
{
result
[
pos
+
1
]
=
'f'
;
pos
+=
2
;
break
;
}
case
'\n'
:
// newline (0x0A)
{
result
[
pos
+
1
]
=
'n'
;
pos
+=
2
;
break
;
}
case
'\r'
:
// carriage return (0x0D)
{
result
[
pos
+
1
]
=
'r'
;
pos
+=
2
;
break
;
}
case
'\t'
:
// horizontal tab (0x09)
{
result
[
pos
+
1
]
=
't'
;
pos
+=
2
;
break
;
}
default
:
{
// escape control characters (0x00..0x1F) or, if
// ensure_ascii parameter is used, non-ASCII characters
if
((
0x00
<=
s
[
i
]
and
s
[
i
]
<=
0x1F
)
or
(
ensure_ascii
and
(
s
[
i
]
&
0x80
or
s
[
i
]
==
0x7F
)))
{
const
auto
bytes
=
bytes_following
(
static_cast
<
uint8_t
>
(
s
[
i
]));
// invalid characters will be detected by throw_if_invalid_utf8
assert
(
bytes
!=
std
::
string
::
npos
);
// check that the additional bytes are present
assert
(
i
+
bytes
<
s
.
size
());
// to use \uxxxx escaping, we first need to calculate
// the codepoint from the UTF-8 bytes
int
codepoint
=
0
;
// bytes is unsigned type:
assert
(
bytes
<=
3
);
switch
(
bytes
)
{
case
0
:
{
codepoint
=
s
[
i
]
&
0xFF
;
break
;
}
case
1
:
{
codepoint
=
((
s
[
i
]
&
0x3F
)
<<
6
)
+
(
s
[
i
+
1
]
&
0x7F
);
break
;
}
case
2
:
{
codepoint
=
((
s
[
i
]
&
0x1F
)
<<
12
)
+
((
s
[
i
+
1
]
&
0x7F
)
<<
6
)
+
(
s
[
i
+
2
]
&
0x7F
);
break
;
}
case
3
:
{
codepoint
=
((
s
[
i
]
&
0xF
)
<<
18
)
+
((
s
[
i
+
1
]
&
0x7F
)
<<
12
)
+
((
s
[
i
+
2
]
&
0x7F
)
<<
6
)
+
(
s
[
i
+
3
]
&
0x7F
);
break
;
}
default
:
break
;
// LCOV_EXCL_LINE
}
escape_codepoint
(
codepoint
,
result
,
pos
);
i
+=
bytes
;
}
else
{
// all other characters are added as-is
result
[
pos
++
]
=
s
[
i
];
}
break
;
}
}
}
assert
(
pos
==
result
.
size
());
o
->
write_characters
(
result
.
c_str
(),
result
.
size
());
}
/*!
@brief write an integer to the output as decimal text

The digits are produced least-significant first into @a number_buffer, a '-'
is appended for negative values, and the buffer is reversed before it is
handed to the output @a o. Zero is written directly as "0".

@param[in] x  integer number (signed or unsigned) to dump
@tparam NumberType either @a number_integer_t or @a number_unsigned_t
*/
template<typename NumberType,
         detail::enable_if_t<
             std::is_same<NumberType, number_unsigned_t>::value or
             std::is_same<NumberType, number_integer_t>::value,
             int> = 0>
void dump_integer(NumberType x)
{
    // special case for "0"
    if (x == 0)
    {
        o->write_character('0');
        return;
    }

    // written this way (rather than x < 0) on purpose, see issue #755
    const bool is_negative = (x <= 0) and (x != 0);

    // x is non-zero here, so at least one digit is produced
    std::size_t length = 0;
    do
    {
        // spare 1 byte for '\0'
        assert(length < number_buffer.size() - 1);

        // x % 10 is negative for negative x, hence the absolute value
        const auto digit = std::labs(static_cast<long>(x % 10));
        number_buffer[length++] = static_cast<char>('0' + digit);
        x /= 10;
    }
    while (x != 0);

    if (is_negative)
    {
        // make sure there is capacity for the '-'
        assert(length < number_buffer.size() - 2);
        number_buffer[length++] = '-';
    }

    // digits (and sign) were collected back to front
    std::reverse(number_buffer.begin(), number_buffer.begin() + length);
    o->write_characters(number_buffer.data(), length);
}
/*!
@brief write a floating-point number to the output

Non-finite values (NaN, infinity) are written as "null". All other values are
formatted with snprintf into @a number_buffer, the locale's thousands
separator is removed and its decimal point is replaced by '.', and the result
is written to the output @a o. If the text contains neither '.' nor 'e', ".0"
is appended so the value still reads as a floating-point number.

@param[in] x  floating-point number to dump
*/
void dump_float(number_float_t x)
{
    // NaN / inf
    if (not std::isfinite(x) or std::isnan(x))
    {
        o->write_characters("null", 4);
        return;
    }

    // get number of digits for a text -> float -> text round-trip
    static constexpr auto precision = std::numeric_limits<number_float_t>::digits10;

    // the actual conversion
    std::ptrdiff_t written = snprintf(number_buffer.data(), number_buffer.size(),
                                      "%.*g", precision, x);

    // negative value indicates an error
    assert(written > 0);
    // check if buffer was large enough
    assert(static_cast<std::size_t>(written) < number_buffer.size());

    const auto first = number_buffer.begin();

    // erase thousands separator
    if (thousands_sep != '\0')
    {
        const auto kept_end = std::remove(first, first + written, thousands_sep);
        std::fill(kept_end, number_buffer.end(), '\0');
        assert((kept_end - first) <= written);
        written = (kept_end - first);
    }

    // convert decimal point to '.'
    if (decimal_point != '\0' and decimal_point != '.')
    {
        const auto separator = std::find(first, number_buffer.end(), decimal_point);
        if (separator != number_buffer.end())
        {
            *separator = '.';
        }
    }

    o->write_characters(number_buffer.data(), static_cast<std::size_t>(written));

    // determine if need to append ".0"
    const bool has_fraction_or_exponent =
        std::any_of(first, first + written + 1, [](char c)
    {
        return (c == '.' or c == 'e');
    });

    if (not has_fraction_or_exponent)
    {
        o->write_characters(".0", 2);
    }
}
/*!
@brief advance a UTF-8 validation automaton by one byte

The function feeds one byte into a table-driven automaton and stores the
resulting state in @a state. Start with state 0 (accept). State 1 means the
input must be rejected, because the current byte is not allowed; once
reached, state 1 is never left. Any other non-zero state after the last byte
means the input ended prematurely, i.e. more bytes should have followed.

@param[in,out] state  the state of the decoding
@param[in] byte       next byte to decode

@note The function has been edited: a std::array is used and the code
      point is not calculated.

@copyright Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
@sa http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
*/
static void decode(uint8_t& state, const uint8_t byte)
{
    // entries 0..255 map a byte to its character class; the entries from 256
    // onwards form the transition table with 16 entries per state
    static const std::array<uint8_t, 400> utf8d =
    {
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..0F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10..1F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..2F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 30..3F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..4F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50..5F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..6F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70..7F
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80..8F
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 90..9F
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // A0..AF
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // B0..BF
            8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0..CF
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF
            0xA, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // E0..EF
            0xB, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // F0..FF
            0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // s1
            1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s2
            1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, // s3
            1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s4
            1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, // s5
            1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s6
            1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s7
            1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  // s8
        }
    };

    // character class of the incoming byte
    const std::size_t char_class = utf8d[byte];

    // first transition entry belonging to the current state
    const std::size_t state_row = 256u + state * 16u;

    state = utf8d[state_row + char_class];
}
/*!
@brief throw an exception if a string is not UTF-8 encoded

Every byte of @a str is passed through decode(). The function throws as soon
as a byte is rejected, and also if the string ends in the middle of a
multi-byte sequence.

@param[in] str  UTF-8 string to check
@throw type_error.316 if passed string is not UTF-8 encoded

@since version 3.0.0
*/
static void throw_if_invalid_utf8(const std::string& str)
{
    // renders a byte as hex for the error messages
    const auto hex_of = [](const uint8_t value) -> std::string
    {
        std::stringstream ss;
        ss << std::setw(2) << std::uppercase << std::setfill('0')
           << std::hex << static_cast<int>(value);
        return ss.str();
    };

    // start with state 0 (= accept)
    uint8_t state = 0;

    for (size_t i = 0; i < str.size(); ++i)
    {
        const auto byte = static_cast<uint8_t>(str[i]);
        decode(state, byte);

        // state 1 means reject
        if (state == 1)
        {
            const std::string message = "invalid UTF-8 byte at index " +
                                        std::to_string(i) + ": 0x" + hex_of(byte);
            JSON_THROW(type_error::create(316, message));
        }
    }

    // we finish reading, but do not accept: string was incomplete
    if (state != 0)
    {
        const std::string message = "incomplete UTF-8 string; last byte: 0x" +
                                    hex_of(static_cast<uint8_t>(str.back()));
        JSON_THROW(type_error::create(316, message));
    }
}
private
:
/// the output of the serializer; the dump_* functions emit their text
/// through its write_character / write_characters calls
output_adapter_t
<
char
>
o
=
nullptr
;
/// a (hopefully) large enough character buffer; scratch space shared by
/// dump_integer and dump_float, which assert on its size instead of growing it
std
::
array
<
char
,
64
>
number_buffer
{{}};
/// the locale
/// NOTE(review): presumably the source of thousands_sep and decimal_point;
/// it is not read by the functions visible here - confirm against the constructor
const
std
::
lconv
*
loc
=
nullptr
;
/// the locale's thousand separator character; '\0' means there is none, in
/// which case dump_float skips the separator removal
const
char
thousands_sep
=
'\0'
;
/// the locale's decimal point character; dump_float replaces it by '.' unless
/// it is '\0' or already '.'
const
char
decimal_point
=
'\0'
;
/// the indentation character
const
char
indent_char
;
/// the indentation string
string_t
indent_string
;
};
template
<
typename
BasicJsonType
>
template
<
typename
BasicJsonType
>
class
json_ref
class
json_ref
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment