Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
J
json
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Chen Yisong
json
Commits
8a4e127a
Commit
8a4e127a
authored
Feb 11, 2015
by
Niels
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
+ moved lexer to class
parent
e845cd1d
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
815 additions
and
906 deletions
+815
-906
json.hpp
src/json.hpp
+576
-687
json.hpp.re2c
src/json.hpp.re2c
+176
-172
unit.cpp
test/unit.cpp
+63
-47
No files found.
src/json.hpp
View file @
8a4e127a
...
...
@@ -14,6 +14,7 @@
#include <type_traits>
#include <utility>
#include <vector>
#include <cmath>
/*!
- ObjectType trick from http://stackoverflow.com/a/9860911
...
...
@@ -2384,9 +2385,9 @@ class basic_json
// parser //
////////////
class
pars
er
class
lex
er
{
p
rivate
:
p
ublic
:
/// token types for the parser
enum
class
token_type
{
...
...
@@ -2406,697 +2407,275 @@ class basic_json
end_of_input
};
/// the type of a lexer character
using
lexer_char_t
=
unsigned
char
;
public
:
/// constructor for strings
inline
parser
(
const
std
::
string
&
s
)
:
buffer
(
s
)
{
// set buffer for RE2C
m_cursor
=
reinterpret_cast
<
const
lexer_char_t
*>
(
buffer
.
c_str
());
// set a pointer past the end of the buffer
m_limit
=
m_cursor
+
buffer
.
size
();
// read first token
get_token
();
}
/// a parser reading from an input stream
inline
parser
(
std
::
istream
&
_is
)
{
while
(
_is
)
{
std
::
string
input_line
;
std
::
getline
(
_is
,
input_line
);
buffer
+=
input_line
;
}
// set buffer for RE2C
m_cursor
=
reinterpret_cast
<
const
lexer_char_t
*>
(
buffer
.
c_str
());
// set a pointer past the end of the buffer
m_limit
=
m_cursor
+
buffer
.
size
();
// read first token
get_token
();
}
inline
basic_json
parse
()
{
switch
(
last_token
)
{
case
(
token_type
:
:
begin_object
)
:
{
// explicitly set result to object to cope with {}
basic_json
result
(
value_t
::
object
);
// read next token
get_token
();
// closing } -> we are done
if
(
last_token
==
token_type
::
end_object
)
{
return
result
;
}
// otherwise: parse key-value pairs
do
{
// store key
expect_new
(
token_type
::
value_string
);
const
auto
key
=
get_string
();
// parse separator (:)
get_token
();
expect_new
(
token_type
::
name_separator
);
// parse value
get_token
();
result
[
key
]
=
parse
();
// read next character
get_token
();
}
while
(
last_token
==
token_type
::
value_separator
and
get_token
()
==
last_token
);
// closing }
expect_new
(
token_type
::
end_object
);
return
result
;
}
case
(
token_type
:
:
begin_array
)
:
{
// explicitly set result to object to cope with []
basic_json
result
(
value_t
::
array
);
// read next token
get_token
();
// closing ] -> we are done
if
(
last_token
==
token_type
::
end_array
)
{
return
result
;
}
// otherwise: parse values
do
{
// parse value
result
.
push_back
(
parse
());
// read next character
get_token
();
}
while
(
last_token
==
token_type
::
value_separator
and
get_token
()
==
last_token
);
// closing ]
expect_new
(
token_type
::
end_array
);
return
result
;
}
case
(
token_type
:
:
literal_null
)
:
{
return
basic_json
(
nullptr
);
}
case
(
token_type
:
:
value_string
)
:
{
return
basic_json
(
get_string
());
}
case
(
token_type
:
:
literal_true
)
:
{
return
basic_json
(
true
);
}
case
(
token_type
:
:
literal_false
)
:
{
return
basic_json
(
false
);
}
case
(
token_type
:
:
value_number
)
:
{
// The pointer m_begin points to the beginning of the
// parsed number. We pass this pointer to std::strtod which
// sets endptr to the first character past the converted
// number. If this pointer is not the same as m_cursor,
// then either more or less characters have been used
// during the comparison. This can happen for inputs like
// "01" which will be treated like number 0 followed by
// number 1.
// conversion
char
*
endptr
;
const
auto
float_val
=
std
::
strtod
(
reinterpret_cast
<
const
char
*>
(
m_begin
),
&
endptr
);
// check if strtod read beyond the end of the lexem
if
(
reinterpret_cast
<
const
lexer_char_t
*>
(
endptr
)
!=
m_cursor
)
{
throw
std
::
invalid_argument
(
std
::
string
(
"parse error - "
)
+
reinterpret_cast
<
const
char
*>
(
m_begin
)
+
" is not a number"
);
}
// check if conversion loses precision
const
auto
int_val
=
static_cast
<
int
>
(
float_val
);
if
(
float_val
==
int_val
)
{
// we basic_json not lose precision -> return int
return
basic_json
(
int_val
);
}
else
{
// we would lose precision -> returnfloat
return
basic_json
(
float_val
);
}
}
default
:
{
std
::
string
error_msg
=
"parse error - unexpected
\'
"
;
error_msg
+=
static_cast
<
char
>
(
m_begin
[
0
]);
error_msg
+=
"
\'
("
;
error_msg
+=
token_type_name
(
last_token
)
+
")"
;
throw
std
::
invalid_argument
(
error_msg
);
}
}
}
private
:
/*!
This function implements a scanner for JSON. It is specified using
regular expressions that try to follow RFC 7159 and ECMA-404 as close
as possible. These regular expressions are then translated into a
deterministic finite automaton (DFA) by the tool RE2C. As a result, the
translated code for this function consists of a large block of code
with goto jumps.
@return the class of the next token read from the buffer
@todo Unicode support needs to be checked.
*/
inline
token_type
get_token
()
{
// needed by RE2C
const
lexer_char_t
*
marker
=
nullptr
;
// set up RE2C
json_parser_lexer_start
:
// set current to the begin of the buffer
m_begin
=
m_cursor
;
if
(
m_begin
==
m_limit
)
{
return
last_token
=
token_type
::
end_of_input
;
}
{
lexer_char_t
yych
;
unsigned
int
yyaccept
=
0
;
static
const
unsigned
char
yybm
[]
=
{
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
192
,
192
,
64
,
64
,
192
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
192
,
64
,
0
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
96
,
96
,
96
,
96
,
96
,
96
,
96
,
96
,
96
,
96
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
0
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
inline
lexer
(
const
char
*
s
)
:
m_content
(
s
)
{
m_start
=
m_cursor
=
m_content
;
m_limit
=
m_content
+
strlen
(
m_content
);
}
inline
lexer
()
=
default
;
#define YYMAXFILL 5
inline
token_type
scan
()
{
#define YYFILL(n)
{
char
yych
;
static
const
unsigned
char
yybm
[]
=
{
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
0
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
192
,
192
,
192
,
192
,
192
,
192
,
192
,
192
,
192
,
192
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
0
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
128
,
};
yych
=
*
m_cursor
;
if
(
yych
<=
'
:
'
)
if
(
yych
<=
'
[
'
)
{
if
(
yych
<=
'
!
'
)
if
(
yych
<=
'
-
'
)
{
if
(
yych
<=
'
\f
'
)
if
(
yych
<=
'
"
'
)
{
if
(
yych
<=
0x0
8
)
if
(
yych
<=
0x0
0
)
{
goto
json_parser_3
;
goto
yy25
;
}
if
(
yych
<=
'\n
'
)
if
(
yych
>=
'"
'
)
{
goto
json_parser_5
;
goto
yy23
;
}
goto
json_parser_3
;
}
else
{
if
(
yych
<=
'
\r
'
)
if
(
yych
<=
'
+
'
)
{
goto
json_parser_5
;
goto
yy2
;
}
if
(
yych
==
'
'
)
if
(
yych
<=
',
'
)
{
goto
json_parser_5
;
goto
yy11
;
}
goto
json_parser_3
;
goto
yy18
;
}
}
else
{
if
(
yych
<=
'
-
'
)
if
(
yych
<=
'
9
'
)
{
if
(
yych
<=
'"'
)
{
goto
json_parser_6
;
}
if
(
yych
<=
'+'
)
if
(
yych
<=
'/'
)
{
goto
json_parser_3
;
goto
yy2
;
}
if
(
yych
<=
'
,
'
)
if
(
yych
<=
'
0
'
)
{
goto
json_parser_7
;
goto
yy19
;
}
goto
json_parser_9
;
goto
yy21
;
}
else
{
if
(
yych
<=
'/'
)
{
goto
json_parser_3
;
}
if
(
yych
<=
'0'
)
if
(
yych
<=
':'
)
{
goto
json_parser_10
;
goto
yy13
;
}
if
(
yych
<=
'9
'
)
if
(
yych
>=
'[
'
)
{
goto
json_parser_12
;
goto
yy3
;
}
goto
json_parser_13
;
}
}
}
else
{
if
(
yych
<=
'm'
)
else
{
if
(
yych
<=
'n'
)
{
if
(
yych
<=
'
\\
'
)
if
(
yych
<=
'
e
'
)
{
if
(
yych
==
'
[
'
)
if
(
yych
==
'
]
'
)
{
goto
json_parser_1
5
;
goto
yy
5
;
}
goto
json_parser_3
;
}
else
{
if
(
yych
<=
'
]
'
)
if
(
yych
<=
'
f
'
)
{
goto
json_parser_
17
;
goto
yy
17
;
}
if
(
yych
==
'f
'
)
if
(
yych
>=
'n
'
)
{
goto
json_parser_19
;
goto
yy15
;
}
goto
json_parser_3
;
}
}
else
{
else
{
if
(
yych
<=
'z'
)
{
if
(
yych
<=
'n'
)
{
goto
json_parser_20
;
}
if
(
yych
==
't'
)
{
goto
json_parser_21
;
goto
yy16
;
}
goto
json_parser_3
;
}
else
{
else
{
if
(
yych
<=
'{'
)
{
goto
json_parser_22
;
goto
yy7
;
}
if
(
yych
==
'}'
)
{
goto
json_parser_24
;
goto
yy9
;
}
goto
json_parser_3
;
}
}
}
json_parser_2
:
yy2:
m_cursor
=
m_marker
;
goto
yy20
;
yy3:
++
m_cursor
;
{
return
token_type
::
begin_array
;
}
yy5:
++
m_cursor
;
{
return
token_type
::
end_array
;
}
yy7:
++
m_cursor
;
{
return
token_type
::
begin_object
;
}
yy9:
++
m_cursor
;
{
return
token_type
::
end_object
;
}
yy11:
++
m_cursor
;
{
return
token_type
::
value_separator
;
}
yy13:
++
m_cursor
;
{
return
token_type
::
name_separator
;
}
yy15:
yych
=
*++
m_cursor
;
if
(
yych
==
'u'
)
{
goto
json_parser_lexer_start
;
goto
yy50
;
}
json_parser_3
:
++
m_cursor
;
json_parser_4
:
goto
yy2
;
yy16:
yych
=
*++
m_cursor
;
if
(
yych
==
'r'
)
{
return
last_token
=
token_type
::
parse_error
;
goto
yy46
;
}
json_parser_5
:
goto
yy2
;
yy17:
yych
=
*++
m_cursor
;
goto
json_parser_60
;
json_parser_6
:
yyaccept
=
0
;
yych
=
*
(
marker
=
++
m_cursor
);
goto
json_parser_51
;
json_parser_7
:
++
m_cursor
;
if
(
yych
==
'a'
)
{
return
last_token
=
token_type
::
value_separator
;
goto
yy41
;
}
json_parser_9
:
goto
yy2
;
yy18:
yych
=
*++
m_cursor
;
if
(
yych
<=
'/'
)
{
goto
json_parser_4
;
goto
yy2
;
}
if
(
yych
<=
'0'
)
{
goto
json_parser_4
9
;
goto
yy1
9
;
}
if
(
yych
<=
'9'
)
{
goto
json_parser_40
;
goto
yy21
;
}
goto
json_parser_4
;
json_parser_10
:
yyaccept
=
1
;
yych
=
*
(
marker
=
++
m_cursor
);
goto
yy2
;
yy19:
yych
=
*
(
m_marker
=
++
m_cursor
);
if
(
yych
<=
'D'
)
{
if
(
yych
==
'.'
)
{
goto
json_parser_42
;
goto
yy34
;
}
}
else
{
else
{
if
(
yych
<=
'E'
)
{
goto
json_parser_43
;
goto
yy35
;
}
if
(
yych
==
'e'
)
{
goto
json_parser_43
;
goto
yy35
;
}
}
json_parser_11
:
{
return
last_token
=
token_type
::
value_number
;
}
json_parser_12
:
yyaccept
=
1
;
yych
=
*
(
marker
=
++
m_cursor
);
goto
json_parser_41
;
json_parser_13
:
++
m_cursor
;
{
return
last_token
=
token_type
::
name_separator
;
}
json_parser_15
:
++
m_cursor
;
{
return
last_token
=
token_type
::
begin_array
;
}
json_parser_17
:
++
m_cursor
;
{
return
last_token
=
token_type
::
end_array
;
}
json_parser_19
:
yyaccept
=
0
;
yych
=
*
(
marker
=
++
m_cursor
);
if
(
yych
==
'a'
)
{
goto
json_parser_35
;
}
goto
json_parser_4
;
json_parser_20
:
yyaccept
=
0
;
yych
=
*
(
marker
=
++
m_cursor
);
if
(
yych
==
'u'
)
{
goto
json_parser_31
;
}
goto
json_parser_4
;
json_parser_21
:
yyaccept
=
0
;
yych
=
*
(
marker
=
++
m_cursor
);
if
(
yych
==
'r'
)
{
goto
json_parser_26
;
}
goto
json_parser_4
;
json_parser_22
:
++
m_cursor
;
{
return
last_token
=
token_type
::
begin_object
;
}
json_parser_24
:
++
m_cursor
;
{
return
last_token
=
token_type
::
end_object
;
}
json_parser_26
:
yych
=
*++
m_cursor
;
if
(
yych
==
'u'
)
{
goto
json_parser_28
;
}
json_parser_27
:
m_cursor
=
marker
;
if
(
yyaccept
==
0
)
{
goto
json_parser_4
;
}
else
{
goto
json_parser_11
;
}
json_parser_28
:
yych
=
*++
m_cursor
;
if
(
yych
!=
'e'
)
{
goto
json_parser_27
;
}
++
m_cursor
;
{
return
last_token
=
token_type
::
literal_true
;
}
json_parser_31
:
yych
=
*++
m_cursor
;
if
(
yych
!=
'l'
)
{
goto
json_parser_27
;
}
yych
=
*++
m_cursor
;
if
(
yych
!=
'l'
)
{
goto
json_parser_27
;
}
++
m_cursor
;
{
return
last_token
=
token_type
::
literal_null
;
}
json_parser_35
:
yych
=
*++
m_cursor
;
if
(
yych
!=
'l'
)
{
goto
json_parser_27
;
}
yych
=
*++
m_cursor
;
if
(
yych
!=
's'
)
{
goto
json_parser_27
;
}
yych
=
*++
m_cursor
;
if
(
yych
!=
'e'
)
{
goto
json_parser_27
;
}
++
m_cursor
;
{
return
last_token
=
token_type
::
literal_false
;
}
json_parser_40
:
yyaccept
=
1
;
marker
=
++
m_cursor
;
yy20:
{
return
token_type
::
value_number
;
}
yy21:
m_marker
=
++
m_cursor
;
yych
=
*
m_cursor
;
json_parser_41
:
if
(
yybm
[
0
+
yych
]
&
32
)
if
(
yybm
[
0
+
yych
]
&
64
)
{
goto
json_parser_40
;
goto
yy21
;
}
if
(
yych
<=
'D'
)
{
if
(
yych
!
=
'.'
)
if
(
yych
=
=
'.'
)
{
goto
json_parser_11
;
goto
yy34
;
}
goto
yy20
;
}
else
{
else
{
if
(
yych
<=
'E'
)
{
goto
json_parser_43
;
goto
yy35
;
}
if
(
yych
==
'e'
)
{
goto
json_parser_43
;
goto
yy35
;
}
goto
json_parser_11
;
goto
yy20
;
}
json_parser_42
:
yych
=
*++
m_cursor
;
if
(
yych
<=
'/'
)
yy23:
++
m_cursor
;
yych
=
*
m_cursor
;
if
(
yybm
[
0
+
yych
]
&
128
)
{
goto
json_parser_27
;
goto
yy23
;
}
if
(
yych
<=
'
9
'
)
if
(
yych
<=
'
"
'
)
{
goto
json_parser_47
;
goto
yy28
;
}
goto
json_parser_27
;
json_parser_43
:
yych
=
*++
m_cursor
;
if
(
yych
<=
','
)
{
if
(
yych
!=
'+'
)
{
goto
json_parser_27
;
}
}
else
{
if
(
yych
<=
'-'
)
{
goto
json_parser_44
;
}
if
(
yych
<=
'/'
)
{
goto
json_parser_27
;
}
if
(
yych
<=
'9'
)
{
goto
json_parser_45
;
}
goto
json_parser_27
;
}
json_parser_44
:
yych
=
*++
m_cursor
;
if
(
yych
<=
'/'
)
{
goto
json_parser_27
;
}
if
(
yych
>=
':'
)
{
goto
json_parser_27
;
}
json_parser_45
:
goto
yy27
;
yy25:
++
m_cursor
;
yych
=
*
m_cursor
;
if
(
yych
<=
'/'
)
{
goto
json_parser_11
;
}
if
(
yych
<=
'9'
)
{
goto
json_parser_45
;
}
goto
json_parser_11
;
json_parser_47
:
yyaccept
=
1
;
marker
=
++
m_cursor
;
yych
=
*
m_cursor
;
if
(
yych
<=
'D'
)
{
if
(
yych
<=
'/'
)
{
goto
json_parser_11
;
}
if
(
yych
<=
'9'
)
{
goto
json_parser_47
;
}
goto
json_parser_11
;
}
else
{
if
(
yych
<=
'E'
)
{
goto
json_parser_43
;
}
if
(
yych
==
'e'
)
{
goto
json_parser_43
;
}
goto
json_parser_11
;
}
json_parser_49
:
yyaccept
=
1
;
yych
=
*
(
marker
=
++
m_cursor
);
if
(
yych
<=
'D'
)
{
if
(
yych
==
'.'
)
{
goto
json_parser_42
;
}
goto
json_parser_11
;
}
else
{
if
(
yych
<=
'E'
)
{
goto
json_parser_43
;
}
if
(
yych
==
'e'
)
{
goto
json_parser_43
;
}
goto
json_parser_11
;
}
json_parser_50
:
++
m_cursor
;
yych
=
*
m_cursor
;
json_parser_51
:
if
(
yybm
[
0
+
yych
]
&
64
)
{
goto
json_parser_50
;
}
if
(
yych
<=
'"'
)
{
goto
json_parser_53
;
}
{
return
token_type
::
end_of_input
;
}
yy27:
++
m_cursor
;
yych
=
*
m_cursor
;
if
(
yych
<=
'e'
)
...
...
@@ -3105,13 +2684,13 @@ json_parser_51:
{
if
(
yych
==
'"'
)
{
goto
json_parser_50
;
goto
yy23
;
}
if
(
yych
<=
'.'
)
{
goto
json_parser_27
;
goto
yy2
;
}
goto
json_parser_50
;
goto
yy23
;
}
else
{
...
...
@@ -3119,271 +2698,581 @@ json_parser_51:
{
if
(
yych
<=
'['
)
{
goto
json_parser_27
;
goto
yy2
;
}
goto
json_parser_50
;
goto
yy23
;
}
else
{
if
(
yych
==
'b'
)
{
goto
json_parser_50
;
goto
yy23
;
}
goto
json_parser_27
;
goto
yy2
;
}
}
}
else
{
else
{
if
(
yych
<=
'q'
)
{
if
(
yych
<=
'f'
)
{
goto
json_parser_50
;
goto
yy23
;
}
if
(
yych
==
'n'
)
{
goto
json_parser_50
;
goto
yy23
;
}
goto
json_parser_27
;
goto
yy2
;
}
else
{
else
{
if
(
yych
<=
's'
)
{
if
(
yych
<=
'r'
)
{
goto
json_parser_50
;
goto
yy23
;
}
goto
json_parser_27
;
goto
yy2
;
}
else
{
else
{
if
(
yych
<=
't'
)
{
goto
json_parser_50
;
goto
yy23
;
}
if
(
yych
<=
'u'
)
{
goto
json_parser_55
;
goto
yy30
;
}
goto
json_parser_27
;
goto
yy2
;
}
}
}
json_parser_53
:
yy28
:
++
m_cursor
;
{
return
last_token
=
token_type
::
value_string
;
}
json_parser_55
:
{
return
token_type
::
value_string
;
}
yy30:
++
m_cursor
;
yych
=
*
m_cursor
;
if
(
yych
<=
'@'
)
{
if
(
yych
<=
'/'
)
{
goto
json_parser_27
;
goto
yy2
;
}
if
(
yych
>=
':'
)
{
goto
json_parser_27
;
goto
yy2
;
}
}
else
{
else
{
if
(
yych
<=
'F'
)
{
goto
json_parser_56
;
goto
yy31
;
}
if
(
yych
<=
'`'
)
{
goto
json_parser_27
;
goto
yy2
;
}
if
(
yych
>=
'g'
)
{
goto
json_parser_27
;
goto
yy2
;
}
}
json_parser_56
:
yy31
:
++
m_cursor
;
yych
=
*
m_cursor
;
if
(
yych
<=
'@'
)
{
if
(
yych
<=
'/'
)
{
goto
json_parser_27
;
goto
yy2
;
}
if
(
yych
>=
':'
)
{
goto
json_parser_27
;
goto
yy2
;
}
}
else
{
else
{
if
(
yych
<=
'F'
)
{
goto
json_parser_57
;
goto
yy32
;
}
if
(
yych
<=
'`'
)
{
goto
json_parser_27
;
goto
yy2
;
}
if
(
yych
>=
'g'
)
{
goto
json_parser_27
;
goto
yy2
;
}
}
json_parser_57
:
yy32
:
++
m_cursor
;
yych
=
*
m_cursor
;
if
(
yych
<=
'@'
)
{
if
(
yych
<=
'/'
)
{
goto
json_parser_27
;
goto
yy2
;
}
if
(
yych
>=
':'
)
{
goto
json_parser_27
;
goto
yy2
;
}
}
else
{
else
{
if
(
yych
<=
'F'
)
{
goto
json_parser_58
;
goto
yy33
;
}
if
(
yych
<=
'`'
)
{
goto
json_parser_27
;
goto
yy2
;
}
if
(
yych
>=
'g'
)
{
goto
json_parser_27
;
goto
yy2
;
}
}
json_parser_58
:
yy33
:
++
m_cursor
;
yych
=
*
m_cursor
;
if
(
yych
<=
'@'
)
{
if
(
yych
<=
'/'
)
{
goto
json_parser_27
;
goto
yy2
;
}
if
(
yych
<=
'9'
)
{
goto
json_parser_50
;
goto
yy23
;
}
goto
json_parser_27
;
goto
yy2
;
}
else
{
else
{
if
(
yych
<=
'F'
)
{
goto
json_parser_50
;
goto
yy23
;
}
if
(
yych
<=
'`'
)
{
goto
json_parser_27
;
goto
yy2
;
}
if
(
yych
<=
'f'
)
{
goto
json_parser_50
;
goto
yy23
;
}
goto
json_parser_27
;
goto
yy2
;
}
yy34:
yych
=
*++
m_cursor
;
if
(
yych
<=
'/'
)
{
goto
yy2
;
}
json_parser_59
:
if
(
yych
<=
'9'
)
{
goto
yy39
;
}
goto
yy2
;
yy35:
yych
=
*++
m_cursor
;
if
(
yych
<=
','
)
{
if
(
yych
!=
'+'
)
{
goto
yy2
;
}
}
else
{
if
(
yych
<=
'-'
)
{
goto
yy36
;
}
if
(
yych
<=
'/'
)
{
goto
yy2
;
}
if
(
yych
<=
'9'
)
{
goto
yy37
;
}
goto
yy2
;
}
yy36:
yych
=
*++
m_cursor
;
if
(
yych
<=
'/'
)
{
goto
yy2
;
}
if
(
yych
>=
':'
)
{
goto
yy2
;
}
yy37:
++
m_cursor
;
yych
=
*
m_cursor
;
json_parser_60
:
if
(
yybm
[
0
+
yych
]
&
128
)
if
(
yych
<=
'/'
)
{
goto
yy20
;
}
if
(
yych
<=
'9'
)
{
goto
yy37
;
}
goto
yy20
;
yy39:
m_marker
=
++
m_cursor
;
yych
=
*
m_cursor
;
if
(
yych
<=
'D'
)
{
if
(
yych
<=
'/'
)
{
goto
yy20
;
}
if
(
yych
<=
'9'
)
{
goto
yy39
;
}
goto
yy20
;
}
else
{
if
(
yych
<=
'E'
)
{
goto
yy35
;
}
if
(
yych
==
'e'
)
{
goto
yy35
;
}
goto
yy20
;
}
yy41:
yych
=
*++
m_cursor
;
if
(
yych
!=
'l'
)
{
goto
yy2
;
}
yych
=
*++
m_cursor
;
if
(
yych
!=
's'
)
{
goto
yy2
;
}
yych
=
*++
m_cursor
;
if
(
yych
!=
'e'
)
{
goto
yy2
;
}
++
m_cursor
;
{
return
token_type
::
literal_false
;
}
yy46:
yych
=
*++
m_cursor
;
if
(
yych
!=
'u'
)
{
goto
yy2
;
}
yych
=
*++
m_cursor
;
if
(
yych
!=
'e'
)
{
goto
json_parser_59
;
goto
yy2
;
}
goto
json_parser_2
;
++
m_cursor
;
{
return
token_type
::
literal_true
;
}
yy50:
yych
=
*++
m_cursor
;
if
(
yych
!=
'l'
)
{
goto
yy2
;
}
yych
=
*++
m_cursor
;
if
(
yych
!=
'l'
)
{
goto
yy2
;
}
++
m_cursor
;
{
return
token_type
::
literal_null
;
}
}
}
inline
static
std
::
string
token_type_name
(
token_type
t
)
inline
std
::
string
get_string_value
()
const
{
return
std
::
string
(
m_start
,
static_cast
<
size_t
>
(
m_cursor
-
m_start
));
}
/*!
The pointer m_begin points to the opening quote of the string, and
m_cursor past the closing quote of the string. We create a std::string from
the character after the opening quotes (m_begin+1) until the character
before the closing quotes (hence subtracting 2 characters from the pointer
difference of the two pointers).
@return string value of current token without opening and closing quotes
@todo Take care of Unicode.
*/
inline
std
::
string
get_string
()
const
{
return
std
::
string
(
m_start
+
1
,
static_cast
<
size_t
>
(
m_cursor
-
m_start
-
2
));
}
inline
number_float_t
get_number
()
const
{
// The pointer m_begin points to the beginning of the
// parsed number. We pass this pointer to std::strtod which
// sets endptr to the first character past the converted
// number. If this pointer is not the same as m_cursor,
// then either more or less characters have been used
// during the comparison. This can happen for inputs like
// "01" which will be treated like number 0 followed by
// number 1.
// conversion
char
*
endptr
;
const
auto
float_val
=
std
::
strtod
(
reinterpret_cast
<
const
char
*>
(
m_start
),
&
endptr
);
// check if strtod read beyond the end of the lexem
if
(
endptr
!=
m_cursor
)
{
std
::
cerr
<<
get_string_value
()
<<
std
::
endl
;
return
NAN
;
}
else
{
return
float_val
;
}
}
private
:
const
char
*
m_content
=
nullptr
;
const
char
*
m_start
=
nullptr
;
const
char
*
m_cursor
=
nullptr
;
const
char
*
m_limit
=
nullptr
;
const
char
*
m_marker
=
nullptr
;
const
char
*
m_ctxmarker
=
nullptr
;
};
class
parser
{
public
:
/// constructor for strings
inline
parser
(
const
std
::
string
&
s
)
:
m_buffer
(
s
),
m_lexer
(
m_buffer
.
c_str
())
{
// read first token
get_token
();
}
/// a parser reading from an input stream
inline
parser
(
std
::
istream
&
_is
)
{
while
(
_is
)
{
std
::
string
input_line
;
std
::
getline
(
_is
,
input_line
);
m_buffer
+=
input_line
;
}
// initializer lexer
m_lexer
=
lexer
(
m_buffer
.
c_str
());
// read first token
get_token
();
}
inline
basic_json
parse
()
{
switch
(
last_token
)
{
case
(
lexer
:
:
token_type
::
begin_object
)
:
{
// explicitly set result to object to cope with {}
basic_json
result
(
value_t
::
object
);
// read next token
get_token
();
// closing } -> we are done
if
(
last_token
==
lexer
::
token_type
::
end_object
)
{
return
result
;
}
// otherwise: parse key-value pairs
do
{
// store key
expect
(
lexer
::
token_type
::
value_string
);
const
auto
key
=
m_lexer
.
get_string
();
// parse separator (:)
get_token
();
expect
(
lexer
::
token_type
::
name_separator
);
// parse value
get_token
();
result
[
key
]
=
parse
();
// read next character
get_token
();
}
while
(
last_token
==
lexer
::
token_type
::
value_separator
and
get_token
()
==
last_token
);
// closing }
expect
(
lexer
::
token_type
::
end_object
);
return
result
;
}
case
(
lexer
:
:
token_type
::
begin_array
)
:
{
// explicitly set result to object to cope with []
basic_json
result
(
value_t
::
array
);
// read next token
get_token
();
// closing ] -> we are done
if
(
last_token
==
lexer
::
token_type
::
end_array
)
{
return
result
;
}
// otherwise: parse values
do
{
// parse value
result
.
push_back
(
parse
());
// read next character
get_token
();
}
while
(
last_token
==
lexer
::
token_type
::
value_separator
and
get_token
()
==
last_token
);
// closing ]
expect
(
lexer
::
token_type
::
end_array
);
return
result
;
}
case
(
lexer
:
:
token_type
::
literal_null
)
:
{
return
basic_json
(
nullptr
);
}
case
(
lexer
:
:
token_type
::
value_string
)
:
{
return
basic_json
(
m_lexer
.
get_string
());
}
case
(
lexer
:
:
token_type
::
literal_true
)
:
{
return
basic_json
(
true
);
}
case
(
lexer
:
:
token_type
::
literal_false
)
:
{
return
basic_json
(
false
);
}
case
(
lexer
:
:
token_type
::
value_number
)
:
{
auto
float_val
=
m_lexer
.
get_number
();
if
(
std
::
isnan
(
float_val
))
{
throw
std
::
invalid_argument
(
std
::
string
(
"parse error - "
)
+
m_lexer
.
get_string_value
()
+
" is not a number"
);
}
// check if conversion loses precision
const
auto
int_val
=
static_cast
<
number_integer_t
>
(
float_val
);
if
(
float_val
==
int_val
)
{
// we basic_json not lose precision -> return int
return
basic_json
(
int_val
);
}
else
{
// we would lose precision -> returnfloat
return
basic_json
(
float_val
);
}
}
default
:
{
std
::
string
error_msg
=
"parse error - unexpected
\'
"
;
error_msg
+=
m_lexer
.
get_string_value
();
error_msg
+=
"
\'
("
;
error_msg
+=
token_type_name
(
last_token
)
+
")"
;
throw
std
::
invalid_argument
(
error_msg
);
}
}
}
private
:
/// get next token from lexer
inline
typename
lexer
::
token_type
get_token
()
{
last_token
=
m_lexer
.
scan
();
return
last_token
;
}
inline
static
std
::
string
token_type_name
(
typename
lexer
::
token_type
t
)
{
switch
(
t
)
{
case
(
token_type
:
:
uninitialized
)
:
case
(
lexer
:
:
token_type
::
uninitialized
)
:
return
"<uninitialized>"
;
case
(
token_type
:
:
literal_true
)
:
case
(
lexer
:
:
token_type
::
literal_true
)
:
return
"true literal"
;
case
(
token_type
:
:
literal_false
)
:
case
(
lexer
:
:
token_type
::
literal_false
)
:
return
"false literal"
;
case
(
token_type
:
:
literal_null
)
:
case
(
lexer
:
:
token_type
::
literal_null
)
:
return
"null literal"
;
case
(
token_type
:
:
value_string
)
:
case
(
lexer
:
:
token_type
::
value_string
)
:
return
"string literal"
;
case
(
token_type
:
:
value_number
)
:
case
(
lexer
:
:
token_type
::
value_number
)
:
return
"number literal"
;
case
(
token_type
:
:
begin_array
)
:
case
(
lexer
:
:
token_type
::
begin_array
)
:
return
"["
;
case
(
token_type
:
:
begin_object
)
:
case
(
lexer
:
:
token_type
::
begin_object
)
:
return
"{"
;
case
(
token_type
:
:
end_array
)
:
case
(
lexer
:
:
token_type
::
end_array
)
:
return
"]"
;
case
(
token_type
:
:
end_object
)
:
case
(
lexer
:
:
token_type
::
end_object
)
:
return
"}"
;
case
(
token_type
:
:
name_separator
)
:
case
(
lexer
:
:
token_type
::
name_separator
)
:
return
":"
;
case
(
token_type
:
:
value_separator
)
:
case
(
lexer
:
:
token_type
::
value_separator
)
:
return
","
;
case
(
token_type
:
:
parse_error
)
:
case
(
lexer
:
:
token_type
::
parse_error
)
:
return
"<parse error>"
;
case
(
token_type
:
:
end_of_input
)
:
case
(
lexer
:
:
token_type
::
end_of_input
)
:
return
"<end of input>"
;
}
}
inline
void
expect
_new
(
token_type
t
)
inline
void
expect
(
typename
lexer
::
token_type
t
)
const
{
if
(
t
!=
last_token
)
{
std
::
string
error_msg
=
"parse error - unexpected
\'
"
;
error_msg
+=
static_cast
<
char
>
(
m_begin
[
0
]
);
error_msg
+=
m_lexer
.
get_string_value
(
);
error_msg
+=
"
\'
("
+
token_type_name
(
last_token
);
error_msg
+=
"); expected "
+
token_type_name
(
t
);
throw
std
::
invalid_argument
(
error_msg
);
}
}
/*!
The pointer m_begin points to the opening quote of the string, and
m_cursor past the closing quote of the string. We create a std::string from
the character after the opening quotes (m_begin+1) until the character
before the closing quotes (hence subtracting 2 characters from the pointer
difference of the two pointers).
@return string value of current token without opening and closing quotes
@todo Take care of Unicode.
*/
inline
std
::
string
get_string
()
const
{
return
std
::
string
(
reinterpret_cast
<
const
char
*>
(
m_begin
+
1
),
static_cast
<
std
::
size_t
>
(
m_cursor
-
m_begin
-
2
)
);
}
private
:
/// the buffer
std
::
string
buffer
;
/// a pointer to the next character to read from the buffer
const
lexer_char_t
*
m_cursor
=
nullptr
;
/// a pointer past the last character of the buffer
const
lexer_char_t
*
m_limit
=
nullptr
;
/// a pointer to the beginning of the current token
const
lexer_char_t
*
m_begin
=
nullptr
;
std
::
string
m_buffer
;
/// the type of the last read token
token_type
last_token
=
token_type
::
uninitialized
;
typename
lexer
::
token_type
last_token
=
lexer
::
token_type
::
uninitialized
;
lexer
m_lexer
;
};
};
...
...
src/json.hpp.re2c
View file @
8a4e127a
...
...
@@ -14,6 +14,7 @@
#include <type_traits>
#include <utility>
#include <vector>
#include <cmath>
/*!
- ObjectType trick from http://stackoverflow.com/a/9860911
...
...
@@ -2384,9 +2385,9 @@ class basic_json
// parser //
////////////
class
pars
er
class
lex
er
{
p
rivate
:
p
ublic
:
/// token types for the parser
enum class token_type
{
...
...
@@ -2406,17 +2407,133 @@ class basic_json
end_of_input
};
/// the type of a lexer character
using lexer_char_t = unsigned char;
/// create a lexer for the NUL-terminated character sequence @a s; only the
/// pointer is stored, the characters themselves are not copied
inline lexer(const char* s)
    : m_content(s),
      m_start(s),
      m_cursor(s),
      m_limit(s + strlen(s))   // one past the last character of the input
{}
/// default constructor: every buffer pointer keeps its in-class initializer
/// (nullptr), so a lexer created this way has no input to scan until a lexer
/// constructed from a buffer is assigned to it
inline lexer() = default;
/*!max:re2c */
/*!
@brief read the next token from the buffer

The scanner is specified with the regular expressions below, which RE2C
translates into a DFA that advances m_cursor. Buffer refilling is disabled
(yyfill:enable = 0), so the end of the input is recognized by the terminating
NUL byte of the buffer rather than by comparing against m_limit.

On return, m_start points to the first character of the token that was read
and m_cursor to the character following it.

@return the type of the token that was read: token_type::end_of_input at the
        terminating NUL byte, token_type::parse_error for any character that
        cannot start a token
*/
inline token_type scan()
{
    // Remember where the token begins: get_string_value(), get_string() and
    // get_number() read the lexeme from the range [m_start, m_cursor).
    // Without this, m_start would stay at the beginning of the whole buffer.
    m_start = m_cursor;

#define YYFILL(n)

    /*!re2c
        re2c:define:YYCURSOR    = m_cursor;
        re2c:define:YYLIMIT     = m_limit;
        re2c:define:YYCTYPE     = char;
        re2c:define:YYCTXMARKER = m_ctxmarker;
        re2c:define:YYMARKER    = m_marker;
        re2c:indent:top         = 1;
        re2c:yyfill:enable      = 0;

        // insignificant whitespace: skip it and scan again (this also moves
        // m_start to the beginning of the token that follows)
        ws = [ \t\n\r]+;
        ws { return scan(); }

        // structural characters
        "[" { return token_type::begin_array; }
        "]" { return token_type::end_array; }
        "{" { return token_type::begin_object; }
        "}" { return token_type::end_object; }
        "," { return token_type::value_separator; }
        ":" { return token_type::name_separator; }

        // literal names
        "null"  { return token_type::literal_null; }
        "true"  { return token_type::literal_true; }
        "false" { return token_type::literal_false; }

        // number
        decimal_point = [.];
        digit         = [0-9];
        digit_1_9     = [1-9];
        e             = [eE];
        minus         = [-];
        plus          = [+];
        zero          = [0];
        exp           = e (minus|plus)? digit+;
        frac          = decimal_point digit+;
        int           = (zero|digit_1_9 digit*);
        number        = minus? int frac? exp?;
        number { return token_type::value_number; }

        // string
        quotation_mark = [\"];
        escape         = [\\];
        unescaped      = [^\"\\];
        escaped        = escape ([\"\\/bfnrt] | [u][0-9a-fA-F]{4});
        char           = unescaped | escaped;
        string         = quotation_mark char* quotation_mark;
        string { return token_type::value_string; }

        // end of file
        '\000' { return token_type::end_of_input; }

        // anything else is an error (must stay the last rule so that it only
        // applies when no other rule matches)
        . { return token_type::parse_error; }
    */
}
/// return a copy of the characters between m_start and m_cursor, i.e. the
/// text of the token exactly as it appears in the buffer
inline std::string get_string_value() const
{
    const auto lexeme_length = static_cast<size_t>(m_cursor - m_start);
    return std::string(m_start, lexeme_length);
}
/*!
@brief return the content of a string token without the enclosing quotes

m_start is expected to point to the opening quote of the string and m_cursor
to the character after the closing quote. The result therefore starts one
character after m_start and is two characters shorter than the distance
between both pointers. The characters are copied verbatim; escape sequences
are not translated.

@return string value of current token without opening and closing quotes

@todo Take care of Unicode.
*/
inline std::string get_string() const
{
    // skip the opening quote ...
    const char* const first_char = m_start + 1;
    // ... and leave out both the opening and the closing quote
    const auto content_length = static_cast<size_t>(m_cursor - m_start - 2);
    return std::string(first_char, content_length);
}
/*!
@brief convert the current number token to a floating-point value

The pointer m_start points to the beginning of the scanned number. It is
passed to std::strtod, which sets endptr to the first character past the
converted number. If endptr differs from m_cursor, std::strtod consumed more
or fewer characters than the scanner did. This can happen for inputs like
"01", which the scanner treats as the number 0 followed by the number 1.

@return the converted value, or NAN if std::strtod did not stop exactly at
        m_cursor; reporting the error is left to the caller, so nothing is
        written to any stream here
*/
inline number_float_t get_number() const
{
    // conversion
    char* endptr = nullptr;
    const auto float_val = std::strtod(m_start, &endptr);

    // check whether strtod stopped exactly at the end of the lexeme
    if (endptr != m_cursor)
    {
        return NAN;
    }

    return float_val;
}
private:
const char* m_content = nullptr;
const char* m_start = nullptr;
const char* m_cursor = nullptr;
const char* m_limit = nullptr;
const char* m_marker = nullptr;
const char* m_ctxmarker = nullptr;
};
class parser
{
public:
/// constructor for strings
inline parser(const std::string& s) :
buffer(s
)
inline parser(const std::string& s) :
m_buffer(s), m_lexer(m_buffer.c_str()
)
{
// set buffer for RE2C
m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str());
// set a pointer past the end of the buffer
m_limit = m_cursor + buffer.size();
// read first token
get_token();
}
...
...
@@ -2428,13 +2545,12 @@ class basic_json
{
std::string input_line;
std::getline(_is, input_line);
buffer += input_line;
m_
buffer += input_line;
}
// set buffer for RE2C
m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str());
// set a pointer past the end of the buffer
m_limit = m_cursor + buffer.size();
// initialize lexer
m_lexer = lexer(m_buffer.c_str());
// read first token
get_token();
}
...
...
@@ -2443,7 +2559,7 @@ class basic_json
{
switch (last_token)
{
case (token_type::begin_object):
case (
lexer::
token_type::begin_object):
{
// explicitly set result to object to cope with {}
basic_json result(value_t::object);
...
...
@@ -2452,7 +2568,7 @@ class basic_json
get_token();
// closing } -> we are done
if (last_token == token_type::end_object)
if (last_token ==
lexer::
token_type::end_object)
{
return result;
}
...
...
@@ -2461,12 +2577,12 @@ class basic_json
do
{
// store key
expect
_new(
token_type::value_string);
const auto key = get_string();
expect
(lexer::
token_type::value_string);
const auto key =
m_lexer.
get_string();
// parse separator (:)
get_token();
expect
_new(
token_type::name_separator);
expect
(lexer::
token_type::name_separator);
// parse value
get_token();
...
...
@@ -2475,16 +2591,16 @@ class basic_json
// read next character
get_token();
}
while (last_token == token_type::value_separator
while (last_token ==
lexer::
token_type::value_separator
and get_token() == last_token);
// closing }
expect
_new(
token_type::end_object);
expect
(lexer::
token_type::end_object);
return result;
}
case (token_type::begin_array):
case (
lexer::
token_type::begin_array):
{
// explicitly set result to array to cope with []
basic_json result(value_t::array);
...
...
@@ -2493,7 +2609,7 @@ class basic_json
get_token();
// closing ] -> we are done
if (last_token == token_type::end_array)
if (last_token ==
lexer::
token_type::end_array)
{
return result;
}
...
...
@@ -2507,59 +2623,47 @@ class basic_json
// read next character
get_token();
}
while (last_token == token_type::value_separator
while (last_token ==
lexer::
token_type::value_separator
and get_token() == last_token);
// closing ]
expect
_new(
token_type::end_array);
expect
(lexer::
token_type::end_array);
return result;
}
case (token_type::literal_null):
case (
lexer::
token_type::literal_null):
{
return basic_json(nullptr);
}
case (token_type::value_string):
case (
lexer::
token_type::value_string):
{
return basic_json(get_string());
return basic_json(
m_lexer.
get_string());
}
case (token_type::literal_true):
case (
lexer::
token_type::literal_true):
{
return basic_json(true);
}
case (token_type::literal_false):
case (
lexer::
token_type::literal_false):
{
return basic_json(false);
}
case (token_type::value_number):
case (
lexer::
token_type::value_number):
{
// The pointer m_begin points to the beginning of the
// parsed number. We pass this pointer to std::strtod which
// sets endptr to the first character past the converted
// number. If this pointer is not the same as m_cursor,
// then either more or less characters have been used
// during the comparison. This can happen for inputs like
// "01" which will be treated like number 0 followed by
// number 1.
auto float_val = m_lexer.get_number();
// conversion
char* endptr;
const auto float_val = std::strtod(reinterpret_cast<const char*>(m_begin), &endptr);
// check if strtod read beyond the end of the lexem
if (reinterpret_cast<const lexer_char_t*>(endptr) != m_cursor)
if (std::isnan(float_val))
{
throw std::invalid_argument(std::string("parse error - ") +
reinterpret_cast<const char*>(m_begin
) + " is not a number");
m_lexer.get_string_value(
) + " is not a number");
}
// check if conversion loses precision
const auto int_val = static_cast<
in
t>(float_val);
const auto int_val = static_cast<
number_integer_
t>(float_val);
if (float_val == int_val)
{
// we would not lose precision -> return int
...
...
@@ -2575,7 +2679,7 @@ class basic_json
default:
{
std::string error_msg = "parse error - unexpected \'";
error_msg +=
static_cast<char>(m_begin[0]
);
error_msg +=
m_lexer.get_string_value(
);
error_msg += "\' (";
error_msg += token_type_name(last_token) + ")";
throw std::invalid_argument(error_msg);
...
...
@@ -2584,166 +2688,66 @@ class basic_json
}
private:
/*!
This function implements a scanner for JSON. It is specified using
regular expressions that try to follow RFC 7159 and ECMA-404 as close
as possible. These regular expressions are then translated into a
deterministic finite automaton (DFA) by the tool RE2C. As a result, the
translated code for this function consists of a large block of code
with goto jumps.
@return the class of the next token read from the buffer
@todo Unicode support needs to be checked.
*/
inline token_type get_token()
/// get next token from lexer
inline typename lexer::token_type get_token()
{
// needed by RE2C
const lexer_char_t* marker = nullptr;
// set up RE2C
/*!re2c
re2c:labelprefix = "json_parser_";
re2c:yyfill:enable = 0;
re2c:define:YYCURSOR = m_cursor;
re2c:define:YYCTYPE = lexer_char_t;
re2c:define:YYMARKER = marker;
re2c:indent:string = " ";
re2c:define:YYLIMIT = m_limit;
*/
json_parser_lexer_start:
// set current to the begin of the buffer
m_begin = m_cursor;
if (m_begin == m_limit)
{
return last_token = token_type::end_of_input;
}
/*!re2c
// whitespace
ws = [ \t\n\r]*;
ws { goto json_parser_lexer_start; }
// structural characters
"[" { return last_token = token_type::begin_array; }
"]" { return last_token = token_type::end_array; }
"{" { return last_token = token_type::begin_object; }
"}" { return last_token = token_type::end_object; }
"," { return last_token = token_type::value_separator; }
":" { return last_token = token_type::name_separator; }
// literal names
"null" { return last_token = token_type::literal_null; }
"true" { return last_token = token_type::literal_true; }
"false" { return last_token = token_type::literal_false; }
// number
decimal_point = [.];
digit = [0-9];
digit_1_9 = [1-9];
e = [eE];
minus = [-];
plus = [+];
zero = [0];
exp = e (minus|plus)? digit+;
frac = decimal_point digit+;
int = (zero|digit_1_9 digit*);
number = minus? int frac? exp?;
number { return last_token = token_type::value_number; }
// string
quotation_mark = [\"];
escape = [\\];
unescaped = [^\"\\];
escaped = escape ([\"\\/bfnrt] | [u][0-9a-fA-F]{4});
char = unescaped | escaped;
string = quotation_mark char* quotation_mark;
string { return last_token = token_type::value_string; }
// anything else is an error
* { return last_token = token_type::parse_error; }
*/
last_token = m_lexer.scan();
return last_token;
}
inline static std::string token_type_name(token_type t)
inline static std::string token_type_name(t
ypename lexer::t
oken_type t)
{
switch (t)
{
case (token_type::uninitialized):
case (
lexer::
token_type::uninitialized):
return "<uninitialized>";
case (token_type::literal_true):
case (
lexer::
token_type::literal_true):
return "true literal";
case (token_type::literal_false):
case (
lexer::
token_type::literal_false):
return "false literal";
case (token_type::literal_null):
case (
lexer::
token_type::literal_null):
return "null literal";
case (token_type::value_string):
case (
lexer::
token_type::value_string):
return "string literal";
case (token_type::value_number):
case (
lexer::
token_type::value_number):
return "number literal";
case (token_type::begin_array):
case (
lexer::
token_type::begin_array):
return "[";
case (token_type::begin_object):
case (
lexer::
token_type::begin_object):
return "{";
case (token_type::end_array):
case (
lexer::
token_type::end_array):
return "]";
case (token_type::end_object):
case (
lexer::
token_type::end_object):
return "}";
case (token_type::name_separator):
case (
lexer::
token_type::name_separator):
return ":";
case (token_type::value_separator):
case (
lexer::
token_type::value_separator):
return ",";
case (token_type::parse_error):
case (
lexer::
token_type::parse_error):
return "<parse error>";
case (token_type::end_of_input):
case (
lexer::
token_type::end_of_input):
return "<end of input>";
}
}
inline void expect
_new(token_type t)
inline void expect
(typename lexer::token_type t) const
{
if (t != last_token)
{
std::string error_msg = "parse error - unexpected \'";
error_msg +=
static_cast<char>(m_begin[0]
);
error_msg +=
m_lexer.get_string_value(
);
error_msg += "\' (" + token_type_name(last_token);
error_msg += "); expected " + token_type_name(t);
throw std::invalid_argument(error_msg);
}
}
/*!
The pointer m_begin points to the opening quote of the string, and m_cursor
points past the closing quote. The returned std::string starts at the
character after the opening quote (m_begin + 1) and its length is the pointer
difference minus 2, so that neither quote is included.
@return string value of current token without opening and closing quotes
@todo Take care of Unicode.
*/
inline std::string get_string() const
{
    // first character after the opening quote, viewed as plain char
    const char* const first = reinterpret_cast<const char*>(m_begin + 1);
    // token length minus the two surrounding quotes
    const auto length = static_cast<std::size_t>(m_cursor - m_begin - 2);
    return std::string(first, length);
}
private:
/// the buffer
std::string buffer;
/// a pointer to the next character to read from the buffer
const lexer_char_t* m_cursor = nullptr;
/// a pointer past the last character of the buffer
const lexer_char_t* m_limit = nullptr;
/// a pointer to the beginning of the current token
const lexer_char_t* m_begin = nullptr;
std::string m_buffer;
/// the type of the last read token
token_type last_token = token_type::uninitialized;
typename lexer::token_type last_token = lexer::token_type::uninitialized;
lexer m_lexer;
};
};
...
...
test/unit.cpp
View file @
8a4e127a
...
...
@@ -3892,27 +3892,43 @@ TEST_CASE("deserialization")
{
SECTION
(
"string"
)
{
auto
s
=
"[
\"
foo
\"
,1,2,3,false,{
\"
one
\"
:1}]"
;
// auto s = "[\"foo\",1,2,3,false,{\"one\":1}]";
// json j = json::parse(s);
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
auto
s
=
"null"
;
json
j
=
json
::
parse
(
s
);
CHECK
(
j
==
json
(
{
"foo"
,
1
,
2
,
3
,
false
,
{{
"one"
,
1
}}}
));
CHECK
(
j
==
json
());
}
SECTION
(
"operator<<"
)
{
// std::stringstream ss;
// ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
// json j;
// j << ss;
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
std
::
stringstream
ss
;
ss
<<
"
[
\"
foo
\"
,1,2,3,false,{
\"
one
\"
:1}]
"
;
ss
<<
"
null
"
;
json
j
;
j
<<
ss
;
CHECK
(
j
==
json
(
{
"foo"
,
1
,
2
,
3
,
false
,
{{
"one"
,
1
}}}
));
CHECK
(
j
==
json
());
}
SECTION
(
"operator>>"
)
{
// std::stringstream ss;
// ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
// json j;
// ss >> j;
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
std
::
stringstream
ss
;
ss
<<
"
[
\"
foo
\"
,1,2,3,false,{
\"
one
\"
:1}]
"
;
ss
<<
"
null
"
;
json
j
;
ss
>>
j
;
CHECK
(
j
==
json
(
{
"foo"
,
1
,
2
,
3
,
false
,
{{
"one"
,
1
}}}
));
CHECK
(
j
==
json
());
}
}
...
...
@@ -3980,42 +3996,42 @@ TEST_CASE("parser class")
{
SECTION
(
"structural characters"
)
{
CHECK
(
json
::
parser
(
"["
).
last_token
==
json
::
pars
er
::
token_type
::
begin_array
);
CHECK
(
json
::
parser
(
"]"
).
last_token
==
json
::
pars
er
::
token_type
::
end_array
);
CHECK
(
json
::
parser
(
"{"
).
last_token
==
json
::
pars
er
::
token_type
::
begin_object
);
CHECK
(
json
::
parser
(
"}"
).
last_token
==
json
::
pars
er
::
token_type
::
end_object
);
CHECK
(
json
::
parser
(
","
).
last_token
==
json
::
pars
er
::
token_type
::
value_separator
);
CHECK
(
json
::
parser
(
":"
).
last_token
==
json
::
pars
er
::
token_type
::
name_separator
);
CHECK
(
json
::
parser
(
"["
).
last_token
==
json
::
lex
er
::
token_type
::
begin_array
);
CHECK
(
json
::
parser
(
"]"
).
last_token
==
json
::
lex
er
::
token_type
::
end_array
);
CHECK
(
json
::
parser
(
"{"
).
last_token
==
json
::
lex
er
::
token_type
::
begin_object
);
CHECK
(
json
::
parser
(
"}"
).
last_token
==
json
::
lex
er
::
token_type
::
end_object
);
CHECK
(
json
::
parser
(
","
).
last_token
==
json
::
lex
er
::
token_type
::
value_separator
);
CHECK
(
json
::
parser
(
":"
).
last_token
==
json
::
lex
er
::
token_type
::
name_separator
);
}
SECTION
(
"literal names"
)
{
CHECK
(
json
::
parser
(
"null"
).
last_token
==
json
::
pars
er
::
token_type
::
literal_null
);
CHECK
(
json
::
parser
(
"true"
).
last_token
==
json
::
pars
er
::
token_type
::
literal_true
);
CHECK
(
json
::
parser
(
"false"
).
last_token
==
json
::
pars
er
::
token_type
::
literal_false
);
CHECK
(
json
::
parser
(
"null"
).
last_token
==
json
::
lex
er
::
token_type
::
literal_null
);
CHECK
(
json
::
parser
(
"true"
).
last_token
==
json
::
lex
er
::
token_type
::
literal_true
);
CHECK
(
json
::
parser
(
"false"
).
last_token
==
json
::
lex
er
::
token_type
::
literal_false
);
}
SECTION
(
"numbers"
)
{
CHECK
(
json
::
parser
(
"0"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"1"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"2"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"3"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"4"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"5"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"6"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"7"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"8"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"9"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"0"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"1"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"2"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"3"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"4"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"5"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"6"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"7"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"8"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"9"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
}
SECTION
(
"whitespace"
)
{
CHECK
(
json
::
parser
(
" 0"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"
\t
0"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"
\n
0"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"
\r
0"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"
\t\n\r\n\t
0"
).
last_token
==
json
::
pars
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
" 0"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"
\t
0"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"
\n
0"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"
\r
0"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
CHECK
(
json
::
parser
(
"
\t\n\r\n\t
0"
).
last_token
==
json
::
lex
er
::
token_type
::
value_number
);
}
/*
...
...
@@ -4049,7 +4065,7 @@ TEST_CASE("parser class")
case ('9'):
case ('"'):
{
CHECK(json::parser(s).last_token != json::
pars
er::token_type::parse_error);
CHECK(json::parser(s).last_token != json::
lex
er::token_type::parse_error);
break;
}
...
...
@@ -4058,13 +4074,13 @@ TEST_CASE("parser class")
case ('\n'):
case ('\r'):
{
CHECK(json::parser(s).last_token == json::
pars
er::token_type::end_of_input);
CHECK(json::parser(s).last_token == json::
lex
er::token_type::end_of_input);
break;
}
default:
{
CHECK(json::parser(s).last_token == json::
pars
er::token_type::parse_error);
CHECK(json::parser(s).last_token == json::
lex
er::token_type::parse_error);
break;
}
}
...
...
@@ -4093,19 +4109,19 @@ TEST_CASE("parser class")
SECTION
(
"token_type_name"
)
{
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
uninitialized
)
==
"<uninitialized>"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
literal_true
)
==
"true literal"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
literal_false
)
==
"false literal"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
literal_null
)
==
"null literal"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
value_string
)
==
"string literal"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
value_number
)
==
"number literal"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
begin_array
)
==
"["
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
begin_object
)
==
"{"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
end_array
)
==
"]"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
end_object
)
==
"}"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
name_separator
)
==
":"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
value_separator
)
==
","
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
parse_error
)
==
"<parse error>"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
pars
er
::
token_type
::
end_of_input
)
==
"<end of input>"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
uninitialized
)
==
"<uninitialized>"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
literal_true
)
==
"true literal"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
literal_false
)
==
"false literal"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
literal_null
)
==
"null literal"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
value_string
)
==
"string literal"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
value_number
)
==
"number literal"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
begin_array
)
==
"["
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
begin_object
)
==
"{"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
end_array
)
==
"]"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
end_object
)
==
"}"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
name_separator
)
==
":"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
value_separator
)
==
","
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
parse_error
)
==
"<parse error>"
);
CHECK
(
json
::
parser
::
token_type_name
(
json
::
lex
er
::
token_type
::
end_of_input
)
==
"<end of input>"
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment