Commit 8a4e127a by Niels

+ moved lexer to class

parent e845cd1d
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <type_traits> #include <type_traits>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <cmath>
/*! /*!
- ObjectType trick from http://stackoverflow.com/a/9860911 - ObjectType trick from http://stackoverflow.com/a/9860911
...@@ -2384,9 +2385,9 @@ class basic_json ...@@ -2384,9 +2385,9 @@ class basic_json
// parser // // parser //
//////////// ////////////
class parser class lexer
{ {
private: public:
/// token types for the parser /// token types for the parser
enum class token_type enum class token_type
{ {
...@@ -2406,984 +2407,872 @@ class basic_json ...@@ -2406,984 +2407,872 @@ class basic_json
end_of_input end_of_input
}; };
/// the type of a lexer character inline lexer(const char* s) : m_content(s)
using lexer_char_t = unsigned char; {
m_start = m_cursor = m_content;
public: m_limit = m_content + strlen(m_content);
/// constructor for strings }
inline parser(const std::string& s) : buffer(s)
{ inline lexer() = default;
// set buffer for RE2C
m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str()); #define YYMAXFILL 5
// set a pointer past the end of the buffer
m_limit = m_cursor + buffer.size(); inline token_type scan()
// read first token {
get_token(); #define YYFILL(n)
}
{
/// a parser reading from an input stream char yych;
inline parser(std::istream& _is) static const unsigned char yybm[] = {
{ 128, 128, 128, 128, 128, 128, 128, 128,
while (_is) 128, 128, 128, 128, 128, 128, 128, 128,
{ 128, 128, 128, 128, 128, 128, 128, 128,
std::string input_line; 128, 128, 128, 128, 128, 128, 128, 128,
std::getline(_is, input_line); 128, 128, 0, 128, 128, 128, 128, 128,
buffer += input_line; 128, 128, 128, 128, 128, 128, 128, 128,
} 192, 192, 192, 192, 192, 192, 192, 192,
192, 192, 128, 128, 128, 128, 128, 128,
// set buffer for RE2C 128, 128, 128, 128, 128, 128, 128, 128,
m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str()); 128, 128, 128, 128, 128, 128, 128, 128,
// set a pointer past the end of the buffer 128, 128, 128, 128, 128, 128, 128, 128,
m_limit = m_cursor + buffer.size(); 128, 128, 128, 128, 0, 128, 128, 128,
// read first token 128, 128, 128, 128, 128, 128, 128, 128,
get_token(); 128, 128, 128, 128, 128, 128, 128, 128,
} 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
inline basic_json parse() 128, 128, 128, 128, 128, 128, 128, 128,
{ 128, 128, 128, 128, 128, 128, 128, 128,
switch (last_token) 128, 128, 128, 128, 128, 128, 128, 128,
{ 128, 128, 128, 128, 128, 128, 128, 128,
case (token_type::begin_object): 128, 128, 128, 128, 128, 128, 128, 128,
{ 128, 128, 128, 128, 128, 128, 128, 128,
// explicitly set result to object to cope with {} 128, 128, 128, 128, 128, 128, 128, 128,
basic_json result(value_t::object); 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
// read next token 128, 128, 128, 128, 128, 128, 128, 128,
get_token(); 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
// closing } -> we are done 128, 128, 128, 128, 128, 128, 128, 128,
if (last_token == token_type::end_object) 128, 128, 128, 128, 128, 128, 128, 128,
{ 128, 128, 128, 128, 128, 128, 128, 128,
return result; 128, 128, 128, 128, 128, 128, 128, 128,
}
// otherwise: parse key-value pairs
do
{
// store key
expect_new(token_type::value_string);
const auto key = get_string();
// parse separator (:)
get_token();
expect_new(token_type::name_separator);
// parse value
get_token();
result[key] = parse();
// read next character
get_token();
}
while (last_token == token_type::value_separator
and get_token() == last_token);
// closing }
expect_new(token_type::end_object);
return result;
}
case (token_type::begin_array):
{
// explicitly set result to object to cope with []
basic_json result(value_t::array);
// read next token
get_token();
// closing ] -> we are done
if (last_token == token_type::end_array)
{
return result;
}
// otherwise: parse values
do
{
// parse value
result.push_back(parse());
// read next character
get_token();
}
while (last_token == token_type::value_separator
and get_token() == last_token);
// closing ]
expect_new(token_type::end_array);
return result;
}
case (token_type::literal_null):
{
return basic_json(nullptr);
}
case (token_type::value_string):
{
return basic_json(get_string());
}
case (token_type::literal_true):
{
return basic_json(true);
}
case (token_type::literal_false):
{
return basic_json(false);
}
case (token_type::value_number):
{
// The pointer m_begin points to the beginning of the
// parsed number. We pass this pointer to std::strtod which
// sets endptr to the first character past the converted
// number. If this pointer is not the same as m_cursor,
// then either more or less characters have been used
// during the comparison. This can happen for inputs like
// "01" which will be treated like number 0 followed by
// number 1.
// conversion
char* endptr;
const auto float_val = std::strtod(reinterpret_cast<const char*>(m_begin), &endptr);
// check if strtod read beyond the end of the lexem
if (reinterpret_cast<const lexer_char_t*>(endptr) != m_cursor)
{
throw std::invalid_argument(std::string("parse error - ") +
reinterpret_cast<const char*>(m_begin) + " is not a number");
}
// check if conversion loses precision
const auto int_val = static_cast<int>(float_val);
if (float_val == int_val)
{
// we basic_json not lose precision -> return int
return basic_json(int_val);
}
else
{
// we would lose precision -> returnfloat
return basic_json(float_val);
}
}
default:
{
std::string error_msg = "parse error - unexpected \'";
error_msg += static_cast<char>(m_begin[0]);
error_msg += "\' (";
error_msg += token_type_name(last_token) + ")";
throw std::invalid_argument(error_msg);
}
}
}
private:
/*!
This function implements a scanner for JSON. It is specified using
regular expressions that try to follow RFC 7159 and ECMA-404 as close
as possible. These regular expressions are then translated into a
deterministic finite automaton (DFA) by the tool RE2C. As a result, the
translated code for this function consists of a large block of code
with goto jumps.
@return the class of the next token read from the buffer
@todo Unicode support needs to be checked.
*/
inline token_type get_token()
{
// needed by RE2C
const lexer_char_t* marker = nullptr;
// set up RE2C
json_parser_lexer_start:
// set current to the begin of the buffer
m_begin = m_cursor;
if (m_begin == m_limit)
{
return last_token = token_type::end_of_input;
}
{
lexer_char_t yych;
unsigned int yyaccept = 0;
static const unsigned char yybm[] =
{
64, 64, 64, 64, 64, 64, 64, 64,
64, 192, 192, 64, 64, 192, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
192, 64, 0, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 0, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
}; };
yych = *m_cursor; yych = *m_cursor;
if (yych <= ':') if (yych <= '[')
{ {
if (yych <= '!') if (yych <= '-')
{ {
if (yych <= '\f') if (yych <= '"')
{ {
if (yych <= 0x08) if (yych <= 0x00)
{ {
goto json_parser_3; goto yy25;
} }
if (yych <= '\n') if (yych >= '"')
{ {
goto json_parser_5; goto yy23;
} }
goto json_parser_3;
} }
else else
{ {
if (yych <= '\r') if (yych <= '+')
{ {
goto json_parser_5; goto yy2;
} }
if (yych == ' ') if (yych <= ',')
{ {
goto json_parser_5; goto yy11;
} }
goto json_parser_3; goto yy18;
} }
} }
else else
{ {
if (yych <= '-') if (yych <= '9')
{
if (yych <= '"')
{ {
goto json_parser_6; if (yych <= '/')
}
if (yych <= '+')
{ {
goto json_parser_3; goto yy2;
} }
if (yych <= ',') if (yych <= '0')
{ {
goto json_parser_7; goto yy19;
} }
goto json_parser_9; goto yy21;
} }
else else
{ {
if (yych <= '/') if (yych <= ':')
{
goto json_parser_3;
}
if (yych <= '0')
{ {
goto json_parser_10; goto yy13;
} }
if (yych <= '9') if (yych >= '[')
{ {
goto json_parser_12; goto yy3;
} }
goto json_parser_13;
} }
} }
} }
else else {
{ if (yych <= 'n')
if (yych <= 'm')
{ {
if (yych <= '\\') if (yych <= 'e')
{ {
if (yych == '[') if (yych == ']')
{ {
goto json_parser_15; goto yy5;
} }
goto json_parser_3;
} }
else else
{ {
if (yych <= ']') if (yych <= 'f')
{ {
goto json_parser_17; goto yy17;
} }
if (yych == 'f') if (yych >= 'n')
{ {
goto json_parser_19; goto yy15;
} }
goto json_parser_3;
} }
} }
else else {
{
if (yych <= 'z') if (yych <= 'z')
{ {
if (yych <= 'n')
{
goto json_parser_20;
}
if (yych == 't') if (yych == 't')
{ {
goto json_parser_21; goto yy16;
} }
goto json_parser_3;
} }
else else {
{
if (yych <= '{') if (yych <= '{')
{ {
goto json_parser_22; goto yy7;
} }
if (yych == '}') if (yych == '}')
{ {
goto json_parser_24; goto yy9;
} }
goto json_parser_3;
} }
} }
} }
json_parser_2: yy2:
m_cursor = m_marker;
goto yy20;
yy3:
++m_cursor;
{ return token_type::begin_array; }
yy5:
++m_cursor;
{ return token_type::end_array; }
yy7:
++m_cursor;
{ return token_type::begin_object; }
yy9:
++m_cursor;
{ return token_type::end_object; }
yy11:
++m_cursor;
{ return token_type::value_separator; }
yy13:
++m_cursor;
{ return token_type::name_separator; }
yy15:
yych = *++m_cursor;
if (yych == 'u')
{ {
goto json_parser_lexer_start; goto yy50;
} }
json_parser_3: goto yy2;
++m_cursor; yy16:
json_parser_4: yych = *++m_cursor;
if (yych == 'r')
{ {
return last_token = token_type::parse_error; goto yy46;
} }
json_parser_5: goto yy2;
yy17:
yych = *++m_cursor; yych = *++m_cursor;
goto json_parser_60; if (yych == 'a')
json_parser_6:
yyaccept = 0;
yych = *(marker = ++m_cursor);
goto json_parser_51;
json_parser_7:
++m_cursor;
{ {
return last_token = token_type::value_separator; goto yy41;
} }
json_parser_9: goto yy2;
yy18:
yych = *++m_cursor; yych = *++m_cursor;
if (yych <= '/') if (yych <= '/')
{ {
goto json_parser_4; goto yy2;
} }
if (yych <= '0') if (yych <= '0')
{ {
goto json_parser_49; goto yy19;
} }
if (yych <= '9') if (yych <= '9')
{ {
goto json_parser_40; goto yy21;
} }
goto json_parser_4; goto yy2;
json_parser_10: yy19:
yyaccept = 1; yych = *(m_marker = ++m_cursor);
yych = *(marker = ++m_cursor);
if (yych <= 'D') if (yych <= 'D')
{ {
if (yych == '.') if (yych == '.')
{ {
goto json_parser_42; goto yy34;
} }
} }
else else {
{
if (yych <= 'E') if (yych <= 'E')
{ {
goto json_parser_43; goto yy35;
} }
if (yych == 'e') if (yych == 'e')
{ {
goto json_parser_43; goto yy35;
} }
} }
json_parser_11: yy20:
{ return token_type::value_number; }
yy21:
m_marker = ++m_cursor;
yych = *m_cursor;
if (yybm[0 + yych] & 64)
{ {
return last_token = token_type::value_number; goto yy21;
} }
json_parser_12: if (yych <= 'D')
yyaccept = 1;
yych = *(marker = ++m_cursor);
goto json_parser_41;
json_parser_13:
++m_cursor;
{ {
return last_token = token_type::name_separator; if (yych == '.')
}
json_parser_15:
++m_cursor;
{ {
return last_token = token_type::begin_array; goto yy34;
} }
json_parser_17: goto yy20;
++m_cursor;
{
return last_token = token_type::end_array;
} }
json_parser_19: else {
yyaccept = 0; if (yych <= 'E')
yych = *(marker = ++m_cursor);
if (yych == 'a')
{ {
goto json_parser_35; goto yy35;
} }
goto json_parser_4; if (yych == 'e')
json_parser_20:
yyaccept = 0;
yych = *(marker = ++m_cursor);
if (yych == 'u')
{ {
goto json_parser_31; goto yy35;
} }
goto json_parser_4; goto yy20;
json_parser_21:
yyaccept = 0;
yych = *(marker = ++m_cursor);
if (yych == 'r')
{
goto json_parser_26;
} }
goto json_parser_4; yy23:
json_parser_22:
++m_cursor; ++m_cursor;
yych = *m_cursor;
if (yybm[0 + yych] & 128)
{ {
return last_token = token_type::begin_object; goto yy23;
} }
json_parser_24: if (yych <= '"')
++m_cursor;
{ {
return last_token = token_type::end_object; goto yy28;
} }
json_parser_26: goto yy27;
yych = *++m_cursor; yy25:
if (yych == 'u') ++m_cursor;
{ return token_type::end_of_input; }
yy27:
++m_cursor;
yych = *m_cursor;
if (yych <= 'e')
{
if (yych <= '/')
{
if (yych == '"')
{ {
goto json_parser_28; goto yy23;
} }
json_parser_27: if (yych <= '.')
m_cursor = marker;
if (yyaccept == 0)
{ {
goto json_parser_4; goto yy2;
}
goto yy23;
} }
else else
{ {
goto json_parser_11; if (yych <= '\\')
}
json_parser_28:
yych = *++m_cursor;
if (yych != 'e')
{ {
goto json_parser_27; if (yych <= '[')
}
++m_cursor;
{ {
return last_token = token_type::literal_true; goto yy2;
} }
json_parser_31: goto yy23;
yych = *++m_cursor;
if (yych != 'l')
{
goto json_parser_27;
} }
yych = *++m_cursor; else
if (yych != 'l')
{ {
goto json_parser_27; if (yych == 'b')
}
++m_cursor;
{ {
return last_token = token_type::literal_null; goto yy23;
} }
json_parser_35: goto yy2;
yych = *++m_cursor;
if (yych != 'l')
{
goto json_parser_27;
} }
yych = *++m_cursor;
if (yych != 's')
{
goto json_parser_27;
} }
yych = *++m_cursor;
if (yych != 'e')
{
goto json_parser_27;
} }
++m_cursor; else {
if (yych <= 'q')
{ {
return last_token = token_type::literal_false; if (yych <= 'f')
}
json_parser_40:
yyaccept = 1;
marker = ++m_cursor;
yych = *m_cursor;
json_parser_41:
if (yybm[0 + yych] & 32)
{ {
goto json_parser_40; goto yy23;
} }
if (yych <= 'D') if (yych == 'n')
{
if (yych != '.')
{ {
goto json_parser_11; goto yy23;
} }
goto yy2;
} }
else else {
{ if (yych <= 's')
if (yych <= 'E')
{ {
goto json_parser_43; if (yych <= 'r')
}
if (yych == 'e')
{ {
goto json_parser_43; goto yy23;
} }
goto json_parser_11; goto yy2;
} }
json_parser_42: else {
yych = *++m_cursor; if (yych <= 't')
if (yych <= '/')
{ {
goto json_parser_27; goto yy23;
} }
if (yych <= '9') if (yych <= 'u')
{ {
goto json_parser_47; goto yy30;
} }
goto json_parser_27; goto yy2;
json_parser_43:
yych = *++m_cursor;
if (yych <= ',')
{
if (yych != '+')
{
goto json_parser_27;
} }
} }
else
{
if (yych <= '-')
{
goto json_parser_44;
} }
yy28:
++m_cursor;
{ return token_type::value_string; }
yy30:
++m_cursor;
yych = *m_cursor;
if (yych <= '@')
{
if (yych <= '/') if (yych <= '/')
{ {
goto json_parser_27; goto yy2;
} }
if (yych <= '9') if (yych >= ':')
{ {
goto json_parser_45; goto yy2;
} }
goto json_parser_27;
} }
json_parser_44: else {
yych = *++m_cursor; if (yych <= 'F')
if (yych <= '/')
{ {
goto json_parser_27; goto yy31;
} }
if (yych >= ':') if (yych <= '`')
{ {
goto json_parser_27; goto yy2;
} }
json_parser_45: if (yych >= 'g')
++m_cursor;
yych = *m_cursor;
if (yych <= '/')
{ {
goto json_parser_11; goto yy2;
} }
if (yych <= '9')
{
goto json_parser_45;
} }
goto json_parser_11; yy31:
json_parser_47: ++m_cursor;
yyaccept = 1;
marker = ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yych <= 'D') if (yych <= '@')
{ {
if (yych <= '/') if (yych <= '/')
{ {
goto json_parser_11; goto yy2;
} }
if (yych <= '9') if (yych >= ':')
{ {
goto json_parser_47; goto yy2;
} }
goto json_parser_11;
} }
else else {
{ if (yych <= 'F')
if (yych <= 'E')
{ {
goto json_parser_43; goto yy32;
} }
if (yych == 'e') if (yych <= '`')
{ {
goto json_parser_43; goto yy2;
} }
goto json_parser_11; if (yych >= 'g')
}
json_parser_49:
yyaccept = 1;
yych = *(marker = ++m_cursor);
if (yych <= 'D')
{
if (yych == '.')
{ {
goto json_parser_42; goto yy2;
} }
goto json_parser_11;
} }
else yy32:
++m_cursor;
yych = *m_cursor;
if (yych <= '@')
{ {
if (yych <= 'E') if (yych <= '/')
{ {
goto json_parser_43; goto yy2;
} }
if (yych == 'e') if (yych >= ':')
{ {
goto json_parser_43; goto yy2;
} }
goto json_parser_11;
} }
json_parser_50: else {
++m_cursor; if (yych <= 'F')
yych = *m_cursor;
json_parser_51:
if (yybm[0 + yych] & 64)
{ {
goto json_parser_50; goto yy33;
} }
if (yych <= '"') if (yych <= '`')
{ {
goto json_parser_53; goto yy2;
} }
if (yych >= 'g')
{
goto yy2;
}
}
yy33:
++m_cursor; ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yych <= 'e') if (yych <= '@')
{ {
if (yych <= '/') if (yych <= '/')
{ {
if (yych == '"') goto yy2;
{
goto json_parser_50;
} }
if (yych <= '.') if (yych <= '9')
{ {
goto json_parser_27; goto yy23;
} }
goto json_parser_50; goto yy2;
} }
else else {
{ if (yych <= 'F')
if (yych <= '\\')
{
if (yych <= '[')
{ {
goto json_parser_27; goto yy23;
}
goto json_parser_50;
} }
else if (yych <= '`')
{
if (yych == 'b')
{ {
goto json_parser_50; goto yy2;
}
goto json_parser_27;
} }
}
}
else
{
if (yych <= 'q')
{
if (yych <= 'f') if (yych <= 'f')
{ {
goto json_parser_50; goto yy23;
} }
if (yych == 'n') goto yy2;
{
goto json_parser_50;
} }
goto json_parser_27; yy34:
yych = *++m_cursor;
if (yych <= '/')
{
goto yy2;
} }
else if (yych <= '9')
{ {
if (yych <= 's') goto yy39;
}
goto yy2;
yy35:
yych = *++m_cursor;
if (yych <= ',')
{ {
if (yych <= 'r') if (yych != '+')
{ {
goto json_parser_50; goto yy2;
} }
goto json_parser_27;
} }
else else {
if (yych <= '-')
{ {
if (yych <= 't') goto yy36;
}
if (yych <= '/')
{ {
goto json_parser_50; goto yy2;
} }
if (yych <= 'u') if (yych <= '9')
{ {
goto json_parser_55; goto yy37;
} }
goto json_parser_27; goto yy2;
} }
yy36:
yych = *++m_cursor;
if (yych <= '/')
{
goto yy2;
} }
if (yych >= ':')
{
goto yy2;
} }
json_parser_53: yy37:
++m_cursor; ++m_cursor;
yych = *m_cursor;
if (yych <= '/')
{ {
return last_token = token_type::value_string; goto yy20;
} }
json_parser_55: if (yych <= '9')
++m_cursor; {
goto yy37;
}
goto yy20;
yy39:
m_marker = ++m_cursor;
yych = *m_cursor; yych = *m_cursor;
if (yych <= '@') if (yych <= 'D')
{ {
if (yych <= '/') if (yych <= '/')
{ {
goto json_parser_27; goto yy20;
} }
if (yych >= ':') if (yych <= '9')
{ {
goto json_parser_27; goto yy39;
} }
goto yy20;
} }
else else {
if (yych <= 'E')
{ {
if (yych <= 'F') goto yy35;
}
if (yych == 'e')
{ {
goto json_parser_56; goto yy35;
} }
if (yych <= '`') goto yy20;
}
yy41:
yych = *++m_cursor;
if (yych != 'l')
{ {
goto json_parser_27; goto yy2;
} }
if (yych >= 'g') yych = *++m_cursor;
if (yych != 's')
{ {
goto json_parser_27; goto yy2;
} }
yych = *++m_cursor;
if (yych != 'e')
{
goto yy2;
} }
json_parser_56:
++m_cursor; ++m_cursor;
yych = *m_cursor; { return token_type::literal_false; }
if (yych <= '@') yy46:
yych = *++m_cursor;
if (yych != 'u')
{ {
if (yych <= '/') goto yy2;
}
yych = *++m_cursor;
if (yych != 'e')
{ {
goto json_parser_27; goto yy2;
} }
if (yych >= ':') ++m_cursor;
{ return token_type::literal_true; }
yy50:
yych = *++m_cursor;
if (yych != 'l')
{ {
goto json_parser_27; goto yy2;
} }
yych = *++m_cursor;
if (yych != 'l')
{
goto yy2;
} }
else ++m_cursor;
{ return token_type::literal_null; }
}
}
inline std::string get_string_value() const
{ {
if (yych <= 'F') return std::string(m_start, static_cast<size_t>(m_cursor - m_start));
}
/*!
The pointer m_begin points to the opening quote of the string, and
m_cursor past the closing quote of the string. We create a std::string from
the character after the opening quotes (m_begin+1) until the character
before the closing quotes (hence subtracting 2 characters from the pointer
difference of the two pointers).
@return string value of current token without opening and closing quotes
@todo Take care of Unicode.
*/
inline std::string get_string() const
{ {
goto json_parser_57; return std::string(m_start + 1, static_cast<size_t>(m_cursor - m_start - 2));
} }
if (yych <= '`')
inline number_float_t get_number() const
{
// The pointer m_begin points to the beginning of the
// parsed number. We pass this pointer to std::strtod which
// sets endptr to the first character past the converted
// number. If this pointer is not the same as m_cursor,
// then either more or less characters have been used
// during the comparison. This can happen for inputs like
// "01" which will be treated like number 0 followed by
// number 1.
// conversion
char* endptr;
const auto float_val = std::strtod(reinterpret_cast<const char*>(m_start), &endptr);
// check if strtod read beyond the end of the lexem
if (endptr != m_cursor)
{ {
goto json_parser_27; std::cerr << get_string_value() << std::endl;
return NAN;
} }
if (yych >= 'g') else
{ {
goto json_parser_27; return float_val;
} }
} }
json_parser_57:
++m_cursor; private:
yych = *m_cursor; const char* m_content = nullptr;
if (yych <= '@')
const char* m_start = nullptr;
const char* m_cursor = nullptr;
const char* m_limit = nullptr;
const char* m_marker = nullptr;
const char* m_ctxmarker = nullptr;
};
class parser
{ {
if (yych <= '/') public:
/// constructor for strings
inline parser(const std::string& s) : m_buffer(s), m_lexer(m_buffer.c_str())
{ {
goto json_parser_27; // read first token
get_token();
} }
if (yych >= ':')
/// a parser reading from an input stream
inline parser(std::istream& _is)
{ {
goto json_parser_27; while (_is)
{
std::string input_line;
std::getline(_is, input_line);
m_buffer += input_line;
} }
// initializer lexer
m_lexer = lexer(m_buffer.c_str());
// read first token
get_token();
} }
else
inline basic_json parse()
{ {
if (yych <= 'F') switch (last_token)
{
case (lexer::token_type::begin_object):
{
// explicitly set result to object to cope with {}
basic_json result(value_t::object);
// read next token
get_token();
// closing } -> we are done
if (last_token == lexer::token_type::end_object)
{ {
goto json_parser_58; return result;
} }
if (yych <= '`')
// otherwise: parse key-value pairs
do
{
// store key
expect(lexer::token_type::value_string);
const auto key = m_lexer.get_string();
// parse separator (:)
get_token();
expect(lexer::token_type::name_separator);
// parse value
get_token();
result[key] = parse();
// read next character
get_token();
}
while (last_token == lexer::token_type::value_separator
and get_token() == last_token);
// closing }
expect(lexer::token_type::end_object);
return result;
}
case (lexer::token_type::begin_array):
{
// explicitly set result to object to cope with []
basic_json result(value_t::array);
// read next token
get_token();
// closing ] -> we are done
if (last_token == lexer::token_type::end_array)
{ {
goto json_parser_27; return result;
} }
if (yych >= 'g')
// otherwise: parse values
do
{ {
goto json_parser_27; // parse value
result.push_back(parse());
// read next character
get_token();
} }
while (last_token == lexer::token_type::value_separator
and get_token() == last_token);
// closing ]
expect(lexer::token_type::end_array);
return result;
} }
json_parser_58:
++m_cursor; case (lexer::token_type::literal_null):
yych = *m_cursor;
if (yych <= '@')
{ {
if (yych <= '/') return basic_json(nullptr);
}
case (lexer::token_type::value_string):
{ {
goto json_parser_27; return basic_json(m_lexer.get_string());
} }
if (yych <= '9')
case (lexer::token_type::literal_true):
{ {
goto json_parser_50; return basic_json(true);
} }
goto json_parser_27;
case (lexer::token_type::literal_false):
{
return basic_json(false);
} }
else
case (lexer::token_type::value_number):
{ {
if (yych <= 'F') auto float_val = m_lexer.get_number();
if (std::isnan(float_val))
{ {
goto json_parser_50; throw std::invalid_argument(std::string("parse error - ") +
m_lexer.get_string_value() + " is not a number");
} }
if (yych <= '`')
// check if conversion loses precision
const auto int_val = static_cast<number_integer_t>(float_val);
if (float_val == int_val)
{ {
goto json_parser_27; // we basic_json not lose precision -> return int
return basic_json(int_val);
} }
if (yych <= 'f') else
{ {
goto json_parser_50; // we would lose precision -> returnfloat
return basic_json(float_val);
} }
goto json_parser_27;
} }
json_parser_59:
++m_cursor; default:
yych = *m_cursor;
json_parser_60:
if (yybm[0 + yych] & 128)
{ {
goto json_parser_59; std::string error_msg = "parse error - unexpected \'";
error_msg += m_lexer.get_string_value();
error_msg += "\' (";
error_msg += token_type_name(last_token) + ")";
throw std::invalid_argument(error_msg);
}
} }
goto json_parser_2;
} }
private:
/// get next token from lexer
inline typename lexer::token_type get_token()
{
last_token = m_lexer.scan();
return last_token;
} }
inline static std::string token_type_name(token_type t) inline static std::string token_type_name(typename lexer::token_type t)
{ {
switch (t) switch (t)
{ {
case (token_type::uninitialized): case (lexer::token_type::uninitialized):
return "<uninitialized>"; return "<uninitialized>";
case (token_type::literal_true): case (lexer::token_type::literal_true):
return "true literal"; return "true literal";
case (token_type::literal_false): case (lexer::token_type::literal_false):
return "false literal"; return "false literal";
case (token_type::literal_null): case (lexer::token_type::literal_null):
return "null literal"; return "null literal";
case (token_type::value_string): case (lexer::token_type::value_string):
return "string literal"; return "string literal";
case (token_type::value_number): case (lexer::token_type::value_number):
return "number literal"; return "number literal";
case (token_type::begin_array): case (lexer::token_type::begin_array):
return "["; return "[";
case (token_type::begin_object): case (lexer::token_type::begin_object):
return "{"; return "{";
case (token_type::end_array): case (lexer::token_type::end_array):
return "]"; return "]";
case (token_type::end_object): case (lexer::token_type::end_object):
return "}"; return "}";
case (token_type::name_separator): case (lexer::token_type::name_separator):
return ":"; return ":";
case (token_type::value_separator): case (lexer::token_type::value_separator):
return ","; return ",";
case (token_type::parse_error): case (lexer::token_type::parse_error):
return "<parse error>"; return "<parse error>";
case (token_type::end_of_input): case (lexer::token_type::end_of_input):
return "<end of input>"; return "<end of input>";
} }
} }
inline void expect_new(token_type t) inline void expect(typename lexer::token_type t) const
{ {
if (t != last_token) if (t != last_token)
{ {
std::string error_msg = "parse error - unexpected \'"; std::string error_msg = "parse error - unexpected \'";
error_msg += static_cast<char>(m_begin[0]); error_msg += m_lexer.get_string_value();
error_msg += "\' (" + token_type_name(last_token); error_msg += "\' (" + token_type_name(last_token);
error_msg += "); expected " + token_type_name(t); error_msg += "); expected " + token_type_name(t);
throw std::invalid_argument(error_msg); throw std::invalid_argument(error_msg);
} }
} }
/*!
The pointer m_begin points to the opening quote of the string, and
m_cursor past the closing quote of the string. We create a std::string from
the character after the opening quotes (m_begin+1) until the character
before the closing quotes (hence subtracting 2 characters from the pointer
difference of the two pointers).
@return string value of current token without opening and closing quotes
@todo Take care of Unicode.
*/
inline std::string get_string() const
{
return std::string(
reinterpret_cast<const char*>(m_begin + 1),
static_cast<std::size_t>(m_cursor - m_begin - 2)
);
}
private: private:
/// the buffer /// the buffer
std::string buffer; std::string m_buffer;
/// a pointer to the next character to read from the buffer
const lexer_char_t* m_cursor = nullptr;
/// a pointer past the last character of the buffer
const lexer_char_t* m_limit = nullptr;
/// a pointer to the beginning of the current token
const lexer_char_t* m_begin = nullptr;
/// the type of the last read token /// the type of the last read token
token_type last_token = token_type::uninitialized; typename lexer::token_type last_token = lexer::token_type::uninitialized;
lexer m_lexer;
}; };
}; };
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <type_traits> #include <type_traits>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <cmath>
/*! /*!
- ObjectType trick from http://stackoverflow.com/a/9860911 - ObjectType trick from http://stackoverflow.com/a/9860911
...@@ -2384,9 +2385,9 @@ class basic_json ...@@ -2384,9 +2385,9 @@ class basic_json
// parser // // parser //
//////////// ////////////
class parser class lexer
{ {
private: public:
/// token types for the parser /// token types for the parser
enum class token_type enum class token_type
{ {
...@@ -2406,17 +2407,133 @@ class basic_json ...@@ -2406,17 +2407,133 @@ class basic_json
end_of_input end_of_input
}; };
/// the type of a lexer character inline lexer(const char* s) : m_content(s)
using lexer_char_t = unsigned char; {
m_start = m_cursor = m_content;
m_limit = m_content + strlen(m_content);
}
/// default constructor: every buffer pointer keeps its in-class nullptr
/// initializer, so a lexer built this way has no input until it is replaced
/// by one constructed from a character buffer
inline lexer() = default;
/*!max:re2c */
/*!
@brief read the next token from the input buffer

Stores the current read position in m_start, so that get_string_value(),
get_string() and get_number() refer to the lexeme matched by this call, and
then runs the scanner that re2c generates from the specification below.

Whitespace between tokens is skipped. A character that cannot start any token
yields token_type::parse_error. The terminating NUL of the buffer yields
token_type::end_of_input.

@return the type of the token that was read
*/
inline token_type scan()
{
#define YYFILL(n)
    // remember where this token begins; the accessors read [m_start, m_cursor)
    m_start = m_cursor;

    /*!re2c
        re2c:define:YYCURSOR    = m_cursor;
        re2c:define:YYLIMIT     = m_limit;
        // unsigned, so that bytes >= 0x80 never produce a negative index
        // into the generated lookup tables
        re2c:define:YYCTYPE     = "unsigned char";
        re2c:define:YYCTXMARKER = m_ctxmarker;
        re2c:define:YYMARKER    = m_marker;
        re2c:indent:top         = 1;
        re2c:yyfill:enable      = 0;

        // whitespace between tokens is skipped
        ws = [ \t\n\r]+;
        ws { return scan(); }

        // structural characters
        "[" { return token_type::begin_array; }
        "]" { return token_type::end_array; }
        "{" { return token_type::begin_object; }
        "}" { return token_type::end_object; }
        "," { return token_type::value_separator; }
        ":" { return token_type::name_separator; }

        // literal names
        "null"  { return token_type::literal_null; }
        "true"  { return token_type::literal_true; }
        "false" { return token_type::literal_false; }

        // number
        decimal_point = [.];
        digit         = [0-9];
        digit_1_9     = [1-9];
        e             = [eE];
        minus         = [-];
        plus          = [+];
        zero          = [0];
        exp           = e (minus|plus)? digit+;
        frac          = decimal_point digit+;
        int           = (zero|digit_1_9 digit*);
        number        = minus? int frac? exp?;
        number { return token_type::value_number; }

        // string; the terminating NUL is excluded from the unescaped
        // characters so that an unterminated string cannot run past the
        // end of the buffer
        quotation_mark = [\"];
        escape         = [\\];
        unescaped      = [^\"\\\000];
        escaped        = escape ([\"\\/bfnrt] | [u][0-9a-fA-F]{4});
        char           = unescaped | escaped;
        string         = quotation_mark char* quotation_mark;
        string { return token_type::value_string; }

        // end of file
        '\000' { return token_type::end_of_input; }

        // anything else cannot start a token
        . { return token_type::parse_error; }
    */
}
/// return a copy of the characters between m_start and m_cursor
inline std::string get_string_value() const
{
    const auto lexeme_length = static_cast<size_t>(m_cursor - m_start);
    return std::string(m_start, lexeme_length);
}
/*!
@brief return the current string token without its surrounding quotes

Copies the characters from m_start + 1 up to, but not including,
m_cursor - 1. With m_start at the opening quote and m_cursor one past the
closing quote, this drops exactly the two quote characters (hence the
length of the pointer difference minus 2).

@return string value of current token without opening and closing quotes

@todo Take care of Unicode.
*/
inline std::string get_string() const
{
    const char* const first_char = m_start + 1;
    const auto char_count = static_cast<size_t>(m_cursor - m_start - 2);
    return std::string(first_char, char_count);
}
/*!
@brief convert the current lexeme to a floating-point number

Passes m_start to std::strtod, which sets endptr to the first character past
the converted number. If endptr differs from m_cursor, strtod consumed more
or fewer characters than the lexer matched. This can happen for inputs like
"01", which the lexer treats as number 0 followed by number 1.

The failure is reported through the return value only; nothing is written to
any stream.

@return the converted value, or NAN if the conversion did not end exactly at
        m_cursor (detectable with std::isnan)
*/
inline number_float_t get_number() const
{
    char* endptr = nullptr;
    const auto float_val = std::strtod(m_start, &endptr);

    // strtod has to stop exactly where the lexer stopped
    if (endptr != m_cursor)
    {
        return NAN;
    }

    return float_val;
}
private:
/// the buffer passed to the constructor
const char* m_content = nullptr;
/// begin of the range read by get_string_value(), get_string() and
/// get_number(); set to m_content by the constructor
const char* m_start = nullptr;
/// current read position; bound to re2c's YYCURSOR
const char* m_cursor = nullptr;
/// m_content + strlen(m_content), set by the constructor; bound to re2c's YYLIMIT
const char* m_limit = nullptr;
/// bound to re2c's YYMARKER (position the generated scanner backtracks to)
const char* m_marker = nullptr;
/// bound to re2c's YYCTXMARKER
const char* m_ctxmarker = nullptr;
};
class parser
{
public: public:
/// constructor for strings /// constructor for strings
inline parser(const std::string& s) : buffer(s) inline parser(const std::string& s) : m_buffer(s), m_lexer(m_buffer.c_str())
{ {
// set buffer for RE2C
m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str());
// set a pointer past the end of the buffer
m_limit = m_cursor + buffer.size();
// read first token // read first token
get_token(); get_token();
} }
...@@ -2428,13 +2545,12 @@ class basic_json ...@@ -2428,13 +2545,12 @@ class basic_json
{ {
std::string input_line; std::string input_line;
std::getline(_is, input_line); std::getline(_is, input_line);
buffer += input_line; m_buffer += input_line;
} }
// set buffer for RE2C // initializer lexer
m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str()); m_lexer = lexer(m_buffer.c_str());
// set a pointer past the end of the buffer
m_limit = m_cursor + buffer.size();
// read first token // read first token
get_token(); get_token();
} }
...@@ -2443,7 +2559,7 @@ class basic_json ...@@ -2443,7 +2559,7 @@ class basic_json
{ {
switch (last_token) switch (last_token)
{ {
case (token_type::begin_object): case (lexer::token_type::begin_object):
{ {
// explicitly set result to object to cope with {} // explicitly set result to object to cope with {}
basic_json result(value_t::object); basic_json result(value_t::object);
...@@ -2452,7 +2568,7 @@ class basic_json ...@@ -2452,7 +2568,7 @@ class basic_json
get_token(); get_token();
// closing } -> we are done // closing } -> we are done
if (last_token == token_type::end_object) if (last_token == lexer::token_type::end_object)
{ {
return result; return result;
} }
...@@ -2461,12 +2577,12 @@ class basic_json ...@@ -2461,12 +2577,12 @@ class basic_json
do do
{ {
// store key // store key
expect_new(token_type::value_string); expect(lexer::token_type::value_string);
const auto key = get_string(); const auto key = m_lexer.get_string();
// parse separator (:) // parse separator (:)
get_token(); get_token();
expect_new(token_type::name_separator); expect(lexer::token_type::name_separator);
// parse value // parse value
get_token(); get_token();
...@@ -2475,16 +2591,16 @@ class basic_json ...@@ -2475,16 +2591,16 @@ class basic_json
// read next character // read next character
get_token(); get_token();
} }
while (last_token == token_type::value_separator while (last_token == lexer::token_type::value_separator
and get_token() == last_token); and get_token() == last_token);
// closing } // closing }
expect_new(token_type::end_object); expect(lexer::token_type::end_object);
return result; return result;
} }
case (token_type::begin_array): case (lexer::token_type::begin_array):
{ {
// explicitly set result to object to cope with [] // explicitly set result to object to cope with []
basic_json result(value_t::array); basic_json result(value_t::array);
...@@ -2493,7 +2609,7 @@ class basic_json ...@@ -2493,7 +2609,7 @@ class basic_json
get_token(); get_token();
// closing ] -> we are done // closing ] -> we are done
if (last_token == token_type::end_array) if (last_token == lexer::token_type::end_array)
{ {
return result; return result;
} }
...@@ -2507,59 +2623,47 @@ class basic_json ...@@ -2507,59 +2623,47 @@ class basic_json
// read next character // read next character
get_token(); get_token();
} }
while (last_token == token_type::value_separator while (last_token == lexer::token_type::value_separator
and get_token() == last_token); and get_token() == last_token);
// closing ] // closing ]
expect_new(token_type::end_array); expect(lexer::token_type::end_array);
return result; return result;
} }
case (token_type::literal_null): case (lexer::token_type::literal_null):
{ {
return basic_json(nullptr); return basic_json(nullptr);
} }
case (token_type::value_string): case (lexer::token_type::value_string):
{ {
return basic_json(get_string()); return basic_json(m_lexer.get_string());
} }
case (token_type::literal_true): case (lexer::token_type::literal_true):
{ {
return basic_json(true); return basic_json(true);
} }
case (token_type::literal_false): case (lexer::token_type::literal_false):
{ {
return basic_json(false); return basic_json(false);
} }
case (token_type::value_number): case (lexer::token_type::value_number):
{ {
// The pointer m_begin points to the beginning of the auto float_val = m_lexer.get_number();
// parsed number. We pass this pointer to std::strtod which
// sets endptr to the first character past the converted
// number. If this pointer is not the same as m_cursor,
// then either more or less characters have been used
// during the comparison. This can happen for inputs like
// "01" which will be treated like number 0 followed by
// number 1.
// conversion if (std::isnan(float_val))
char* endptr;
const auto float_val = std::strtod(reinterpret_cast<const char*>(m_begin), &endptr);
// check if strtod read beyond the end of the lexem
if (reinterpret_cast<const lexer_char_t*>(endptr) != m_cursor)
{ {
throw std::invalid_argument(std::string("parse error - ") + throw std::invalid_argument(std::string("parse error - ") +
reinterpret_cast<const char*>(m_begin) + " is not a number"); m_lexer.get_string_value() + " is not a number");
} }
// check if conversion loses precision // check if conversion loses precision
const auto int_val = static_cast<int>(float_val); const auto int_val = static_cast<number_integer_t>(float_val);
if (float_val == int_val) if (float_val == int_val)
{ {
// we basic_json not lose precision -> return int // we basic_json not lose precision -> return int
...@@ -2575,7 +2679,7 @@ class basic_json ...@@ -2575,7 +2679,7 @@ class basic_json
default: default:
{ {
std::string error_msg = "parse error - unexpected \'"; std::string error_msg = "parse error - unexpected \'";
error_msg += static_cast<char>(m_begin[0]); error_msg += m_lexer.get_string_value();
error_msg += "\' ("; error_msg += "\' (";
error_msg += token_type_name(last_token) + ")"; error_msg += token_type_name(last_token) + ")";
throw std::invalid_argument(error_msg); throw std::invalid_argument(error_msg);
...@@ -2584,166 +2688,66 @@ class basic_json ...@@ -2584,166 +2688,66 @@ class basic_json
} }
private: private:
/*! /// get next token from lexer
This function implements a scanner for JSON. It is specified using inline typename lexer::token_type get_token()
regular expressions that try to follow RFC 7159 and ECMA-404 as close
as possible. These regular expressions are then translated into a
deterministic finite automaton (DFA) by the tool RE2C. As a result, the
translated code for this function consists of a large block of code
with goto jumps.
@return the class of the next token read from the buffer
@todo Unicode support needs to be checked.
*/
inline token_type get_token()
{ {
// needed by RE2C last_token = m_lexer.scan();
const lexer_char_t* marker = nullptr; return last_token;
// set up RE2C
/*!re2c
re2c:labelprefix = "json_parser_";
re2c:yyfill:enable = 0;
re2c:define:YYCURSOR = m_cursor;
re2c:define:YYCTYPE = lexer_char_t;
re2c:define:YYMARKER = marker;
re2c:indent:string = " ";
re2c:define:YYLIMIT = m_limit;
*/
json_parser_lexer_start:
// set current to the begin of the buffer
m_begin = m_cursor;
if (m_begin == m_limit)
{
return last_token = token_type::end_of_input;
}
/*!re2c
// whitespace
ws = [ \t\n\r]*;
ws { goto json_parser_lexer_start; }
// structural characters
"[" { return last_token = token_type::begin_array; }
"]" { return last_token = token_type::end_array; }
"{" { return last_token = token_type::begin_object; }
"}" { return last_token = token_type::end_object; }
"," { return last_token = token_type::value_separator; }
":" { return last_token = token_type::name_separator; }
// literal names
"null" { return last_token = token_type::literal_null; }
"true" { return last_token = token_type::literal_true; }
"false" { return last_token = token_type::literal_false; }
// number
decimal_point = [.];
digit = [0-9];
digit_1_9 = [1-9];
e = [eE];
minus = [-];
plus = [+];
zero = [0];
exp = e (minus|plus)? digit+;
frac = decimal_point digit+;
int = (zero|digit_1_9 digit*);
number = minus? int frac? exp?;
number { return last_token = token_type::value_number; }
// string
quotation_mark = [\"];
escape = [\\];
unescaped = [^\"\\];
escaped = escape ([\"\\/bfnrt] | [u][0-9a-fA-F]{4});
char = unescaped | escaped;
string = quotation_mark char* quotation_mark;
string { return last_token = token_type::value_string; }
// anything else is an error
* { return last_token = token_type::parse_error; }
*/
} }
inline static std::string token_type_name(token_type t) inline static std::string token_type_name(typename lexer::token_type t)
{ {
switch (t) switch (t)
{ {
case (token_type::uninitialized): case (lexer::token_type::uninitialized):
return "<uninitialized>"; return "<uninitialized>";
case (token_type::literal_true): case (lexer::token_type::literal_true):
return "true literal"; return "true literal";
case (token_type::literal_false): case (lexer::token_type::literal_false):
return "false literal"; return "false literal";
case (token_type::literal_null): case (lexer::token_type::literal_null):
return "null literal"; return "null literal";
case (token_type::value_string): case (lexer::token_type::value_string):
return "string literal"; return "string literal";
case (token_type::value_number): case (lexer::token_type::value_number):
return "number literal"; return "number literal";
case (token_type::begin_array): case (lexer::token_type::begin_array):
return "["; return "[";
case (token_type::begin_object): case (lexer::token_type::begin_object):
return "{"; return "{";
case (token_type::end_array): case (lexer::token_type::end_array):
return "]"; return "]";
case (token_type::end_object): case (lexer::token_type::end_object):
return "}"; return "}";
case (token_type::name_separator): case (lexer::token_type::name_separator):
return ":"; return ":";
case (token_type::value_separator): case (lexer::token_type::value_separator):
return ","; return ",";
case (token_type::parse_error): case (lexer::token_type::parse_error):
return "<parse error>"; return "<parse error>";
case (token_type::end_of_input): case (lexer::token_type::end_of_input):
return "<end of input>"; return "<end of input>";
} }
} }
inline void expect_new(token_type t) inline void expect(typename lexer::token_type t) const
{ {
if (t != last_token) if (t != last_token)
{ {
std::string error_msg = "parse error - unexpected \'"; std::string error_msg = "parse error - unexpected \'";
error_msg += static_cast<char>(m_begin[0]); error_msg += m_lexer.get_string_value();
error_msg += "\' (" + token_type_name(last_token); error_msg += "\' (" + token_type_name(last_token);
error_msg += "); expected " + token_type_name(t); error_msg += "); expected " + token_type_name(t);
throw std::invalid_argument(error_msg); throw std::invalid_argument(error_msg);
} }
} }
/*!
The pointer m_begin points to the opening quote of the string, and
m_cursor past the closing quote of the string. We create a std::string from
the character after the opening quotes (m_begin+1) until the character
before the closing quotes (hence subtracting 2 characters from the pointer
difference of the two pointers).
@return string value of current token without opening and closing quotes
@todo Take care of Unicode.
*/
inline std::string get_string() const
{
return std::string(
reinterpret_cast<const char*>(m_begin + 1),
static_cast<std::size_t>(m_cursor - m_begin - 2)
);
}
private: private:
/// the buffer /// the buffer
std::string buffer; std::string m_buffer;
/// a pointer to the next character to read from the buffer
const lexer_char_t* m_cursor = nullptr;
/// a pointer past the last character of the buffer
const lexer_char_t* m_limit = nullptr;
/// a pointer to the beginning of the current token
const lexer_char_t* m_begin = nullptr;
/// the type of the last read token /// the type of the last read token
token_type last_token = token_type::uninitialized; typename lexer::token_type last_token = lexer::token_type::uninitialized;
lexer m_lexer;
}; };
}; };
......
...@@ -3892,27 +3892,43 @@ TEST_CASE("deserialization") ...@@ -3892,27 +3892,43 @@ TEST_CASE("deserialization")
{ {
SECTION("string") SECTION("string")
{ {
auto s = "[\"foo\",1,2,3,false,{\"one\":1}]"; // auto s = "[\"foo\",1,2,3,false,{\"one\":1}]";
// json j = json::parse(s);
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
auto s = "null";
json j = json::parse(s); json j = json::parse(s);
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); CHECK(j == json());
} }
SECTION("operator<<") SECTION("operator<<")
{ {
// std::stringstream ss;
// ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
// json j;
// j << ss;
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
std::stringstream ss; std::stringstream ss;
ss << "[\"foo\",1,2,3,false,{\"one\":1}]"; ss << "null";
json j; json j;
j << ss; j << ss;
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); CHECK(j == json());
} }
SECTION("operator>>") SECTION("operator>>")
{ {
// std::stringstream ss;
// ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
// json j;
// ss >> j;
// CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
std::stringstream ss; std::stringstream ss;
ss << "[\"foo\",1,2,3,false,{\"one\":1}]"; ss << "null";
json j; json j;
ss >> j; ss >> j;
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); CHECK(j == json());
} }
} }
...@@ -3980,42 +3996,42 @@ TEST_CASE("parser class") ...@@ -3980,42 +3996,42 @@ TEST_CASE("parser class")
{ {
SECTION("structural characters") SECTION("structural characters")
{ {
CHECK(json::parser("[").last_token == json::parser::token_type::begin_array); CHECK(json::parser("[").last_token == json::lexer::token_type::begin_array);
CHECK(json::parser("]").last_token == json::parser::token_type::end_array); CHECK(json::parser("]").last_token == json::lexer::token_type::end_array);
CHECK(json::parser("{").last_token == json::parser::token_type::begin_object); CHECK(json::parser("{").last_token == json::lexer::token_type::begin_object);
CHECK(json::parser("}").last_token == json::parser::token_type::end_object); CHECK(json::parser("}").last_token == json::lexer::token_type::end_object);
CHECK(json::parser(",").last_token == json::parser::token_type::value_separator); CHECK(json::parser(",").last_token == json::lexer::token_type::value_separator);
CHECK(json::parser(":").last_token == json::parser::token_type::name_separator); CHECK(json::parser(":").last_token == json::lexer::token_type::name_separator);
} }
SECTION("literal names") SECTION("literal names")
{ {
CHECK(json::parser("null").last_token == json::parser::token_type::literal_null); CHECK(json::parser("null").last_token == json::lexer::token_type::literal_null);
CHECK(json::parser("true").last_token == json::parser::token_type::literal_true); CHECK(json::parser("true").last_token == json::lexer::token_type::literal_true);
CHECK(json::parser("false").last_token == json::parser::token_type::literal_false); CHECK(json::parser("false").last_token == json::lexer::token_type::literal_false);
} }
SECTION("numbers") SECTION("numbers")
{ {
CHECK(json::parser("0").last_token == json::parser::token_type::value_number); CHECK(json::parser("0").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("1").last_token == json::parser::token_type::value_number); CHECK(json::parser("1").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("2").last_token == json::parser::token_type::value_number); CHECK(json::parser("2").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("3").last_token == json::parser::token_type::value_number); CHECK(json::parser("3").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("4").last_token == json::parser::token_type::value_number); CHECK(json::parser("4").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("5").last_token == json::parser::token_type::value_number); CHECK(json::parser("5").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("6").last_token == json::parser::token_type::value_number); CHECK(json::parser("6").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("7").last_token == json::parser::token_type::value_number); CHECK(json::parser("7").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("8").last_token == json::parser::token_type::value_number); CHECK(json::parser("8").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("9").last_token == json::parser::token_type::value_number); CHECK(json::parser("9").last_token == json::lexer::token_type::value_number);
} }
SECTION("whitespace") SECTION("whitespace")
{ {
CHECK(json::parser(" 0").last_token == json::parser::token_type::value_number); CHECK(json::parser(" 0").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("\t0").last_token == json::parser::token_type::value_number); CHECK(json::parser("\t0").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("\n0").last_token == json::parser::token_type::value_number); CHECK(json::parser("\n0").last_token == json::lexer::token_type::value_number);
CHECK(json::parser("\r0").last_token == json::parser::token_type::value_number); CHECK(json::parser("\r0").last_token == json::lexer::token_type::value_number);
CHECK(json::parser(" \t\n\r\n\t 0").last_token == json::parser::token_type::value_number); CHECK(json::parser(" \t\n\r\n\t 0").last_token == json::lexer::token_type::value_number);
} }
/* /*
...@@ -4049,7 +4065,7 @@ TEST_CASE("parser class") ...@@ -4049,7 +4065,7 @@ TEST_CASE("parser class")
case ('9'): case ('9'):
case ('"'): case ('"'):
{ {
CHECK(json::parser(s).last_token != json::parser::token_type::parse_error); CHECK(json::parser(s).last_token != json::lexer::token_type::parse_error);
break; break;
} }
...@@ -4058,13 +4074,13 @@ TEST_CASE("parser class") ...@@ -4058,13 +4074,13 @@ TEST_CASE("parser class")
case ('\n'): case ('\n'):
case ('\r'): case ('\r'):
{ {
CHECK(json::parser(s).last_token == json::parser::token_type::end_of_input); CHECK(json::parser(s).last_token == json::lexer::token_type::end_of_input);
break; break;
} }
default: default:
{ {
CHECK(json::parser(s).last_token == json::parser::token_type::parse_error); CHECK(json::parser(s).last_token == json::lexer::token_type::parse_error);
break; break;
} }
} }
...@@ -4093,19 +4109,19 @@ TEST_CASE("parser class") ...@@ -4093,19 +4109,19 @@ TEST_CASE("parser class")
SECTION("token_type_name") SECTION("token_type_name")
{ {
CHECK(json::parser::token_type_name(json::parser::token_type::uninitialized) == "<uninitialized>"); CHECK(json::parser::token_type_name(json::lexer::token_type::uninitialized) == "<uninitialized>");
CHECK(json::parser::token_type_name(json::parser::token_type::literal_true) == "true literal"); CHECK(json::parser::token_type_name(json::lexer::token_type::literal_true) == "true literal");
CHECK(json::parser::token_type_name(json::parser::token_type::literal_false) == "false literal"); CHECK(json::parser::token_type_name(json::lexer::token_type::literal_false) == "false literal");
CHECK(json::parser::token_type_name(json::parser::token_type::literal_null) == "null literal"); CHECK(json::parser::token_type_name(json::lexer::token_type::literal_null) == "null literal");
CHECK(json::parser::token_type_name(json::parser::token_type::value_string) == "string literal"); CHECK(json::parser::token_type_name(json::lexer::token_type::value_string) == "string literal");
CHECK(json::parser::token_type_name(json::parser::token_type::value_number) == "number literal"); CHECK(json::parser::token_type_name(json::lexer::token_type::value_number) == "number literal");
CHECK(json::parser::token_type_name(json::parser::token_type::begin_array) == "["); CHECK(json::parser::token_type_name(json::lexer::token_type::begin_array) == "[");
CHECK(json::parser::token_type_name(json::parser::token_type::begin_object) == "{"); CHECK(json::parser::token_type_name(json::lexer::token_type::begin_object) == "{");
CHECK(json::parser::token_type_name(json::parser::token_type::end_array) == "]"); CHECK(json::parser::token_type_name(json::lexer::token_type::end_array) == "]");
CHECK(json::parser::token_type_name(json::parser::token_type::end_object) == "}"); CHECK(json::parser::token_type_name(json::lexer::token_type::end_object) == "}");
CHECK(json::parser::token_type_name(json::parser::token_type::name_separator) == ":"); CHECK(json::parser::token_type_name(json::lexer::token_type::name_separator) == ":");
CHECK(json::parser::token_type_name(json::parser::token_type::value_separator) == ","); CHECK(json::parser::token_type_name(json::lexer::token_type::value_separator) == ",");
CHECK(json::parser::token_type_name(json::parser::token_type::parse_error) == "<parse error>"); CHECK(json::parser::token_type_name(json::lexer::token_type::parse_error) == "<parse error>");
CHECK(json::parser::token_type_name(json::parser::token_type::end_of_input) == "<end of input>"); CHECK(json::parser::token_type_name(json::lexer::token_type::end_of_input) == "<end of input>");
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment