+ moved lexer to class

8a4e127a · Niels · e845cd1d · 8a4e127a · 8a4e127a · 8a4e127a
Commit 8a4e127a authored Feb 11, 2015 by Niels
Hide whitespace changes
Inline Side-by-side

Showing with 815 additions and 906 deletions

json.hpp src/json.hpp +576 -687

json.hpp.re2c src/json.hpp.re2c +176 -172

unit.cpp test/unit.cpp +63 -47

No files found.
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -14,6 +14,7 @@
 #include <type_traits>
 #include <utility>
 #include <vector>
+#include <cmath>
 /*!
 - ObjectType trick from http://stackoverflow.com/a/9860911
@@ -2384,9 +2385,9 @@ class basic_json
    // parser //
    ////////////
-    class parser
+    class lexer
    {
-      private:
+      public:
        /// token types for the parser
        enum class token_type
        {
@@ -2406,697 +2407,275 @@ class basic_json
            end_of_input
        };
-        /// the type of a lexer character
+        inline lexer(const char* s) : m_content(s)
-        using lexer_char_t = unsigned char;
+        {
+            m_start = m_cursor = m_content;
-      public:
+            m_limit = m_content + strlen(m_content);
-        /// constructor for strings
+        }
-        inline parser(const std::string& s) : buffer(s)
-        {
+        inline lexer() = default;
-            // set buffer for RE2C
-            m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str());
+#define YYMAXFILL 5
-            // set a pointer past the end of the buffer
-            m_limit = m_cursor + buffer.size();
+        inline token_type scan()
-            // read first token
+        {
-            get_token();
+#define YYFILL(n)
-        }
+            {
-        /// a parser reading from an input stream
+                char yych;
-        inline parser(std::istream& _is)
+                static const unsigned char yybm[] = {
-        {
+                    128, 128, 128, 128, 128, 128, 128, 128,
-            while (_is)
+                    128, 128, 128, 128, 128, 128, 128, 128,
-            {
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                std::string input_line;
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                std::getline(_is, input_line);
+                    128, 128,   0, 128, 128, 128, 128, 128,
-                buffer += input_line;
+                    128, 128, 128, 128, 128, 128, 128, 128,
-            }
+                    192, 192, 192, 192, 192, 192, 192, 192,
+                    192, 192, 128, 128, 128, 128, 128, 128,
-            // set buffer for RE2C
+                    128, 128, 128, 128, 128, 128, 128, 128,
-            m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str());
+                    128, 128, 128, 128, 128, 128, 128, 128,
-            // set a pointer past the end of the buffer
+                    128, 128, 128, 128, 128, 128, 128, 128,
-            m_limit = m_cursor + buffer.size();
+                    128, 128, 128, 128,   0, 128, 128, 128,
-            // read first token
+                    128, 128, 128, 128, 128, 128, 128, 128,
-            get_token();
+                    128, 128, 128, 128, 128, 128, 128, 128,
-        }
+                    128, 128, 128, 128, 128, 128, 128, 128,
+                    128, 128, 128, 128, 128, 128, 128, 128,
-        inline basic_json parse()
+                    128, 128, 128, 128, 128, 128, 128, 128,
-        {
+                    128, 128, 128, 128, 128, 128, 128, 128,
-            switch (last_token)
+                    128, 128, 128, 128, 128, 128, 128, 128,
-            {
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                case (token_type::begin_object):
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                {
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                    // explicitly set result to object to cope with {}
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                    basic_json result(value_t::object);
+                    128, 128, 128, 128, 128, 128, 128, 128,
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                    // read next token
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                    get_token();
+                    128, 128, 128, 128, 128, 128, 128, 128,
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                    // closing } -> we are done
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                    if (last_token == token_type::end_object)
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                    {
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                        return result;
+                    128, 128, 128, 128, 128, 128, 128, 128,
-                    }
-                    // otherwise: parse key-value pairs
-                    do
-                    {
-                        // store key
-                        expect_new(token_type::value_string);
-                        const auto key = get_string();
-                        // parse separator (:)
-                        get_token();
-                        expect_new(token_type::name_separator);
-                        // parse value
-                        get_token();
-                        result[key] = parse();
-                        // read next character
-                        get_token();
-                    }
-                    while (last_token == token_type::value_separator
-                            and get_token() == last_token);
-                    // closing }
-                    expect_new(token_type::end_object);
-                    return result;
-                }
-                case (token_type::begin_array):
-                {
-                    // explicitly set result to object to cope with []
-                    basic_json result(value_t::array);
-                    // read next token
-                    get_token();
-                    // closing ] -> we are done
-                    if (last_token == token_type::end_array)
-                    {
-                        return result;
-                    }
-                    // otherwise: parse values
-                    do
-                    {
-                        // parse value
-                        result.push_back(parse());
-                        // read next character
-                        get_token();
-                    }
-                    while (last_token == token_type::value_separator
-                            and get_token() == last_token);
-                    // closing ]
-                    expect_new(token_type::end_array);
-                    return result;
-                }
-                case (token_type::literal_null):
-                {
-                    return basic_json(nullptr);
-                }
-                case (token_type::value_string):
-                {
-                    return basic_json(get_string());
-                }
-                case (token_type::literal_true):
-                {
-                    return basic_json(true);
-                }
-                case (token_type::literal_false):
-                {
-                    return basic_json(false);
-                }
-                case (token_type::value_number):
-                {
-                    // The pointer m_begin points to the beginning of the
-                    // parsed number. We pass this pointer to std::strtod which
-                    // sets endptr to the first character past the converted
-                    // number. If this pointer is not the same as m_cursor,
-                    // then either more or less characters have been used
-                    // during the comparison. This can happen for inputs like
-                    // "01" which will be treated like number 0 followed by
-                    // number 1.
-                    // conversion
-                    char* endptr;
-                    const auto float_val = std::strtod(reinterpret_cast<const char*>(m_begin), &endptr);
-                    // check if strtod read beyond the end of the lexem
-                    if (reinterpret_cast<const lexer_char_t*>(endptr) != m_cursor)
-                    {
-                        throw std::invalid_argument(std::string("parse error - ") +
-                                                    reinterpret_cast<const char*>(m_begin) + " is not a number");
-                    }
-                    // check if conversion loses precision
-                    const auto int_val = static_cast<int>(float_val);
-                    if (float_val == int_val)
-                    {
-                        // we basic_json not lose precision -> return int
-                        return basic_json(int_val);
-                    }
-                    else
-                    {
-                        // we would lose precision -> returnfloat
-                        return basic_json(float_val);
-                    }
-                }
-                default:
-                {
-                    std::string error_msg = "parse error - unexpected \'";
-                    error_msg += static_cast<char>(m_begin[0]);
-                    error_msg += "\' (";
-                    error_msg += token_type_name(last_token) + ")";
-                    throw std::invalid_argument(error_msg);
-                }
-            }
-        }
-      private:
-        /*!
-        This function implements a scanner for JSON. It is specified using
-        regular expressions that try to follow RFC 7159 and ECMA-404 as close
-        as possible. These regular expressions are then translated into a
-        deterministic finite automaton (DFA) by the tool RE2C. As a result, the
-        translated code for this function consists of a large block of code
-        with goto jumps.
-        @return the class of the next token read from the buffer
-        @todo Unicode support needs to be checked.
-        */
-        inline token_type get_token()
-        {
-            // needed by RE2C
-            const lexer_char_t* marker = nullptr;
-            // set up RE2C
-json_parser_lexer_start:
-            // set current to the begin of the buffer
-            m_begin = m_cursor;
-            if (m_begin == m_limit)
-            {
-                return last_token = token_type::end_of_input;
-            }
-            {
-                lexer_char_t yych;
-                unsigned int yyaccept = 0;
-                static const unsigned char yybm[] =
-                {
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64, 192, 192,  64,  64, 192,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    192,  64,   0,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    96,  96,  96,  96,  96,  96,  96,  96,
-                    96,  96,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,   0,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
-                    64,  64,  64,  64,  64,  64,  64,  64,
                };
                yych = *m_cursor;
-                if (yych <= ':')
+                if (yych <= '[')
                {
-                    if (yych <= '!')
+                    if (yych <= '-')
                    {
-                        if (yych <= '\f')
+                        if (yych <= '"')
                        {
-                            if (yych <= 0x08)
+                            if (yych <= 0x00)
                            {
-                                goto json_parser_3;
+                                goto yy25;
                            }
-                            if (yych <= '\n')
+                            if (yych >= '"')
                            {
-                                goto json_parser_5;
+                                goto yy23;
                            }
-                            goto json_parser_3;
                        }
                        else
                        {
-                            if (yych <= '\r')
+                            if (yych <= '+')
                            {
-                                goto json_parser_5;
+                                goto yy2;
                            }
-                            if (yych == ' ')
+                            if (yych <= ',')
                            {
-                                goto json_parser_5;
+                                goto yy11;
                            }
-                            goto json_parser_3;
+                            goto yy18;
                        }
                    }
                    else
                    {
-                        if (yych <= '-')
+                        if (yych <= '9')
                        {
-                            if (yych <= '"')
+                            if (yych <= '/')
-                            {
-                                goto json_parser_6;
-                            }
-                            if (yych <= '+')
                            {
-                                goto json_parser_3;
+                                goto yy2;
                            }
-                            if (yych <= ',')
+                            if (yych <= '0')
                            {
-                                goto json_parser_7;
+                                goto yy19;
                            }
-                            goto json_parser_9;
+                            goto yy21;
                        }
                        else
                        {
-                            if (yych <= '/')
+                            if (yych <= ':')
-                            {
-                                goto json_parser_3;
-                            }
-                            if (yych <= '0')
                            {
-                                goto json_parser_10;
+                                goto yy13;
                            }
-                            if (yych <= '9')
+                            if (yych >= '[')
                            {
-                                goto json_parser_12;
+                                goto yy3;
                            }
-                            goto json_parser_13;
                        }
                    }
                }
-                else
+                else {
-                {
+                    if (yych <= 'n')
-                    if (yych <= 'm')
                    {
-                        if (yych <= '\\')
+                        if (yych <= 'e')
                        {
-                            if (yych == '[')
+                            if (yych == ']')
                            {
-                                goto json_parser_15;
+                                goto yy5;
                            }
-                            goto json_parser_3;
                        }
                        else
                        {
-                            if (yych <= ']')
+                            if (yych <= 'f')
                            {
-                                goto json_parser_17;
+                                goto yy17;
                            }
-                            if (yych == 'f')
+                            if (yych >= 'n')
                            {
-                                goto json_parser_19;
+                                goto yy15;
                            }
-                            goto json_parser_3;
                        }
                    }
-                    else
+                    else {
-                    {
                        if (yych <= 'z')
                        {
-                            if (yych <= 'n')
-                            {
-                                goto json_parser_20;
-                            }
                            if (yych == 't')
                            {
-                                goto json_parser_21;
+                                goto yy16;
                            }
-                            goto json_parser_3;
                        }
-                        else
+                        else {
-                        {
                            if (yych <= '{')
                            {
-                                goto json_parser_22;
+                                goto yy7;
                            }
                            if (yych == '}')
                            {
-                                goto json_parser_24;
+                                goto yy9;
                            }
-                            goto json_parser_3;
                        }
                    }
                }
-json_parser_2:
+yy2:
+                m_cursor = m_marker;
+                goto yy20;
+yy3:
+                ++m_cursor;
+                { return token_type::begin_array; }
+yy5:
+                ++m_cursor;
+                { return token_type::end_array; }
+yy7:
+                ++m_cursor;
+                { return token_type::begin_object; }
+yy9:
+                ++m_cursor;
+                { return token_type::end_object; }
+yy11:
+                ++m_cursor;
+                { return token_type::value_separator; }
+yy13:
+                ++m_cursor;
+                { return token_type::name_separator; }
+yy15:
+                yych = *++m_cursor;
+                if (yych == 'u')
                {
-                    goto json_parser_lexer_start;
+                    goto yy50;
                }
-json_parser_3:
+                goto yy2;
-                ++m_cursor;
+yy16:
-json_parser_4:
+                yych = *++m_cursor;
+                if (yych == 'r')
                {
-                    return last_token = token_type::parse_error;
+                    goto yy46;
                }
-json_parser_5:
+                goto yy2;
+yy17:
                yych = *++m_cursor;
-                goto json_parser_60;
+                if (yych == 'a')
-json_parser_6:
-                yyaccept = 0;
-                yych = *(marker = ++m_cursor);
-                goto json_parser_51;
-json_parser_7:
-                ++m_cursor;
                {
-                    return last_token = token_type::value_separator;
+                    goto yy41;
                }
-json_parser_9:
+                goto yy2;
+yy18:
                yych = *++m_cursor;
                if (yych <= '/')
                {
-                    goto json_parser_4;
+                    goto yy2;
                }
                if (yych <= '0')
                {
-                    goto json_parser_49;
+                    goto yy19;
                }
                if (yych <= '9')
                {
-                    goto json_parser_40;
+                    goto yy21;
                }
-                goto json_parser_4;
+                goto yy2;
-json_parser_10:
+yy19:
-                yyaccept = 1;
+                yych = *(m_marker = ++m_cursor);
-                yych = *(marker = ++m_cursor);
                if (yych <= 'D')
                {
                    if (yych == '.')
                    {
-                        goto json_parser_42;
+                        goto yy34;
                    }
                }
-                else
+                else {
-                {
                    if (yych <= 'E')
                    {
-                        goto json_parser_43;
+                        goto yy35;
                    }
                    if (yych == 'e')
                    {
-                        goto json_parser_43;
+                        goto yy35;
                    }
                }
-json_parser_11:
+yy20:
-                {
+                { return token_type::value_number; }
-                    return last_token = token_type::value_number;
+yy21:
-                }
+                m_marker = ++m_cursor;
-json_parser_12:
-                yyaccept = 1;
-                yych = *(marker = ++m_cursor);
-                goto json_parser_41;
-json_parser_13:
-                ++m_cursor;
-                {
-                    return last_token = token_type::name_separator;
-                }
-json_parser_15:
-                ++m_cursor;
-                {
-                    return last_token = token_type::begin_array;
-                }
-json_parser_17:
-                ++m_cursor;
-                {
-                    return last_token = token_type::end_array;
-                }
-json_parser_19:
-                yyaccept = 0;
-                yych = *(marker = ++m_cursor);
-                if (yych == 'a')
-                {
-                    goto json_parser_35;
-                }
-                goto json_parser_4;
-json_parser_20:
-                yyaccept = 0;
-                yych = *(marker = ++m_cursor);
-                if (yych == 'u')
-                {
-                    goto json_parser_31;
-                }
-                goto json_parser_4;
-json_parser_21:
-                yyaccept = 0;
-                yych = *(marker = ++m_cursor);
-                if (yych == 'r')
-                {
-                    goto json_parser_26;
-                }
-                goto json_parser_4;
-json_parser_22:
-                ++m_cursor;
-                {
-                    return last_token = token_type::begin_object;
-                }
-json_parser_24:
-                ++m_cursor;
-                {
-                    return last_token = token_type::end_object;
-                }
-json_parser_26:
-                yych = *++m_cursor;
-                if (yych == 'u')
-                {
-                    goto json_parser_28;
-                }
-json_parser_27:
-                m_cursor = marker;
-                if (yyaccept == 0)
-                {
-                    goto json_parser_4;
-                }
-                else
-                {
-                    goto json_parser_11;
-                }
-json_parser_28:
-                yych = *++m_cursor;
-                if (yych != 'e')
-                {
-                    goto json_parser_27;
-                }
-                ++m_cursor;
-                {
-                    return last_token = token_type::literal_true;
-                }
-json_parser_31:
-                yych = *++m_cursor;
-                if (yych != 'l')
-                {
-                    goto json_parser_27;
-                }
-                yych = *++m_cursor;
-                if (yych != 'l')
-                {
-                    goto json_parser_27;
-                }
-                ++m_cursor;
-                {
-                    return last_token = token_type::literal_null;
-                }
-json_parser_35:
-                yych = *++m_cursor;
-                if (yych != 'l')
-                {
-                    goto json_parser_27;
-                }
-                yych = *++m_cursor;
-                if (yych != 's')
-                {
-                    goto json_parser_27;
-                }
-                yych = *++m_cursor;
-                if (yych != 'e')
-                {
-                    goto json_parser_27;
-                }
-                ++m_cursor;
-                {
-                    return last_token = token_type::literal_false;
-                }
-json_parser_40:
-                yyaccept = 1;
-                marker = ++m_cursor;
                yych = *m_cursor;
-json_parser_41:
+                if (yybm[0 + yych] & 64)
-                if (yybm[0 + yych] & 32)
                {
-                    goto json_parser_40;
+                    goto yy21;
                }
                if (yych <= 'D')
                {
-                    if (yych != '.')
+                    if (yych == '.')
                    {
-                        goto json_parser_11;
+                        goto yy34;
                    }
+                    goto yy20;
                }
-                else
+                else {
-                {
                    if (yych <= 'E')
                    {
-                        goto json_parser_43;
+                        goto yy35;
                    }
                    if (yych == 'e')
                    {
-                        goto json_parser_43;
+                        goto yy35;
                    }
-                    goto json_parser_11;
+                    goto yy20;
                }
-json_parser_42:
+yy23:
-                yych = *++m_cursor;
+                ++m_cursor;
-                if (yych <= '/')
+                yych = *m_cursor;
+                if (yybm[0 + yych] & 128)
                {
-                    goto json_parser_27;
+                    goto yy23;
                }
-                if (yych <= '9')
+                if (yych <= '"')
                {
-                    goto json_parser_47;
+                    goto yy28;
                }
-                goto json_parser_27;
+                goto yy27;
-json_parser_43:
+yy25:
-                yych = *++m_cursor;
-                if (yych <= ',')
-                {
-                    if (yych != '+')
-                    {
-                        goto json_parser_27;
-                    }
-                }
-                else
-                {
-                    if (yych <= '-')
-                    {
-                        goto json_parser_44;
-                    }
-                    if (yych <= '/')
-                    {
-                        goto json_parser_27;
-                    }
-                    if (yych <= '9')
-                    {
-                        goto json_parser_45;
-                    }
-                    goto json_parser_27;
-                }
-json_parser_44:
-                yych = *++m_cursor;
-                if (yych <= '/')
-                {
-                    goto json_parser_27;
-                }
-                if (yych >= ':')
-                {
-                    goto json_parser_27;
-                }
-json_parser_45:
                ++m_cursor;
-                yych = *m_cursor;
+                { return token_type::end_of_input; }
-                if (yych <= '/')
+yy27:
-                {
-                    goto json_parser_11;
-                }
-                if (yych <= '9')
-                {
-                    goto json_parser_45;
-                }
-                goto json_parser_11;
-json_parser_47:
-                yyaccept = 1;
-                marker = ++m_cursor;
-                yych = *m_cursor;
-                if (yych <= 'D')
-                {
-                    if (yych <= '/')
-                    {
-                        goto json_parser_11;
-                    }
-                    if (yych <= '9')
-                    {
-                        goto json_parser_47;
-                    }
-                    goto json_parser_11;
-                }
-                else
-                {
-                    if (yych <= 'E')
-                    {
-                        goto json_parser_43;
-                    }
-                    if (yych == 'e')
-                    {
-                        goto json_parser_43;
-                    }
-                    goto json_parser_11;
-                }
-json_parser_49:
-                yyaccept = 1;
-                yych = *(marker = ++m_cursor);
-                if (yych <= 'D')
-                {
-                    if (yych == '.')
-                    {
-                        goto json_parser_42;
-                    }
-                    goto json_parser_11;
-                }
-                else
-                {
-                    if (yych <= 'E')
-                    {
-                        goto json_parser_43;
-                    }
-                    if (yych == 'e')
-                    {
-                        goto json_parser_43;
-                    }
-                    goto json_parser_11;
-                }
-json_parser_50:
-                ++m_cursor;
-                yych = *m_cursor;
-json_parser_51:
-                if (yybm[0 + yych] & 64)
-                {
-                    goto json_parser_50;
-                }
-                if (yych <= '"')
-                {
-                    goto json_parser_53;
-                }
                ++m_cursor;
                yych = *m_cursor;
                if (yych <= 'e')
@@ -3105,13 +2684,13 @@ json_parser_51:
                    {
                        if (yych == '"')
                        {
-                            goto json_parser_50;
+                            goto yy23;
                        }
                        if (yych <= '.')
                        {
-                            goto json_parser_27;
+                            goto yy2;
                        }
-                        goto json_parser_50;
+                        goto yy23;
                    }
                    else
                    {
@@ -3119,271 +2698,581 @@ json_parser_51:
                        {
                            if (yych <= '[')
                            {
-                                goto json_parser_27;
+                                goto yy2;
                            }
-                            goto json_parser_50;
+                            goto yy23;
                        }
                        else
                        {
                            if (yych == 'b')
                            {
-                                goto json_parser_50;
+                                goto yy23;
                            }
-                            goto json_parser_27;
+                            goto yy2;
                        }
                    }
                }
-                else
+                else {
-                {
                    if (yych <= 'q')
                    {
                        if (yych <= 'f')
                        {
-                            goto json_parser_50;
+                            goto yy23;
                        }
                        if (yych == 'n')
                        {
-                            goto json_parser_50;
+                            goto yy23;
                        }
-                        goto json_parser_27;
+                        goto yy2;
                    }
-                    else
+                    else {
-                    {
                        if (yych <= 's')
                        {
                            if (yych <= 'r')
                            {
-                                goto json_parser_50;
+                                goto yy23;
                            }
-                            goto json_parser_27;
+                            goto yy2;
                        }
-                        else
+                        else {
-                        {
                            if (yych <= 't')
                            {
-                                goto json_parser_50;
+                                goto yy23;
                            }
                            if (yych <= 'u')
                            {
-                                goto json_parser_55;
+                                goto yy30;
                            }
-                            goto json_parser_27;
+                            goto yy2;
                        }
                    }
                }
-json_parser_53:
+yy28:
                ++m_cursor;
-                {
+                { return token_type::value_string; }
-                    return last_token = token_type::value_string;
+yy30:
-                }
-json_parser_55:
                ++m_cursor;
                yych = *m_cursor;
                if (yych <= '@')
                {
                    if (yych <= '/')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                    if (yych >= ':')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                }
-                else
+                else {
-                {
                    if (yych <= 'F')
                    {
-                        goto json_parser_56;
+                        goto yy31;
                    }
                    if (yych <= '`')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                    if (yych >= 'g')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                }
-json_parser_56:
+yy31:
                ++m_cursor;
                yych = *m_cursor;
                if (yych <= '@')
                {
                    if (yych <= '/')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                    if (yych >= ':')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                }
-                else
+                else {
-                {
                    if (yych <= 'F')
                    {
-                        goto json_parser_57;
+                        goto yy32;
                    }
                    if (yych <= '`')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                    if (yych >= 'g')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                }
-json_parser_57:
+yy32:
                ++m_cursor;
                yych = *m_cursor;
                if (yych <= '@')
                {
                    if (yych <= '/')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                    if (yych >= ':')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                }
-                else
+                else {
-                {
                    if (yych <= 'F')
                    {
-                        goto json_parser_58;
+                        goto yy33;
                    }
                    if (yych <= '`')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                    if (yych >= 'g')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                }
-json_parser_58:
+yy33:
                ++m_cursor;
                yych = *m_cursor;
                if (yych <= '@')
                {
                    if (yych <= '/')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                    if (yych <= '9')
                    {
-                        goto json_parser_50;
+                        goto yy23;
                    }
-                    goto json_parser_27;
+                    goto yy2;
                }
-                else
+                else {
-                {
                    if (yych <= 'F')
                    {
-                        goto json_parser_50;
+                        goto yy23;
                    }
                    if (yych <= '`')
                    {
-                        goto json_parser_27;
+                        goto yy2;
                    }
                    if (yych <= 'f')
                    {
-                        goto json_parser_50;
+                        goto yy23;
                    }
-                    goto json_parser_27;
+                    goto yy2;
+                }
+yy34:
+                yych = *++m_cursor;
+                if (yych <= '/')
+                {
+                    goto yy2;
                }
-json_parser_59:
+                if (yych <= '9')
+                {
+                    goto yy39;
+                }
+                goto yy2;
+yy35:
+                yych = *++m_cursor;
+                if (yych <= ',')
+                {
+                    if (yych != '+')
+                    {
+                        goto yy2;
+                    }
+                }
+                else {
+                    if (yych <= '-')
+                    {
+                        goto yy36;
+                    }
+                    if (yych <= '/')
+                    {
+                        goto yy2;
+                    }
+                    if (yych <= '9')
+                    {
+                        goto yy37;
+                    }
+                    goto yy2;
+                }
+yy36:
+                yych = *++m_cursor;
+                if (yych <= '/')
+                {
+                    goto yy2;
+                }
+                if (yych >= ':')
+                {
+                    goto yy2;
+                }
+yy37:
                ++m_cursor;
                yych = *m_cursor;
-json_parser_60:
+                if (yych <= '/')
-                if (yybm[0 + yych] & 128)
+                {
+                    goto yy20;
+                }
+                if (yych <= '9')
+                {
+                    goto yy37;
+                }
+                goto yy20;
+yy39:
+                m_marker = ++m_cursor;
+                yych = *m_cursor;
+                if (yych <= 'D')
+                {
+                    if (yych <= '/')
+                    {
+                        goto yy20;
+                    }
+                    if (yych <= '9')
+                    {
+                        goto yy39;
+                    }
+                    goto yy20;
+                }
+                else {
+                    if (yych <= 'E')
+                    {
+                        goto yy35;
+                    }
+                    if (yych == 'e')
+                    {
+                        goto yy35;
+                    }
+                    goto yy20;
+                }
+yy41:
+                yych = *++m_cursor;
+                if (yych != 'l')
+                {
+                    goto yy2;
+                }
+                yych = *++m_cursor;
+                if (yych != 's')
+                {
+                    goto yy2;
+                }
+                yych = *++m_cursor;
+                if (yych != 'e')
+                {
+                    goto yy2;
+                }
+                ++m_cursor;
+                { return token_type::literal_false; }
+yy46:
+                yych = *++m_cursor;
+                if (yych != 'u')
+                {
+                    goto yy2;
+                }
+                yych = *++m_cursor;
+                if (yych != 'e')
                {
-                    goto json_parser_59;
+                    goto yy2;
                }
-                goto json_parser_2;
+                ++m_cursor;
+                { return token_type::literal_true; }
+yy50:
+                yych = *++m_cursor;
+                if (yych != 'l')
+                {
+                    goto yy2;
+                }
+                yych = *++m_cursor;
+                if (yych != 'l')
+                {
+                    goto yy2;
+                }
+                ++m_cursor;
+                { return token_type::literal_null; }
            }
        }
-        inline static std::string token_type_name(token_type t)
+        inline std::string get_string_value() const
+        {
+            return std::string(m_start, static_cast<size_t>(m_cursor - m_start));
+        }
+        /*!
+        The pointer m_begin points to the opening quote of the string, and
+        m_cursor past the closing quote of the string. We create a std::string from
+        the character after the opening quotes (m_begin+1) until the character
+        before the closing quotes (hence subtracting 2 characters from the pointer
+        difference of the two pointers).
+        @return string value of current token without opening and closing quotes
+        @todo Take care of Unicode.
+        */
+        inline std::string get_string() const
+        {
+            return std::string(m_start + 1, static_cast<size_t>(m_cursor - m_start - 2));
+        }
+        inline number_float_t get_number() const
+        {
+            // The pointer m_begin points to the beginning of the
+            // parsed number. We pass this pointer to std::strtod which
+            // sets endptr to the first character past the converted
+            // number. If this pointer is not the same as m_cursor,
+            // then either more or less characters have been used
+            // during the comparison. This can happen for inputs like
+            // "01" which will be treated like number 0 followed by
+            // number 1.
+            // conversion
+            char* endptr;
+            const auto float_val = std::strtod(reinterpret_cast<const char*>(m_start), &endptr);
+            // check if strtod read beyond the end of the lexem
+            if (endptr != m_cursor)
+            {
+                std::cerr << get_string_value() << std::endl;
+                return NAN;
+            }
+            else
+            {
+                return float_val;
+            }
+        }
+      private:
+        const char* m_content = nullptr;
+        const char* m_start = nullptr;
+        const char* m_cursor = nullptr;
+        const char* m_limit = nullptr;
+        const char* m_marker = nullptr;
+        const char* m_ctxmarker = nullptr;
+    };
+    class parser
+    {
+      public:
+        /// constructor for strings
+        inline parser(const std::string& s) : m_buffer(s), m_lexer(m_buffer.c_str())
+        {
+            // read first token
+            get_token();
+        }
+        /// a parser reading from an input stream
+        inline parser(std::istream& _is)
+        {
+            while (_is)
+            {
+                std::string input_line;
+                std::getline(_is, input_line);
+                m_buffer += input_line;
+            }
+            // initializer lexer
+            m_lexer = lexer(m_buffer.c_str());
+            // read first token
+            get_token();
+        }
+        inline basic_json parse()
+        {
+            switch (last_token)
+            {
+                case (lexer::token_type::begin_object):
+                {
+                    // explicitly set result to object to cope with {}
+                    basic_json result(value_t::object);
+                    // read next token
+                    get_token();
+                    // closing } -> we are done
+                    if (last_token == lexer::token_type::end_object)
+                    {
+                        return result;
+                    }
+                    // otherwise: parse key-value pairs
+                    do
+                    {
+                        // store key
+                        expect(lexer::token_type::value_string);
+                        const auto key = m_lexer.get_string();
+                        // parse separator (:)
+                        get_token();
+                        expect(lexer::token_type::name_separator);
+                        // parse value
+                        get_token();
+                        result[key] = parse();
+                        // read next character
+                        get_token();
+                    }
+                    while (last_token == lexer::token_type::value_separator
+                            and get_token() == last_token);
+                    // closing }
+                    expect(lexer::token_type::end_object);
+                    return result;
+                }
+                case (lexer::token_type::begin_array):
+                {
+                    // explicitly set result to object to cope with []
+                    basic_json result(value_t::array);
+                    // read next token
+                    get_token();
+                    // closing ] -> we are done
+                    if (last_token == lexer::token_type::end_array)
+                    {
+                        return result;
+                    }
+                    // otherwise: parse values
+                    do
+                    {
+                        // parse value
+                        result.push_back(parse());
+                        // read next character
+                        get_token();
+                    }
+                    while (last_token == lexer::token_type::value_separator
+                            and get_token() == last_token);
+                    // closing ]
+                    expect(lexer::token_type::end_array);
+                    return result;
+                }
+                case (lexer::token_type::literal_null):
+                {
+                    return basic_json(nullptr);
+                }
+                case (lexer::token_type::value_string):
+                {
+                    return basic_json(m_lexer.get_string());
+                }
+                case (lexer::token_type::literal_true):
+                {
+                    return basic_json(true);
+                }
+                case (lexer::token_type::literal_false):
+                {
+                    return basic_json(false);
+                }
+                case (lexer::token_type::value_number):
+                {
+                    auto float_val = m_lexer.get_number();
+                    if (std::isnan(float_val))
+                    {
+                        throw std::invalid_argument(std::string("parse error - ") +
+                                                    m_lexer.get_string_value() + " is not a number");
+                    }
+                    // check if conversion loses precision
+                    const auto int_val = static_cast<number_integer_t>(float_val);
+                    if (float_val == int_val)
+                    {
+                        // we basic_json not lose precision -> return int
+                        return basic_json(int_val);
+                    }
+                    else
+                    {
+                        // we would lose precision -> returnfloat
+                        return basic_json(float_val);
+                    }
+                }
+                default:
+                {
+                    std::string error_msg = "parse error - unexpected \'";
+                    error_msg += m_lexer.get_string_value();
+                    error_msg += "\' (";
+                    error_msg += token_type_name(last_token) + ")";
+                    throw std::invalid_argument(error_msg);
+                }
+            }
+        }
+      private:
+        /// get next token from lexer
+        inline typename lexer::token_type get_token()
+        {
+            last_token = m_lexer.scan();
+            return last_token;
+        }
+        inline static std::string token_type_name(typename lexer::token_type t)
        {
            switch (t)
            {
-                case (token_type::uninitialized):
+                case (lexer::token_type::uninitialized):
                    return "<uninitialized>";
-                case (token_type::literal_true):
+                case (lexer::token_type::literal_true):
                    return "true literal";
-                case (token_type::literal_false):
+                case (lexer::token_type::literal_false):
                    return "false literal";
-                case (token_type::literal_null):
+                case (lexer::token_type::literal_null):
                    return "null literal";
-                case (token_type::value_string):
+                case (lexer::token_type::value_string):
                    return "string literal";
-                case (token_type::value_number):
+                case (lexer::token_type::value_number):
                    return "number literal";
-                case (token_type::begin_array):
+                case (lexer::token_type::begin_array):
                    return "[";
-                case (token_type::begin_object):
+                case (lexer::token_type::begin_object):
                    return "{";
-                case (token_type::end_array):
+                case (lexer::token_type::end_array):
                    return "]";
-                case (token_type::end_object):
+                case (lexer::token_type::end_object):
                    return "}";
-                case (token_type::name_separator):
+                case (lexer::token_type::name_separator):
                    return ":";
-                case (token_type::value_separator):
+                case (lexer::token_type::value_separator):
                    return ",";
-                case (token_type::parse_error):
+                case (lexer::token_type::parse_error):
                    return "<parse error>";
-                case (token_type::end_of_input):
+                case (lexer::token_type::end_of_input):
                    return "<end of input>";
            }
        }
-        inline void expect_new(token_type t)
+        inline void expect(typename lexer::token_type t) const
        {
            if (t != last_token)
            {
                std::string error_msg = "parse error - unexpected \'";
-                error_msg += static_cast<char>(m_begin[0]);
+                error_msg += m_lexer.get_string_value();
                error_msg += "\' (" + token_type_name(last_token);
                error_msg += "); expected " + token_type_name(t);
                throw std::invalid_argument(error_msg);
            }
        }
-        /*!
-        The pointer m_begin points to the opening quote of the string, and
-        m_cursor past the closing quote of the string. We create a std::string from
-        the character after the opening quotes (m_begin+1) until the character
-        before the closing quotes (hence subtracting 2 characters from the pointer
-        difference of the two pointers).
-        @return string value of current token without opening and closing quotes
-        @todo Take care of Unicode.
-        */
-        inline std::string get_string() const
-        {
-            return std::string(
-                       reinterpret_cast<const char*>(m_begin + 1),
-                       static_cast<std::size_t>(m_cursor - m_begin - 2)
-                   );
-        }
      private:
        /// the buffer
-        std::string buffer;
+        std::string m_buffer;
-        /// a pointer to the next character to read from the buffer
-        const lexer_char_t* m_cursor = nullptr;
-        /// a pointer past the last character of the buffer
-        const lexer_char_t* m_limit = nullptr;
-        /// a pointer to the beginning of the current token
-        const lexer_char_t* m_begin = nullptr;
        /// the type of the last read token
-        token_type last_token = token_type::uninitialized;
+        typename lexer::token_type last_token = lexer::token_type::uninitialized;
+        lexer m_lexer;
    };
 };

--- a/src/json.hpp.re2c
+++ b/src/json.hpp.re2c
@@ -14,6 +14,7 @@
 #include <type_traits>
 #include <utility>
 #include <vector>
+#include <cmath>
 /*!
 - ObjectType trick from http://stackoverflow.com/a/9860911
@@ -2384,9 +2385,9 @@ class basic_json
    // parser //
    ////////////
-    class parser
+    class lexer
    {
-      private:
+      public:
        /// token types for the parser
        enum class token_type
        {
@@ -2406,17 +2407,133 @@ class basic_json
            end_of_input
        };
-        /// the type of a lexer character
+        inline lexer(const char* s) : m_content(s)
-        using lexer_char_t = unsigned char;
+        {
+            m_start = m_cursor = m_content;
+            m_limit = m_content + strlen(m_content);
+        }
+        inline lexer() = default;
+        /*!max:re2c */
+        inline token_type scan()
+        {
+#define YYFILL(n)
+            /*!re2c
+                re2c:define:YYCURSOR    = m_cursor;
+                re2c:define:YYLIMIT     = m_limit;
+                re2c:define:YYCTYPE     = char;
+                re2c:define:YYCTXMARKER = m_ctxmarker;
+                re2c:define:YYMARKER    = m_marker;
+                re2c:indent:top         = 1;
+                re2c:yyfill:enable      = 0;
+                // structural characters
+                "[" { return token_type::begin_array; }
+                "]" { return token_type::end_array; }
+                "{" { return token_type::begin_object; }
+                "}" { return token_type::end_object; }
+                "," { return token_type::value_separator; }
+                ":" { return token_type::name_separator; }
+                // literal names
+                "null"  { return token_type::literal_null; }
+                "true"  { return token_type::literal_true; }
+                "false" { return token_type::literal_false; }
+                // number
+                decimal_point = [.];
+                digit         = [0-9];
+                digit_1_9     = [1-9];
+                e             = [eE];
+                minus         = [-];
+                plus          = [+];
+                zero          = [0];
+                exp           = e (minus|plus)? digit+;
+                frac          = decimal_point digit+;
+                int           = (zero|digit_1_9 digit*);
+                number        = minus? int frac? exp?;
+                number        { return token_type::value_number; }
+                // string
+                quotation_mark = [\"];
+                escape         = [\\];
+                unescaped      = [^\"\\];
+                escaped        = escape ([\"\\/bfnrt] | [u][0-9a-fA-F]{4});
+                char           = unescaped | escaped;
+                string         = quotation_mark char* quotation_mark;
+                string         { return token_type::value_string; }
+                // end of file
+               '\000'          { return token_type::end_of_input; }
+             */
+        }
+        inline std::string get_string_value() const
+        {
+            return std::string(m_start, static_cast<size_t>(m_cursor - m_start));
+        }
+        /*!
+        The pointer m_begin points to the opening quote of the string, and
+        m_cursor past the closing quote of the string. We create a std::string from
+        the character after the opening quotes (m_begin+1) until the character
+        before the closing quotes (hence subtracting 2 characters from the pointer
+        difference of the two pointers).
+        @return string value of current token without opening and closing quotes
+        @todo Take care of Unicode.
+        */
+        inline std::string get_string() const
+        {
+            return std::string(m_start + 1, static_cast<size_t>(m_cursor - m_start - 2));
+        }
+        inline number_float_t get_number() const
+        {
+            // The pointer m_begin points to the beginning of the
+            // parsed number. We pass this pointer to std::strtod which
+            // sets endptr to the first character past the converted
+            // number. If this pointer is not the same as m_cursor,
+            // then either more or less characters have been used
+            // during the comparison. This can happen for inputs like
+            // "01" which will be treated like number 0 followed by
+            // number 1.
+            // conversion
+            char* endptr;
+            const auto float_val = std::strtod(reinterpret_cast<const char*>(m_start), &endptr);
+            // check if strtod read beyond the end of the lexem
+            if (endptr != m_cursor)
+            {
+                std::cerr << get_string_value() << std::endl;
+                return NAN;
+            }
+            else
+            {
+                return float_val;
+            }
+        }
+      private:
+        const char* m_content = nullptr;
+        const char* m_start = nullptr;
+        const char* m_cursor = nullptr;
+        const char* m_limit = nullptr;
+        const char* m_marker = nullptr;
+        const char* m_ctxmarker = nullptr;
+    };
+    class parser
+    {
      public:
        /// constructor for strings
-        inline parser(const std::string& s) : buffer(s)
+        inline parser(const std::string& s) : m_buffer(s), m_lexer(m_buffer.c_str())
        {
-            // set buffer for RE2C
-            m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str());
-            // set a pointer past the end of the buffer
-            m_limit = m_cursor + buffer.size();
            // read first token
            get_token();
        }
@@ -2428,13 +2545,12 @@ class basic_json
            {
                std::string input_line;
                std::getline(_is, input_line);
-                buffer += input_line;
+                m_buffer += input_line;
            }
-            // set buffer for RE2C
+            // initializer lexer
-            m_cursor = reinterpret_cast<const lexer_char_t*>(buffer.c_str());
+            m_lexer = lexer(m_buffer.c_str());
-            // set a pointer past the end of the buffer
-            m_limit = m_cursor + buffer.size();
            // read first token
            get_token();
        }
@@ -2443,7 +2559,7 @@ class basic_json
        {
            switch (last_token)
            {
-                case (token_type::begin_object):
+                case (lexer::token_type::begin_object):
                {
                    // explicitly set result to object to cope with {}
                    basic_json result(value_t::object);
@@ -2452,7 +2568,7 @@ class basic_json
                    get_token();
                    // closing } -> we are done
-                    if (last_token == token_type::end_object)
+                    if (last_token == lexer::token_type::end_object)
                    {
                        return result;
                    }
@@ -2461,12 +2577,12 @@ class basic_json
                    do
                    {
                        // store key
-                        expect_new(token_type::value_string);
+                        expect(lexer::token_type::value_string);
-                        const auto key = get_string();
+                        const auto key = m_lexer.get_string();
                        // parse separator (:)
                        get_token();
-                        expect_new(token_type::name_separator);
+                        expect(lexer::token_type::name_separator);
                        // parse value
                        get_token();
@@ -2475,16 +2591,16 @@ class basic_json
                        // read next character
                        get_token();
                    }
-                    while (last_token == token_type::value_separator
+                    while (last_token == lexer::token_type::value_separator
                            and get_token() == last_token);
                    // closing }
-                    expect_new(token_type::end_object);
+                    expect(lexer::token_type::end_object);
                    return result;
                }
-                case (token_type::begin_array):
+                case (lexer::token_type::begin_array):
                {
                    // explicitly set result to object to cope with []
                    basic_json result(value_t::array);
@@ -2493,7 +2609,7 @@ class basic_json
                    get_token();
                    // closing ] -> we are done
-                    if (last_token == token_type::end_array)
+                    if (last_token == lexer::token_type::end_array)
                    {
                        return result;
                    }
@@ -2507,59 +2623,47 @@ class basic_json
                        // read next character
                        get_token();
                    }
-                    while (last_token == token_type::value_separator
+                    while (last_token == lexer::token_type::value_separator
                            and get_token() == last_token);
                    // closing ]
-                    expect_new(token_type::end_array);
+                    expect(lexer::token_type::end_array);
                    return result;
                }
-                case (token_type::literal_null):
+                case (lexer::token_type::literal_null):
                {
                    return basic_json(nullptr);
                }
-                case (token_type::value_string):
+                case (lexer::token_type::value_string):
                {
-                    return basic_json(get_string());
+                    return basic_json(m_lexer.get_string());
                }
-                case (token_type::literal_true):
+                case (lexer::token_type::literal_true):
                {
                    return basic_json(true);
                }
-                case (token_type::literal_false):
+                case (lexer::token_type::literal_false):
                {
                    return basic_json(false);
                }
-                case (token_type::value_number):
+                case (lexer::token_type::value_number):
                {
-                    // The pointer m_begin points to the beginning of the
+                    auto float_val = m_lexer.get_number();
-                    // parsed number. We pass this pointer to std::strtod which
-                    // sets endptr to the first character past the converted
-                    // number. If this pointer is not the same as m_cursor,
-                    // then either more or less characters have been used
-                    // during the comparison. This can happen for inputs like
-                    // "01" which will be treated like number 0 followed by
-                    // number 1.
-                    // conversion
+                    if (std::isnan(float_val))
-                    char* endptr;
-                    const auto float_val = std::strtod(reinterpret_cast<const char*>(m_begin), &endptr);
-                    // check if strtod read beyond the end of the lexem
-                    if (reinterpret_cast<const lexer_char_t*>(endptr) != m_cursor)
                    {
                        throw std::invalid_argument(std::string("parse error - ") +
-                                                    reinterpret_cast<const char*>(m_begin) + " is not a number");
+                                                    m_lexer.get_string_value() + " is not a number");
                    }
                    // check if conversion loses precision
-                    const auto int_val = static_cast<int>(float_val);
+                    const auto int_val = static_cast<number_integer_t>(float_val);
                    if (float_val == int_val)
                    {
                        // we basic_json not lose precision -> return int
@@ -2575,7 +2679,7 @@ class basic_json
                default:
                {
                    std::string error_msg = "parse error - unexpected \'";
-                    error_msg += static_cast<char>(m_begin[0]);
+                    error_msg += m_lexer.get_string_value();
                    error_msg += "\' (";
                    error_msg += token_type_name(last_token) + ")";
                    throw std::invalid_argument(error_msg);
@@ -2584,166 +2688,66 @@ class basic_json
        }
      private:
-        /*!
+        /// get next token from lexer
-        This function implements a scanner for JSON. It is specified using
+        inline typename lexer::token_type get_token()
-        regular expressions that try to follow RFC 7159 and ECMA-404 as close
-        as possible. These regular expressions are then translated into a
-        deterministic finite automaton (DFA) by the tool RE2C. As a result, the
-        translated code for this function consists of a large block of code
-        with goto jumps.
-        @return the class of the next token read from the buffer
-        @todo Unicode support needs to be checked.
-        */
-        inline token_type get_token()
        {
-            // needed by RE2C
+            last_token = m_lexer.scan();
-            const lexer_char_t* marker = nullptr;
+            return last_token;
-            // set up RE2C
-            /*!re2c
-                re2c:labelprefix     = "json_parser_";
-                re2c:yyfill:enable   = 0;
-                re2c:define:YYCURSOR = m_cursor;
-                re2c:define:YYCTYPE  = lexer_char_t;
-                re2c:define:YYMARKER = marker;
-                re2c:indent:string   = "    ";
-                re2c:define:YYLIMIT  = m_limit;
-            */
-json_parser_lexer_start:
-            // set current to the begin of the buffer
-            m_begin = m_cursor;
-            if (m_begin == m_limit)
-            {
-                return last_token = token_type::end_of_input;
-            }
-            /*!re2c
-                // whitespace
-                ws = [ \t\n\r]*;
-                ws   { goto json_parser_lexer_start; }
-                // structural characters
-                "[" { return last_token = token_type::begin_array; }
-                "]" { return last_token = token_type::end_array; }
-                "{" { return last_token = token_type::begin_object; }
-                "}" { return last_token = token_type::end_object; }
-                "," { return last_token = token_type::value_separator; }
-                ":" { return last_token = token_type::name_separator; }
-                // literal names
-                "null"  { return last_token = token_type::literal_null; }
-                "true"  { return last_token = token_type::literal_true; }
-                "false" { return last_token = token_type::literal_false; }
-                // number
-                decimal_point = [.];
-                digit         = [0-9];
-                digit_1_9     = [1-9];
-                e             = [eE];
-                minus         = [-];
-                plus          = [+];
-                zero          = [0];
-                exp           = e (minus|plus)? digit+;
-                frac          = decimal_point digit+;
-                int           = (zero|digit_1_9 digit*);
-                number        = minus? int frac? exp?;
-                number        { return last_token = token_type::value_number; }
-                // string
-                quotation_mark = [\"];
-                escape         = [\\];
-                unescaped      = [^\"\\];
-                escaped        = escape ([\"\\/bfnrt] | [u][0-9a-fA-F]{4});
-                char           = unescaped | escaped;
-                string         = quotation_mark char* quotation_mark;
-                string         { return last_token = token_type::value_string; }
-                // anything else is an error
-                * { return last_token = token_type::parse_error; }
-            */
        }
-        inline static std::string token_type_name(token_type t)
+        inline static std::string token_type_name(typename lexer::token_type t)
        {
            switch (t)
            {
-                case (token_type::uninitialized):
+                case (lexer::token_type::uninitialized):
                    return "<uninitialized>";
-                case (token_type::literal_true):
+                case (lexer::token_type::literal_true):
                    return "true literal";
-                case (token_type::literal_false):
+                case (lexer::token_type::literal_false):
                    return "false literal";
-                case (token_type::literal_null):
+                case (lexer::token_type::literal_null):
                    return "null literal";
-                case (token_type::value_string):
+                case (lexer::token_type::value_string):
                    return "string literal";
-                case (token_type::value_number):
+                case (lexer::token_type::value_number):
                    return "number literal";
-                case (token_type::begin_array):
+                case (lexer::token_type::begin_array):
                    return "[";
-                case (token_type::begin_object):
+                case (lexer::token_type::begin_object):
                    return "{";
-                case (token_type::end_array):
+                case (lexer::token_type::end_array):
                    return "]";
-                case (token_type::end_object):
+                case (lexer::token_type::end_object):
                    return "}";
-                case (token_type::name_separator):
+                case (lexer::token_type::name_separator):
                    return ":";
-                case (token_type::value_separator):
+                case (lexer::token_type::value_separator):
                    return ",";
-                case (token_type::parse_error):
+                case (lexer::token_type::parse_error):
                    return "<parse error>";
-                case (token_type::end_of_input):
+                case (lexer::token_type::end_of_input):
                    return "<end of input>";
            }
        }
-        inline void expect_new(token_type t)
+        inline void expect(typename lexer::token_type t) const
        {
            if (t != last_token)
            {
                std::string error_msg = "parse error - unexpected \'";
-                error_msg += static_cast<char>(m_begin[0]);
+                error_msg += m_lexer.get_string_value();
                error_msg += "\' (" + token_type_name(last_token);
                error_msg += "); expected " + token_type_name(t);
                throw std::invalid_argument(error_msg);
            }
        }
-        /*!
-        The pointer m_begin points to the opening quote of the string, and
-        m_cursor past the closing quote of the string. We create a std::string from
-        the character after the opening quotes (m_begin+1) until the character
-        before the closing quotes (hence subtracting 2 characters from the pointer
-        difference of the two pointers).
-        @return string value of current token without opening and closing quotes
-        @todo Take care of Unicode.
-        */
-        inline std::string get_string() const
-        {
-            return std::string(
-                       reinterpret_cast<const char*>(m_begin + 1),
-                       static_cast<std::size_t>(m_cursor - m_begin - 2)
-                   );
-        }
      private:
        /// the buffer
-        std::string buffer;
+        std::string m_buffer;
-        /// a pointer to the next character to read from the buffer
-        const lexer_char_t* m_cursor = nullptr;
-        /// a pointer past the last character of the buffer
-        const lexer_char_t* m_limit = nullptr;
-        /// a pointer to the beginning of the current token
-        const lexer_char_t* m_begin = nullptr;
        /// the type of the last read token
-        token_type last_token = token_type::uninitialized;
+        typename lexer::token_type last_token = lexer::token_type::uninitialized;
+        lexer m_lexer;
    };
 };

--- a/test/unit.cpp
+++ b/test/unit.cpp
@@ -3892,27 +3892,43 @@ TEST_CASE("deserialization")
 {
    SECTION("string")
    {
-        auto s = "[\"foo\",1,2,3,false,{\"one\":1}]";
+        //        auto s = "[\"foo\",1,2,3,false,{\"one\":1}]";
+        //        json j = json::parse(s);
+        //        CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
+        auto s = "null";
        json j = json::parse(s);
-        CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
+        CHECK(j == json());
    }
    SECTION("operator<<")
    {
+        //        std::stringstream ss;
+        //        ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
+        //        json j;
+        //        j << ss;
+        //        CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
        std::stringstream ss;
-        ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
+        ss << "null";
        json j;
        j << ss;
-        CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
+        CHECK(j == json());
    }
    SECTION("operator>>")
    {
+        //        std::stringstream ss;
+        //        ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
+        //        json j;
+        //        ss >> j;
+        //        CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
        std::stringstream ss;
-        ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
+        ss << "null";
        json j;
        ss >> j;
-        CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
+        CHECK(j == json());
    }
 }
@@ -3980,42 +3996,42 @@ TEST_CASE("parser class")
    {
        SECTION("structural characters")
        {
-            CHECK(json::parser("[").last_token == json::parser::token_type::begin_array);
+            CHECK(json::parser("[").last_token == json::lexer::token_type::begin_array);
-            CHECK(json::parser("]").last_token == json::parser::token_type::end_array);
+            CHECK(json::parser("]").last_token == json::lexer::token_type::end_array);
-            CHECK(json::parser("{").last_token == json::parser::token_type::begin_object);
+            CHECK(json::parser("{").last_token == json::lexer::token_type::begin_object);
-            CHECK(json::parser("}").last_token == json::parser::token_type::end_object);
+            CHECK(json::parser("}").last_token == json::lexer::token_type::end_object);
-            CHECK(json::parser(",").last_token == json::parser::token_type::value_separator);
+            CHECK(json::parser(",").last_token == json::lexer::token_type::value_separator);
-            CHECK(json::parser(":").last_token == json::parser::token_type::name_separator);
+            CHECK(json::parser(":").last_token == json::lexer::token_type::name_separator);
        }
        SECTION("literal names")
        {
-            CHECK(json::parser("null").last_token == json::parser::token_type::literal_null);
+            CHECK(json::parser("null").last_token == json::lexer::token_type::literal_null);
-            CHECK(json::parser("true").last_token == json::parser::token_type::literal_true);
+            CHECK(json::parser("true").last_token == json::lexer::token_type::literal_true);
-            CHECK(json::parser("false").last_token == json::parser::token_type::literal_false);
+            CHECK(json::parser("false").last_token == json::lexer::token_type::literal_false);
        }
        SECTION("numbers")
        {
-            CHECK(json::parser("0").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("0").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("1").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("1").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("2").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("2").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("3").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("3").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("4").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("4").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("5").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("5").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("6").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("6").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("7").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("7").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("8").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("8").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("9").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("9").last_token == json::lexer::token_type::value_number);
        }
        SECTION("whitespace")
        {
-            CHECK(json::parser(" 0").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser(" 0").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("\t0").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("\t0").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("\n0").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("\n0").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser("\r0").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser("\r0").last_token == json::lexer::token_type::value_number);
-            CHECK(json::parser(" \t\n\r\n\t 0").last_token == json::parser::token_type::value_number);
+            CHECK(json::parser(" \t\n\r\n\t 0").last_token == json::lexer::token_type::value_number);
        }
        /*
@@ -4049,7 +4065,7 @@ TEST_CASE("parser class")
                    case ('9'):
                    case ('"'):
                    {
-                        CHECK(json::parser(s).last_token != json::parser::token_type::parse_error);
+                        CHECK(json::parser(s).last_token != json::lexer::token_type::parse_error);
                        break;
                    }
@@ -4058,13 +4074,13 @@ TEST_CASE("parser class")
                    case ('\n'):
                    case ('\r'):
                    {
-                        CHECK(json::parser(s).last_token == json::parser::token_type::end_of_input);
+                        CHECK(json::parser(s).last_token == json::lexer::token_type::end_of_input);
                        break;
                    }
                    default:
                    {
-                        CHECK(json::parser(s).last_token == json::parser::token_type::parse_error);
+                        CHECK(json::parser(s).last_token == json::lexer::token_type::parse_error);
                        break;
                    }
                }
@@ -4093,19 +4109,19 @@ TEST_CASE("parser class")
    SECTION("token_type_name")
    {
-        CHECK(json::parser::token_type_name(json::parser::token_type::uninitialized) == "<uninitialized>");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::uninitialized) == "<uninitialized>");
-        CHECK(json::parser::token_type_name(json::parser::token_type::literal_true) == "true literal");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::literal_true) == "true literal");
-        CHECK(json::parser::token_type_name(json::parser::token_type::literal_false) == "false literal");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::literal_false) == "false literal");
-        CHECK(json::parser::token_type_name(json::parser::token_type::literal_null) == "null literal");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::literal_null) == "null literal");
-        CHECK(json::parser::token_type_name(json::parser::token_type::value_string) == "string literal");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::value_string) == "string literal");
-        CHECK(json::parser::token_type_name(json::parser::token_type::value_number) == "number literal");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::value_number) == "number literal");
-        CHECK(json::parser::token_type_name(json::parser::token_type::begin_array) == "[");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::begin_array) == "[");
-        CHECK(json::parser::token_type_name(json::parser::token_type::begin_object) == "{");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::begin_object) == "{");
-        CHECK(json::parser::token_type_name(json::parser::token_type::end_array) == "]");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::end_array) == "]");
-        CHECK(json::parser::token_type_name(json::parser::token_type::end_object) == "}");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::end_object) == "}");
-        CHECK(json::parser::token_type_name(json::parser::token_type::name_separator) == ":");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::name_separator) == ":");
-        CHECK(json::parser::token_type_name(json::parser::token_type::value_separator) == ",");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::value_separator) == ",");
-        CHECK(json::parser::token_type_name(json::parser::token_type::parse_error) == "<parse error>");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::parse_error) == "<parse error>");
-        CHECK(json::parser::token_type_name(json::parser::token_type::end_of_input) == "<end of input>");
+        CHECK(json::parser::token_type_name(json::lexer::token_type::end_of_input) == "<end of input>");
    }
 }