Drastically simplified Input class. Now most of the heavy lifting is done by…

Drastically simplified the Input class. Most of the heavy lifting is now done by flex's built-in yy_scan_string and yy_scan_bytes functions. Comment handling will be done by the lexer. git-svn-id: https://angleproject.googlecode.com/svn/trunk@1051 736b8ea6-26fd-11df-bfd4-992fa37f6226

Drastically simplified Input class. Now most of the heavy lifting is done by…
28182485 · alokp@chromium.org · 23ff36a0 · 28182485 · 28182485 · 28182485
Commit 28182485 authored Apr 24, 2012 by alokp@chromium.org
7 changed files
--- a/src/compiler/preprocessor/new/Input.cpp
+++ b/src/compiler/preprocessor/new/Input.cpp
@@ -6,158 +6,11 @@
 #include "Input.h"
-#include <cassert>
-#include <cstdio>
 namespace pp
 {
-Input::Input(int count, const char* const string[], const int length[])
+Input::Input() : count(0), string(0), length(0), index(0), buffer(0)
-    : mCount(count),
-      mString(string),
-      mLength(length),
-      mIndex(-1),
-      mSize(0),
-      mError(kErrorNone),
-      mState(kStateInitial)
-{
-    assert(mCount >= 0);
-    switchToNextString();
-}
-bool Input::eof() const
-{
-    assert(mIndex <= mCount);
-    return mIndex == mCount;
-}
-int Input::read(char* buf, int bufSize)
-{
-    int nread = 0;
-    int startIndex = mIndex;
-    // Keep reading until the buffer is full or the current string is exhausted.
-    while ((mIndex == startIndex) && (nread < bufSize))
-    {
-        int c = getChar();
-        if (c == EOF)
-        {
-            if (mState == kStateBlockComment)
-                mError = kErrorUnexpectedEOF;
-            break;
-        }
-        switch (mState)
-        {
-          case kStateInitial:
-            if (c == '/')
-            {
-                // Potentially a comment.
-                switch (peekChar())
-                {
-                  case '/':
-                    getChar();  // Eat '/'.
-                    mState = kStateLineComment;
-                    break;
-                  case '*':
-                    getChar();  // Eat '*'.
-                    mState = kStateBlockComment;
-                    break;
-                  default:
-                    // Not a comment.
-                    buf[nread++] = c;
-                    break;
-                }
-            } else
-            {
-                buf[nread++] = c;
-            }
-            break;
-          case kStateLineComment:
-            if (c == '\n')
-            {
-                buf[nread++] = c;
-                mState = kStateInitial;
-            }
-            break;
-          case kStateBlockComment:
-            if (c == '*' && (peekChar() == '/'))
-            {
-                getChar();   // Eat '/'.
-                buf[nread++] = ' ';  // Replace comment with whitespace.
-                mState = kStateInitial;
-            } else if (c == '\n')
-            {
-                // Line breaks are never skipped.
-                buf[nread++] = c;
-            }
-            break;
-          default:
-            assert(false);
-            break;
-        }
-    }
-    return nread;
-}
-int Input::getChar()
-{
-    if (eof()) return EOF;
-    const char* str = mString[mIndex];
-    int c = str[mSize++];
-    // Switch to next string if the current one is fully read.
-    int length = stringLength(mIndex);
-    // We never read from empty string.
-    assert(length != 0);
-    if (((length < 0) && (str[mSize] == '\0')) ||
-        ((length > 0) && (mSize == length)))
-        switchToNextString();
-    return c;
-}
-int Input::peekChar()
-{
-    // Save the current read position.
-    int index = mIndex;
-    int size = mSize;
-    int c = getChar();
-    // Restore read position.
-    mIndex = index;
-    mSize = size;
-    return c;
-}
-void Input::switchToNextString()
-{
-    assert(mIndex < mCount);
-    mSize = 0;
-    do
-    {
-        ++mIndex;
-    } while (!eof() && isStringEmpty(mIndex));
-}
-bool Input::isStringEmpty(int index)
-{
-    assert(index < mCount);
-    const char* str = mString[mIndex];
-    int length = stringLength(mIndex);
-    return (length == 0) || ((length < 0) && (str[0] == '\0'));
-}
-int Input::stringLength(int index)
 {
-    assert(index < mCount);
-    return mLength ? mLength[index] : -1;
 }
 }  // namespace pp

--- a/src/compiler/preprocessor/new/Input.h
+++ b/src/compiler/preprocessor/new/Input.h
@@ -10,63 +10,19 @@
 namespace pp
 {
-// Reads the given set of strings into input buffer.
+// Holds lexer input.
-// Strips comments.
+struct Input
-class Input
 {
-  public:
-    Input(int count, const char* const string[], const int length[]);
-    enum Error
-    {
-        kErrorNone,
-        kErrorUnexpectedEOF
-    };
-    Error error() const { return mError; }
-    // Returns the index of string currently being scanned.
-    int stringIndex() const { return mIndex; }
-    // Returns true if EOF has reached.
-    bool eof() const;
-    // Reads up to bufSize characters into buf.
-    // Returns the number of characters read.
-    // It replaces each comment by a whitespace. It reads only one string
-    // at a time so that the lexer has opportunity to update the string number
-    // for meaningful diagnostic messages.
-    int read(char* buf, int bufSize);
-private:
-    enum State
-    {
-        kStateInitial,
-        kStateLineComment,
-        kStateBlockComment
-    };
-    int getChar();
-    int peekChar();
-    // Switches input buffer to the next non-empty string.
-    // This is called when the current string is fully read.
-    void switchToNextString();
-    // Returns true if the given string is empty.
-    bool isStringEmpty(int index);
-    // Return the length of the given string.
-    // Returns a negative value for null-terminated strings.
-    int stringLength(int index);
    // Input.
-    int mCount;
+    int count;
-    const char* const* mString;
+    const char* const* string;
-    const int* mLength;
+    const int* length;
    // Current read position.
-    int mIndex;   // Index of string currently being scanned.
+    int index;  // Index of string currently being scanned.
-    int mSize;    // Size of string already scanned.
+    void* buffer;  // Current buffer handle.
-    // Current error and state.
+    Input();
-    Error mError;
-    State mState;
 };
 }  // namespace pp

--- a/src/compiler/preprocessor/new/Lexer.cpp
+++ b/src/compiler/preprocessor/new/Lexer.cpp
@@ -8,27 +8,30 @@
 #include <cassert>
-#include "Input.h"
 namespace pp
 {
-Lexer::Lexer() : mHandle(0), mLeadingSpace(false)
+Lexer::Lexer() : mHandle(0)
 {
 }
 Lexer::~Lexer()
 {
    destroyLexer();
+    // Make sure the lexer and associated buffer are deleted.
+    assert(mHandle == 0);
+    assert(mInput.buffer == 0);
 }
 bool Lexer::init(int count, const char* const string[], const int length[])
 {
-    assert((count >= 0) && (string));
+    assert((count >= 0) && string);
-    if ((count < 0) || (!string))
-        return false;
+    mInput.count = count;
+    mInput.string = string;
+    mInput.length = length;
-    mInput.reset(new Input(count, string, length));
    return initLexer();
 }

--- a/src/compiler/preprocessor/new/Lexer.h
+++ b/src/compiler/preprocessor/new/Lexer.h
@@ -7,14 +7,12 @@
 #ifndef COMPILER_PREPROCESSOR_LEXER_H_
 #define COMPILER_PREPROCESSOR_LEXER_H_
-#include <memory>
+#include "Input.h"
 #include "pp_utils.h"
 namespace pp
 {
-class Input;
 struct Token;
 class Lexer
@@ -33,8 +31,7 @@ class Lexer
    void destroyLexer();
    void* mHandle;  // Lexer handle.
-    bool mLeadingSpace;
+    Input mInput;  // Input buffer.
-    std::auto_ptr<Input> mInput;  // Input buffer.
 };
 }  // namespace pp

--- a/src/compiler/preprocessor/new/pp.l
+++ b/src/compiler/preprocessor/new/pp.l
@@ -36,13 +36,12 @@ typedef pp::Token::Location YYLTYPE;
        yylloc->string = 0;      \
    } while(0);
-#define YY_INPUT(buf, result, maxSize) \
+// Suppress the default implementation of YY_INPUT which generated
-    result = readInput(buf, maxSize, yyscanner);
+// compiler warnings.
+#define YY_INPUT
-static int readInput(char* buf, int maxSize, yyscan_t scanner);
 %}
-%option noyywrap nounput never-interactive
+%option nounput never-interactive
 %option reentrant bison-bridge bison-locations
 %option prefix="pp"
 %option extra-type="pp::Input*"
@@ -116,7 +115,7 @@ FRACTIONAL_CONSTANT  ({DIGIT}*"."{DIGIT}+)|({DIGIT}+".")
 }
 . {
-    yylval->push_back(yytext[0]);
+    yylval->assign(yytext, yyleng);
    return pp::Token::INVALID_CHARACTER;
 }
@@ -124,41 +123,59 @@ FRACTIONAL_CONSTANT  ({DIGIT}*"."{DIGIT}+)|({DIGIT}+".")
 %%
-int readInput(char* buf, int maxSize, yyscan_t scanner)
+int ppwrap(yyscan_t scanner)
 {
-    int nread = YY_NULL;
    pp::Input* input = yyget_extra(scanner);
-    while (!input->eof() &&
-           (input->error() == pp::Input::kErrorNone) &&
+    // Delete the current buffer before switching to the next one.
-           (nread == YY_NULL))
+    YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(input->buffer);
+    if (buffer != NULL)
    {
-        nread = input->read(buf, maxSize);
+        yy_delete_buffer(buffer, scanner);
+        input->buffer = NULL;
    }
-    return nread;
+    int index = std::min(input->index + 1, input->count);
+    if (index == input->count)
+        return 1;  // EOF reached.
+    int length = input->length ? input->length[index] : -1;
+    if (length < 0)  // NULL terminated string.
+        buffer = yy_scan_string(input->string[index], scanner);
+    else
+        buffer = yy_scan_bytes(input->string[index], length, scanner);
+    // TODO(alokp): Increment token location.
+    input->index = index;
+    input->buffer = buffer;
+    return 0;
 }
 namespace pp {
 int Lexer::lex(Token* token)
 {
+    bool leadingSpace = false;
    token->type = yylex(&token->value, &token->location, mHandle);
    while (token->type == ' ')
    {
-        mLeadingSpace = true;
+        leadingSpace = true;
        token->type = yylex(&token->value, &token->location, mHandle);
    }
-    token->setHasLeadingSpace(mLeadingSpace);
+    token->setHasLeadingSpace(leadingSpace);
-    mLeadingSpace = false;
    return token->type;
 }
 bool Lexer::initLexer()
 {
-    if ((mHandle == NULL) && yylex_init_extra(mInput.get(), &mHandle))
+    if ((mHandle == NULL) && yylex_init_extra(&mInput, &mHandle))
        return false;
-    yyrestart(0, mHandle);
+    // Setup first scan string.
+    mInput.index = -1;
+    ppwrap(mHandle);
    return true;
 }
@@ -167,6 +184,13 @@ void Lexer::destroyLexer()
    if (mHandle == NULL)
        return;
+    YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(mInput.buffer);
+    if (buffer != NULL)
+    {
+        yy_delete_buffer(buffer, mHandle);
+        mInput.buffer = NULL;
+    }
    yylex_destroy(mHandle);
    mHandle = NULL;
 }

--- a/src/compiler/preprocessor/new/pp_lex.cpp
+++ b/src/compiler/preprocessor/new/pp_lex.cpp
@@ -334,9 +334,6 @@ void ppfree (void * ,yyscan_t yyscanner );
 /* Begin user sect3 */
-#define ppwrap(n) 1
-#define YY_SKIP_YYWRAP
 typedef unsigned char YY_CHAR;
 typedef int yy_state_type;
@@ -520,10 +517,9 @@ typedef pp::Token::Location YYLTYPE;
        yylloc->string = 0;      \
    } while(0);
-#define YY_INPUT(buf, result, maxSize) \
+// Suppress the default implementation of YY_INPUT which generated
-    result = readInput(buf, maxSize, yyscanner);
+// compiler warnings.
+#define YY_INPUT
-static int readInput(char* buf, int maxSize, yyscan_t scanner);
 #define INITIAL 0
@@ -970,7 +966,7 @@ YY_RULE_SETUP
 case 30:
 YY_RULE_SETUP
 {
-    yylval->push_back(yytext[0]);
+    yylval->assign(yytext, yyleng);
    return pp::Token::INVALID_CHARACTER;
 }
 	YY_BREAK
@@ -2116,41 +2112,59 @@ void ppfree (void * ptr , yyscan_t yyscanner)
 #define YYTABLES_NAME "yytables"
-int readInput(char* buf, int maxSize, yyscan_t scanner)
+int ppwrap(yyscan_t scanner)
 {
-    int nread = YY_NULL;
    pp::Input* input = ppget_extra(scanner);
-    while (!input->eof() &&
-           (input->error() == pp::Input::kErrorNone) &&
+    // Delete the current buffer before switching to the next one.
-           (nread == YY_NULL))
+    YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(input->buffer);
+    if (buffer != NULL)
    {
-        nread = input->read(buf, maxSize);
+        pp_delete_buffer(buffer,scanner);
+        input->buffer = NULL;
    }
-    return nread;
+    int index = std::min(input->index + 1, input->count);
+    if (index == input->count)
+        return 1;  // EOF reached.
+    int length = input->length ? input->length[index] : -1;
+    if (length < 0)  // NULL terminated string.
+        buffer = pp_scan_string(input->string[index],scanner);
+    else
+        buffer = pp_scan_bytes(input->string[index],length,scanner);
+    // TODO(alokp): Increment token location.
+    input->index = index;
+    input->buffer = buffer;
+    return 0;
 }
 namespace pp {
 int Lexer::lex(Token* token)
 {
+    bool leadingSpace = false;
    token->type = pplex(&token->value,&token->location,mHandle);
    while (token->type == ' ')
    {
-        mLeadingSpace = true;
+        leadingSpace = true;
        token->type = pplex(&token->value,&token->location,mHandle);
    }
-    token->setHasLeadingSpace(mLeadingSpace);
+    token->setHasLeadingSpace(leadingSpace);
-    mLeadingSpace = false;
    return token->type;
 }
 bool Lexer::initLexer()
 {
-    if ((mHandle == NULL) && pplex_init_extra(mInput.get(),&mHandle))
+    if ((mHandle == NULL) && pplex_init_extra(&mInput,&mHandle))
        return false;
-    pprestart(0,mHandle);
+    // Setup first scan string.
+    mInput.index = -1;
+    ppwrap(mHandle);
    return true;
 }
@@ -2159,6 +2173,13 @@ void Lexer::destroyLexer()
    if (mHandle == NULL)
        return;
+    YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(mInput.buffer);
+    if (buffer != NULL)
+    {
+        pp_delete_buffer(buffer,mHandle);
+        mInput.buffer = NULL;
+    }
    pplex_destroy(mHandle);
    mHandle = NULL;
 }

--- a/tests/preprocessor_tests/char_test.cpp
+++ b/tests/preprocessor_tests/char_test.cpp
@@ -5,6 +5,7 @@
 //
 #include <algorithm>
+#include <climits>
 #include "gtest/gtest.h"
 #include "Preprocessor.h"
@@ -101,7 +102,7 @@ TEST_P(CharTest, Identified)
 // Note the +1 on the max-value of the range. It is there because the
 // max-value itself is not included in the range.
-INSTANTIATE_TEST_CASE_P(AllCharacters, CharTest,
+INSTANTIATE_TEST_CASE_P(All, CharTest,
-                        testing::Range(-127, 127 + 1));
+                        testing::Range(CHAR_MIN, CHAR_MAX + 1));
 #endif  // GTEST_HAS_PARAM_TEST