Commit 28182485 by alokp@chromium.org

Drastically simplified Input class. Now most of the heavy lifting is done by…

Drastically simplified the Input class. Most of the heavy lifting is now done by flex's built-in yy_scan_string and yy_scan_bytes functions. Comment handling will be done by the lexer. git-svn-id: https://angleproject.googlecode.com/svn/trunk@1051 736b8ea6-26fd-11df-bfd4-992fa37f6226
parent 23ff36a0
...@@ -6,158 +6,11 @@ ...@@ -6,158 +6,11 @@
#include "Input.h" #include "Input.h"
#include <cassert>
#include <cstdio>
namespace pp namespace pp
{ {
Input::Input(int count, const char* const string[], const int length[]) Input::Input() : count(0), string(0), length(0), index(0), buffer(0)
: mCount(count),
mString(string),
mLength(length),
mIndex(-1),
mSize(0),
mError(kErrorNone),
mState(kStateInitial)
{
assert(mCount >= 0);
switchToNextString();
}
bool Input::eof() const
{
assert(mIndex <= mCount);
return mIndex == mCount;
}
int Input::read(char* buf, int bufSize)
{
int nread = 0;
int startIndex = mIndex;
// Keep reading until the buffer is full or the current string is exhausted.
while ((mIndex == startIndex) && (nread < bufSize))
{
int c = getChar();
if (c == EOF)
{
if (mState == kStateBlockComment)
mError = kErrorUnexpectedEOF;
break;
}
switch (mState)
{
case kStateInitial:
if (c == '/')
{
// Potentially a comment.
switch (peekChar())
{
case '/':
getChar(); // Eat '/'.
mState = kStateLineComment;
break;
case '*':
getChar(); // Eat '*'.
mState = kStateBlockComment;
break;
default:
// Not a comment.
buf[nread++] = c;
break;
}
} else
{
buf[nread++] = c;
}
break;
case kStateLineComment:
if (c == '\n')
{
buf[nread++] = c;
mState = kStateInitial;
}
break;
case kStateBlockComment:
if (c == '*' && (peekChar() == '/'))
{
getChar(); // Eat '/'.
buf[nread++] = ' '; // Replace comment with whitespace.
mState = kStateInitial;
} else if (c == '\n')
{
// Line breaks are never skipped.
buf[nread++] = c;
}
break;
default:
assert(false);
break;
}
}
return nread;
}
int Input::getChar()
{
if (eof()) return EOF;
const char* str = mString[mIndex];
int c = str[mSize++];
// Switch to next string if the current one is fully read.
int length = stringLength(mIndex);
// We never read from empty string.
assert(length != 0);
if (((length < 0) && (str[mSize] == '\0')) ||
((length > 0) && (mSize == length)))
switchToNextString();
return c;
}
int Input::peekChar()
{
// Save the current read position.
int index = mIndex;
int size = mSize;
int c = getChar();
// Restore read position.
mIndex = index;
mSize = size;
return c;
}
void Input::switchToNextString()
{
assert(mIndex < mCount);
mSize = 0;
do
{
++mIndex;
} while (!eof() && isStringEmpty(mIndex));
}
bool Input::isStringEmpty(int index)
{
assert(index < mCount);
const char* str = mString[mIndex];
int length = stringLength(mIndex);
return (length == 0) || ((length < 0) && (str[0] == '\0'));
}
int Input::stringLength(int index)
{ {
assert(index < mCount);
return mLength ? mLength[index] : -1;
} }
} // namespace pp } // namespace pp
......
...@@ -10,63 +10,19 @@ ...@@ -10,63 +10,19 @@
namespace pp namespace pp
{ {
// Reads the given set of strings into input buffer. // Holds lexer input.
// Strips comments. struct Input
class Input
{ {
public:
Input(int count, const char* const string[], const int length[]);
enum Error
{
kErrorNone,
kErrorUnexpectedEOF
};
Error error() const { return mError; }
// Returns the index of string currently being scanned.
int stringIndex() const { return mIndex; }
// Returns true if EOF has reached.
bool eof() const;
// Reads up to bufSize characters into buf.
// Returns the number of characters read.
// It replaces each comment by a whitespace. It reads only one string
// at a time so that the lexer has opportunity to update the string number
// for meaningful diagnostic messages.
int read(char* buf, int bufSize);
private:
enum State
{
kStateInitial,
kStateLineComment,
kStateBlockComment
};
int getChar();
int peekChar();
// Switches input buffer to the next non-empty string.
// This is called when the current string is fully read.
void switchToNextString();
// Returns true if the given string is empty.
bool isStringEmpty(int index);
// Return the length of the given string.
// Returns a negative value for null-terminated strings.
int stringLength(int index);
// Input. // Input.
int mCount; int count;
const char* const* mString; const char* const* string;
const int* mLength; const int* length;
// Current read position. // Current read position.
int mIndex; // Index of string currently being scanned. int index; // Index of string currently being scanned.
int mSize; // Size of string already scanned. void* buffer; // Current buffer handle.
// Current error and state. Input();
Error mError;
State mState;
}; };
} // namespace pp } // namespace pp
......
...@@ -8,27 +8,30 @@ ...@@ -8,27 +8,30 @@
#include <cassert> #include <cassert>
#include "Input.h"
namespace pp namespace pp
{ {
Lexer::Lexer() : mHandle(0), mLeadingSpace(false) Lexer::Lexer() : mHandle(0)
{ {
} }
Lexer::~Lexer() Lexer::~Lexer()
{ {
destroyLexer(); destroyLexer();
// Make sure the lexer and associated buffer are deleted.
assert(mHandle == 0);
assert(mInput.buffer == 0);
} }
bool Lexer::init(int count, const char* const string[], const int length[]) bool Lexer::init(int count, const char* const string[], const int length[])
{ {
assert((count >= 0) && (string)); assert((count >= 0) && string);
if ((count < 0) || (!string))
return false; mInput.count = count;
mInput.string = string;
mInput.length = length;
mInput.reset(new Input(count, string, length));
return initLexer(); return initLexer();
} }
......
...@@ -7,14 +7,12 @@ ...@@ -7,14 +7,12 @@
#ifndef COMPILER_PREPROCESSOR_LEXER_H_ #ifndef COMPILER_PREPROCESSOR_LEXER_H_
#define COMPILER_PREPROCESSOR_LEXER_H_ #define COMPILER_PREPROCESSOR_LEXER_H_
#include <memory> #include "Input.h"
#include "pp_utils.h" #include "pp_utils.h"
namespace pp namespace pp
{ {
class Input;
struct Token; struct Token;
class Lexer class Lexer
...@@ -33,8 +31,7 @@ class Lexer ...@@ -33,8 +31,7 @@ class Lexer
void destroyLexer(); void destroyLexer();
void* mHandle; // Lexer handle. void* mHandle; // Lexer handle.
bool mLeadingSpace; Input mInput; // Input buffer.
std::auto_ptr<Input> mInput; // Input buffer.
}; };
} // namespace pp } // namespace pp
......
...@@ -36,13 +36,12 @@ typedef pp::Token::Location YYLTYPE; ...@@ -36,13 +36,12 @@ typedef pp::Token::Location YYLTYPE;
yylloc->string = 0; \ yylloc->string = 0; \
} while(0); } while(0);
#define YY_INPUT(buf, result, maxSize) \ // Suppress the default implementation of YY_INPUT which generated
result = readInput(buf, maxSize, yyscanner); // compiler warnings.
#define YY_INPUT
static int readInput(char* buf, int maxSize, yyscan_t scanner);
%} %}
%option noyywrap nounput never-interactive %option nounput never-interactive
%option reentrant bison-bridge bison-locations %option reentrant bison-bridge bison-locations
%option prefix="pp" %option prefix="pp"
%option extra-type="pp::Input*" %option extra-type="pp::Input*"
...@@ -116,7 +115,7 @@ FRACTIONAL_CONSTANT ({DIGIT}*"."{DIGIT}+)|({DIGIT}+".") ...@@ -116,7 +115,7 @@ FRACTIONAL_CONSTANT ({DIGIT}*"."{DIGIT}+)|({DIGIT}+".")
} }
. { . {
yylval->push_back(yytext[0]); yylval->assign(yytext, yyleng);
return pp::Token::INVALID_CHARACTER; return pp::Token::INVALID_CHARACTER;
} }
...@@ -124,41 +123,59 @@ FRACTIONAL_CONSTANT ({DIGIT}*"."{DIGIT}+)|({DIGIT}+".") ...@@ -124,41 +123,59 @@ FRACTIONAL_CONSTANT ({DIGIT}*"."{DIGIT}+)|({DIGIT}+".")
%% %%
int readInput(char* buf, int maxSize, yyscan_t scanner) int ppwrap(yyscan_t scanner)
{ {
int nread = YY_NULL;
pp::Input* input = yyget_extra(scanner); pp::Input* input = yyget_extra(scanner);
while (!input->eof() &&
(input->error() == pp::Input::kErrorNone) && // Delete the current buffer before switching to the next one.
(nread == YY_NULL)) YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(input->buffer);
if (buffer != NULL)
{ {
nread = input->read(buf, maxSize); yy_delete_buffer(buffer, scanner);
input->buffer = NULL;
} }
return nread;
int index = std::min(input->index + 1, input->count);
if (index == input->count)
return 1; // EOF reached.
int length = input->length ? input->length[index] : -1;
if (length < 0) // NULL terminated string.
buffer = yy_scan_string(input->string[index], scanner);
else
buffer = yy_scan_bytes(input->string[index], length, scanner);
// TODO(alokp): Increment token location.
input->index = index;
input->buffer = buffer;
return 0;
} }
namespace pp { namespace pp {
int Lexer::lex(Token* token) int Lexer::lex(Token* token)
{ {
bool leadingSpace = false;
token->type = yylex(&token->value, &token->location, mHandle); token->type = yylex(&token->value, &token->location, mHandle);
while (token->type == ' ') while (token->type == ' ')
{ {
mLeadingSpace = true; leadingSpace = true;
token->type = yylex(&token->value, &token->location, mHandle); token->type = yylex(&token->value, &token->location, mHandle);
} }
token->setHasLeadingSpace(mLeadingSpace); token->setHasLeadingSpace(leadingSpace);
mLeadingSpace = false;
return token->type; return token->type;
} }
bool Lexer::initLexer() bool Lexer::initLexer()
{ {
if ((mHandle == NULL) && yylex_init_extra(mInput.get(), &mHandle)) if ((mHandle == NULL) && yylex_init_extra(&mInput, &mHandle))
return false; return false;
yyrestart(0, mHandle); // Setup first scan string.
mInput.index = -1;
ppwrap(mHandle);
return true; return true;
} }
...@@ -167,6 +184,13 @@ void Lexer::destroyLexer() ...@@ -167,6 +184,13 @@ void Lexer::destroyLexer()
if (mHandle == NULL) if (mHandle == NULL)
return; return;
YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(mInput.buffer);
if (buffer != NULL)
{
yy_delete_buffer(buffer, mHandle);
mInput.buffer = NULL;
}
yylex_destroy(mHandle); yylex_destroy(mHandle);
mHandle = NULL; mHandle = NULL;
} }
......
...@@ -334,9 +334,6 @@ void ppfree (void * ,yyscan_t yyscanner ); ...@@ -334,9 +334,6 @@ void ppfree (void * ,yyscan_t yyscanner );
/* Begin user sect3 */ /* Begin user sect3 */
#define ppwrap(n) 1
#define YY_SKIP_YYWRAP
typedef unsigned char YY_CHAR; typedef unsigned char YY_CHAR;
typedef int yy_state_type; typedef int yy_state_type;
...@@ -520,10 +517,9 @@ typedef pp::Token::Location YYLTYPE; ...@@ -520,10 +517,9 @@ typedef pp::Token::Location YYLTYPE;
yylloc->string = 0; \ yylloc->string = 0; \
} while(0); } while(0);
#define YY_INPUT(buf, result, maxSize) \ // Suppress the default implementation of YY_INPUT which generated
result = readInput(buf, maxSize, yyscanner); // compiler warnings.
#define YY_INPUT
static int readInput(char* buf, int maxSize, yyscan_t scanner);
#define INITIAL 0 #define INITIAL 0
...@@ -970,7 +966,7 @@ YY_RULE_SETUP ...@@ -970,7 +966,7 @@ YY_RULE_SETUP
case 30: case 30:
YY_RULE_SETUP YY_RULE_SETUP
{ {
yylval->push_back(yytext[0]); yylval->assign(yytext, yyleng);
return pp::Token::INVALID_CHARACTER; return pp::Token::INVALID_CHARACTER;
} }
YY_BREAK YY_BREAK
...@@ -2116,41 +2112,59 @@ void ppfree (void * ptr , yyscan_t yyscanner) ...@@ -2116,41 +2112,59 @@ void ppfree (void * ptr , yyscan_t yyscanner)
#define YYTABLES_NAME "yytables" #define YYTABLES_NAME "yytables"
int readInput(char* buf, int maxSize, yyscan_t scanner) int ppwrap(yyscan_t scanner)
{ {
int nread = YY_NULL;
pp::Input* input = ppget_extra(scanner); pp::Input* input = ppget_extra(scanner);
while (!input->eof() &&
(input->error() == pp::Input::kErrorNone) && // Delete the current buffer before switching to the next one.
(nread == YY_NULL)) YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(input->buffer);
if (buffer != NULL)
{ {
nread = input->read(buf, maxSize); pp_delete_buffer(buffer,scanner);
input->buffer = NULL;
} }
return nread;
int index = std::min(input->index + 1, input->count);
if (index == input->count)
return 1; // EOF reached.
int length = input->length ? input->length[index] : -1;
if (length < 0) // NULL terminated string.
buffer = pp_scan_string(input->string[index],scanner);
else
buffer = pp_scan_bytes(input->string[index],length,scanner);
// TODO(alokp): Increment token location.
input->index = index;
input->buffer = buffer;
return 0;
} }
namespace pp { namespace pp {
int Lexer::lex(Token* token) int Lexer::lex(Token* token)
{ {
bool leadingSpace = false;
token->type = pplex(&token->value,&token->location,mHandle); token->type = pplex(&token->value,&token->location,mHandle);
while (token->type == ' ') while (token->type == ' ')
{ {
mLeadingSpace = true; leadingSpace = true;
token->type = pplex(&token->value,&token->location,mHandle); token->type = pplex(&token->value,&token->location,mHandle);
} }
token->setHasLeadingSpace(mLeadingSpace); token->setHasLeadingSpace(leadingSpace);
mLeadingSpace = false;
return token->type; return token->type;
} }
bool Lexer::initLexer() bool Lexer::initLexer()
{ {
if ((mHandle == NULL) && pplex_init_extra(mInput.get(),&mHandle)) if ((mHandle == NULL) && pplex_init_extra(&mInput,&mHandle))
return false; return false;
pprestart(0,mHandle); // Setup first scan string.
mInput.index = -1;
ppwrap(mHandle);
return true; return true;
} }
...@@ -2159,6 +2173,13 @@ void Lexer::destroyLexer() ...@@ -2159,6 +2173,13 @@ void Lexer::destroyLexer()
if (mHandle == NULL) if (mHandle == NULL)
return; return;
YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(mInput.buffer);
if (buffer != NULL)
{
pp_delete_buffer(buffer,mHandle);
mInput.buffer = NULL;
}
pplex_destroy(mHandle); pplex_destroy(mHandle);
mHandle = NULL; mHandle = NULL;
} }
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
// //
#include <algorithm> #include <algorithm>
#include <climits>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "Preprocessor.h" #include "Preprocessor.h"
...@@ -101,7 +102,7 @@ TEST_P(CharTest, Identified) ...@@ -101,7 +102,7 @@ TEST_P(CharTest, Identified)
// Note +1 for the max-value in range. It is there because the max-value // Note +1 for the max-value in range. It is there because the max-value
// not included in the range. // not included in the range.
INSTANTIATE_TEST_CASE_P(AllCharacters, CharTest, INSTANTIATE_TEST_CASE_P(All, CharTest,
testing::Range(-127, 127 + 1)); testing::Range(CHAR_MIN, CHAR_MAX + 1));
#endif // GTEST_HAS_PARAM_TEST #endif // GTEST_HAS_PARAM_TEST
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment