Commit 28182485 by alokp@chromium.org

Drastically simplified Input class. Now most of the heavy lifting is done by…

Drastically simplified Input class. Now most of the heavy lifting is done by in-built yy_scan_string and yy_scan_bytes. Comment handling will be done by the lexer. git-svn-id: https://angleproject.googlecode.com/svn/trunk@1051 736b8ea6-26fd-11df-bfd4-992fa37f6226
parent 23ff36a0
......@@ -6,158 +6,11 @@
#include "Input.h"
#include <cassert>
#include <cstdio>
namespace pp
{
// Builds an input over `count` source strings.
// `string` holds the strings and `length` their sizes; a null `length`
// array or a negative entry means the string is null-terminated.
// The read position is advanced to the first non-empty string.
Input::Input(int count, const char* const string[], const int length[])
    : mCount(count),
      mString(string),
      mLength(length),
      mIndex(-1),  // One before the first string; advanced below.
      mSize(0),
      mError(kErrorNone),
      mState(kStateInitial)
{
    assert(count >= 0);

    // Position the cursor on the first string that has content.
    switchToNextString();
}
// Reports whether every input string has been consumed, i.e. the
// current string index has moved one past the last string.
bool Input::eof() const
{
    // The index may reach mCount but must never go beyond it.
    assert(mIndex <= mCount);

    const bool exhausted = (mIndex == mCount);
    return exhausted;
}
// Copies up to bufSize characters into buf and returns how many were
// written. Line comments are dropped (their terminating newline is kept),
// block comments are replaced by a single space (newlines inside them are
// kept). Reading stops as soon as the current string index changes, so at
// most one string is consumed per call. If the input ends inside a block
// comment, mError is set to kErrorUnexpectedEOF.
int Input::read(char* buf, int bufSize)
{
    const int firstIndex = mIndex;
    int written = 0;

    for (;;)
    {
        // Stop once the buffer is full or the string being read has changed.
        if ((mIndex != firstIndex) || (written >= bufSize))
            break;

        const int ch = getChar();
        if (ch == EOF)
        {
            if (mState == kStateBlockComment)
                mError = kErrorUnexpectedEOF;
            break;
        }

        if (mState == kStateInitial)
        {
            if (ch != '/')
            {
                buf[written++] = ch;
                continue;
            }

            // A '/' may open a comment; look at the character after it.
            const int next = peekChar();
            if (next == '/')
            {
                getChar();  // Consume the second '/'.
                mState = kStateLineComment;
            }
            else if (next == '*')
            {
                getChar();  // Consume the '*'.
                mState = kStateBlockComment;
            }
            else
            {
                // Just a lone '/', not a comment.
                buf[written++] = ch;
            }
        }
        else if (mState == kStateLineComment)
        {
            // Discard everything up to the newline, which is preserved.
            if (ch == '\n')
            {
                buf[written++] = ch;
                mState = kStateInitial;
            }
        }
        else if (mState == kStateBlockComment)
        {
            if ((ch == '*') && (peekChar() == '/'))
            {
                getChar();  // Consume the closing '/'.
                buf[written++] = ' ';  // The whole comment becomes one space.
                mState = kStateInitial;
            }
            else if (ch == '\n')
            {
                // Newlines are always passed through.
                buf[written++] = ch;
            }
        }
        else
        {
            assert(false);
        }
    }

    return written;
}
// Returns the next character of the input and advances the read position,
// or EOF when all strings have been consumed. When the character returned
// was the last one of the current string, the read position moves on to
// the next non-empty string.
int Input::getChar()
{
    if (eof())
        return EOF;

    const char* str = mString[mIndex];
    // Convert through unsigned char (as fgetc does) so that a byte such as
    // 0xFF is returned as 255 instead of being sign-extended to -1 and
    // mistaken for EOF on platforms where plain char is signed.
    const int c = static_cast<unsigned char>(str[mSize++]);

    // Switch to next string if the current one is fully read.
    const int length = stringLength(mIndex);
    // Empty strings are skipped by switchToNextString(), so we never read
    // from one here.
    assert(length != 0);
    if (((length < 0) && (str[mSize] == '\0')) ||
        ((length > 0) && (mSize == length)))
    {
        switchToNextString();
    }
    return c;
}
int Input::peekChar()
{
// Save the current read position.
int index = mIndex;
int size = mSize;
int c = getChar();
// Restore read position.
mIndex = index;
mSize = size;
return c;
}
// Moves the read position to the start of the next non-empty string,
// or to the end-of-input position if no such string remains.
void Input::switchToNextString()
{
    assert(mIndex < mCount);

    // Start reading from the beginning of whichever string we land on.
    mSize = 0;

    // Always advance at least once, then keep skipping empty strings.
    ++mIndex;
    while (!eof() && isStringEmpty(mIndex))
        ++mIndex;
}
// Returns true if the string at the given index has no characters:
// either its explicit length is zero, or it is null-terminated (negative
// length) and starts with the terminator.
bool Input::isStringEmpty(int index)
{
    assert(index < mCount);

    // Inspect the string that was asked about, not the one currently being
    // scanned; the previous code used mIndex here and ignored the parameter.
    const char* str = mString[index];
    const int length = stringLength(index);
    return (length == 0) || ((length < 0) && (str[0] == '\0'));
}
int Input::stringLength(int index)
Input::Input() : count(0), string(0), length(0), index(0), buffer(0)
{
assert(index < mCount);
return mLength ? mLength[index] : -1;
}
} // namespace pp
......
......@@ -10,63 +10,19 @@
namespace pp
{
// Reads the given set of strings into input buffer.
// Strips comments.
class Input
// Holds lexer input.
struct Input
{
public:
Input(int count, const char* const string[], const int length[]);
enum Error
{
kErrorNone,
kErrorUnexpectedEOF
};
Error error() const { return mError; }
// Returns the index of string currently being scanned.
int stringIndex() const { return mIndex; }
// Returns true if EOF has been reached.
bool eof() const;
// Reads up to bufSize characters into buf.
// Returns the number of characters read.
// It replaces each comment by a whitespace. It reads only one string
// at a time so that the lexer has opportunity to update the string number
// for meaningful diagnostic messages.
int read(char* buf, int bufSize);
private:
enum State
{
kStateInitial,
kStateLineComment,
kStateBlockComment
};
int getChar();
int peekChar();
// Switches input buffer to the next non-empty string.
// This is called when the current string is fully read.
void switchToNextString();
// Returns true if the given string is empty.
bool isStringEmpty(int index);
// Return the length of the given string.
// Returns a negative value for null-terminated strings.
int stringLength(int index);
// Input.
int mCount;
const char* const* mString;
const int* mLength;
int count;
const char* const* string;
const int* length;
// Current read position.
int mIndex; // Index of string currently being scanned.
int mSize; // Size of string already scanned.
int index; // Index of string currently being scanned.
void* buffer; // Current buffer handle.
// Current error and state.
Error mError;
State mState;
Input();
};
} // namespace pp
......
......@@ -8,27 +8,30 @@
#include <cassert>
#include "Input.h"
namespace pp
{
Lexer::Lexer() : mHandle(0), mLeadingSpace(false)
Lexer::Lexer() : mHandle(0)
{
}
Lexer::~Lexer()
{
destroyLexer();
// Make sure the lexer and associated buffer are deleted.
assert(mHandle == 0);
assert(mInput.buffer == 0);
}
bool Lexer::init(int count, const char* const string[], const int length[])
{
assert((count >= 0) && (string));
if ((count < 0) || (!string))
return false;
assert((count >= 0) && string);
mInput.count = count;
mInput.string = string;
mInput.length = length;
mInput.reset(new Input(count, string, length));
return initLexer();
}
......
......@@ -7,14 +7,12 @@
#ifndef COMPILER_PREPROCESSOR_LEXER_H_
#define COMPILER_PREPROCESSOR_LEXER_H_
#include <memory>
#include "Input.h"
#include "pp_utils.h"
namespace pp
{
class Input;
struct Token;
class Lexer
......@@ -33,8 +31,7 @@ class Lexer
void destroyLexer();
void* mHandle; // Lexer handle.
bool mLeadingSpace;
std::auto_ptr<Input> mInput; // Input buffer.
Input mInput; // Input buffer.
};
} // namespace pp
......
......@@ -36,13 +36,12 @@ typedef pp::Token::Location YYLTYPE;
yylloc->string = 0; \
} while(0);
#define YY_INPUT(buf, result, maxSize) \
result = readInput(buf, maxSize, yyscanner);
static int readInput(char* buf, int maxSize, yyscan_t scanner);
// Suppress the default implementation of YY_INPUT which generated
// compiler warnings.
#define YY_INPUT
%}
%option noyywrap nounput never-interactive
%option nounput never-interactive
%option reentrant bison-bridge bison-locations
%option prefix="pp"
%option extra-type="pp::Input*"
......@@ -116,7 +115,7 @@ FRACTIONAL_CONSTANT ({DIGIT}*"."{DIGIT}+)|({DIGIT}+".")
}
. {
yylval->push_back(yytext[0]);
yylval->assign(yytext, yyleng);
return pp::Token::INVALID_CHARACTER;
}
......@@ -124,41 +123,59 @@ FRACTIONAL_CONSTANT ({DIGIT}*"."{DIGIT}+)|({DIGIT}+".")
%%
int readInput(char* buf, int maxSize, yyscan_t scanner)
int ppwrap(yyscan_t scanner)
{
int nread = YY_NULL;
pp::Input* input = yyget_extra(scanner);
while (!input->eof() &&
(input->error() == pp::Input::kErrorNone) &&
(nread == YY_NULL))
// Delete the current buffer before switching to the next one.
YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(input->buffer);
if (buffer != NULL)
{
nread = input->read(buf, maxSize);
yy_delete_buffer(buffer, scanner);
input->buffer = NULL;
}
return nread;
int index = std::min(input->index + 1, input->count);
if (index == input->count)
return 1; // EOF reached.
int length = input->length ? input->length[index] : -1;
if (length < 0) // NULL terminated string.
buffer = yy_scan_string(input->string[index], scanner);
else
buffer = yy_scan_bytes(input->string[index], length, scanner);
// TODO(alokp): Increment token location.
input->index = index;
input->buffer = buffer;
return 0;
}
namespace pp {
int Lexer::lex(Token* token)
{
bool leadingSpace = false;
token->type = yylex(&token->value, &token->location, mHandle);
while (token->type == ' ')
{
mLeadingSpace = true;
leadingSpace = true;
token->type = yylex(&token->value, &token->location, mHandle);
}
token->setHasLeadingSpace(mLeadingSpace);
mLeadingSpace = false;
token->setHasLeadingSpace(leadingSpace);
return token->type;
}
bool Lexer::initLexer()
{
if ((mHandle == NULL) && yylex_init_extra(mInput.get(), &mHandle))
if ((mHandle == NULL) && yylex_init_extra(&mInput, &mHandle))
return false;
yyrestart(0, mHandle);
// Setup first scan string.
mInput.index = -1;
ppwrap(mHandle);
return true;
}
......@@ -167,6 +184,13 @@ void Lexer::destroyLexer()
if (mHandle == NULL)
return;
YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(mInput.buffer);
if (buffer != NULL)
{
yy_delete_buffer(buffer, mHandle);
mInput.buffer = NULL;
}
yylex_destroy(mHandle);
mHandle = NULL;
}
......
......@@ -334,9 +334,6 @@ void ppfree (void * ,yyscan_t yyscanner );
/* Begin user sect3 */
#define ppwrap(n) 1
#define YY_SKIP_YYWRAP
typedef unsigned char YY_CHAR;
typedef int yy_state_type;
......@@ -520,10 +517,9 @@ typedef pp::Token::Location YYLTYPE;
yylloc->string = 0; \
} while(0);
#define YY_INPUT(buf, result, maxSize) \
result = readInput(buf, maxSize, yyscanner);
static int readInput(char* buf, int maxSize, yyscan_t scanner);
// Suppress the default implementation of YY_INPUT which generated
// compiler warnings.
#define YY_INPUT
#define INITIAL 0
......@@ -970,7 +966,7 @@ YY_RULE_SETUP
case 30:
YY_RULE_SETUP
{
yylval->push_back(yytext[0]);
yylval->assign(yytext, yyleng);
return pp::Token::INVALID_CHARACTER;
}
YY_BREAK
......@@ -2116,41 +2112,59 @@ void ppfree (void * ptr , yyscan_t yyscanner)
#define YYTABLES_NAME "yytables"
int readInput(char* buf, int maxSize, yyscan_t scanner)
int ppwrap(yyscan_t scanner)
{
int nread = YY_NULL;
pp::Input* input = ppget_extra(scanner);
while (!input->eof() &&
(input->error() == pp::Input::kErrorNone) &&
(nread == YY_NULL))
// Delete the current buffer before switching to the next one.
YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(input->buffer);
if (buffer != NULL)
{
nread = input->read(buf, maxSize);
pp_delete_buffer(buffer,scanner);
input->buffer = NULL;
}
return nread;
int index = std::min(input->index + 1, input->count);
if (index == input->count)
return 1; // EOF reached.
int length = input->length ? input->length[index] : -1;
if (length < 0) // NULL terminated string.
buffer = pp_scan_string(input->string[index],scanner);
else
buffer = pp_scan_bytes(input->string[index],length,scanner);
// TODO(alokp): Increment token location.
input->index = index;
input->buffer = buffer;
return 0;
}
namespace pp {
int Lexer::lex(Token* token)
{
bool leadingSpace = false;
token->type = pplex(&token->value,&token->location,mHandle);
while (token->type == ' ')
{
mLeadingSpace = true;
leadingSpace = true;
token->type = pplex(&token->value,&token->location,mHandle);
}
token->setHasLeadingSpace(mLeadingSpace);
mLeadingSpace = false;
token->setHasLeadingSpace(leadingSpace);
return token->type;
}
bool Lexer::initLexer()
{
if ((mHandle == NULL) && pplex_init_extra(mInput.get(),&mHandle))
if ((mHandle == NULL) && pplex_init_extra(&mInput,&mHandle))
return false;
pprestart(0,mHandle);
// Setup first scan string.
mInput.index = -1;
ppwrap(mHandle);
return true;
}
......@@ -2159,6 +2173,13 @@ void Lexer::destroyLexer()
if (mHandle == NULL)
return;
YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(mInput.buffer);
if (buffer != NULL)
{
pp_delete_buffer(buffer,mHandle);
mInput.buffer = NULL;
}
pplex_destroy(mHandle);
mHandle = NULL;
}
......
......@@ -5,6 +5,7 @@
//
#include <algorithm>
#include <climits>
#include "gtest/gtest.h"
#include "Preprocessor.h"
......@@ -101,7 +102,7 @@ TEST_P(CharTest, Identified)
// Note +1 for the max-value in range. It is there because the max-value
// is not included in the range.
INSTANTIATE_TEST_CASE_P(AllCharacters, CharTest,
testing::Range(-127, 127 + 1));
INSTANTIATE_TEST_CASE_P(All, CharTest,
testing::Range(CHAR_MIN, CHAR_MAX + 1));
#endif // GTEST_HAS_PARAM_TEST
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment