diff options
author | Ben Smith <binjimin@gmail.com> | 2017-08-15 14:36:20 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-08-15 14:36:20 -0700 |
commit | 3d3920f6d9388c46af6725dabb34d98752958d8d (patch) | |
tree | cf77acb6d4140b2fb791f91d4af38d59ad8f66a3 /src/wast-parser-lexer-shared.h | |
parent | 68e8642fea319253466bb3bddece75306f808a69 (diff) | |
download | wabt-3d3920f6d9388c46af6725dabb34d98752958d8d.tar.gz wabt-3d3920f6d9388c46af6725dabb34d98752958d8d.tar.bz2 wabt-3d3920f6d9388c46af6725dabb34d98752958d8d.zip |
Rewrite parser as recursive descent (#591)
* Remove Bison dependency
* Remove pre-generated parser files
* Rename build config from no-re2c-bison to no-re2c
* Add a simple make_unique implementation
* Move handling of module bindings into ir.cc
* Simplify lexer
  - Remove lookahead, the parser handles this now
  - Unify Token/LexerToken, it only contains terminal values now
  - Refactor setting token type and value into one function (e.g. LITERAL, RETURN => RETURN_LITERAL)
* New Parser
  - Uses two tokens of lookahead (use Peek(0) or Peek(1))
  - Consume() consumes one token of any kind
  - Match(t) consumes the current token if it matches
  - PeekMatch(t) returns true iff the token matches, but doesn't consume
  - Basic error synchronization; plenty of room for improvement here
Diffstat (limited to 'src/wast-parser-lexer-shared.h')
-rw-r--r-- | src/wast-parser-lexer-shared.h | 114 |
1 files changed, 5 insertions, 109 deletions
```diff
diff --git a/src/wast-parser-lexer-shared.h b/src/wast-parser-lexer-shared.h
index b5774d58..f3b4607a 100644
--- a/src/wast-parser-lexer-shared.h
+++ b/src/wast-parser-lexer-shared.h
@@ -18,120 +18,16 @@
 #define WABT_WAST_PARSER_LEXER_SHARED_H_
 
 #include <cstdarg>
-#include <memory>
-
-#include "common.h"
-#include "error-handler.h"
-#include "ir.h"
-#include "literal.h"
-#include "wast-parser.h"
-
-#define WABT_WAST_PARSER_STYPE Token
-#define WABT_WAST_PARSER_LTYPE Location
-#define YYSTYPE WABT_WAST_PARSER_STYPE
-#define YYLTYPE WABT_WAST_PARSER_LTYPE
 
 namespace wabt {
 
-// Terminal types are C-style structs so they don't need to be allocated. Any
-// string memory used by terminals is shared with the lexer and does not need
-// to be dellocated.
-
-struct StringTerminal {
-  const char* data;
-  size_t size;
-
-  // Helper functions.
-  std::string to_string() const { return std::string(data, size); }
-  string_view to_string_view() const { return string_view(data, size); }
-};
-
-struct LiteralTerminal {
-  LiteralType type;
-  StringTerminal text;
-};
-
-struct Literal {
-  explicit Literal(LiteralTerminal terminal)
-      : type(terminal.type), text(terminal.text.to_string()) {}
-
-  LiteralType type;
-  std::string text;
-};
-
-typedef std::vector<std::string> TextVector;
-
-union Token {
-  // Terminals
-  StringTerminal t_text;
-  Type t_type;
-  Opcode t_opcode;
-  LiteralTerminal t_literal;
-
-  Token() {}
-
-  // Non-terminals
-  // Some of these use pointers to keep the size of Token down; copying the
-  // tokens is a hotspot when parsing large files.
-  Action* action;
-  Block* block;
-  Catch* catch_;
-  Command* command;
-  CommandPtrVector* commands;
-  Const const_;
-  ConstVector* consts;
-  DataSegment* data_segment;
-  ElemSegment* elem_segment;
-  Exception* exception;
-  Export* export_;
-  Expr* expr;
-  ExprList* expr_list;
-  Func* func;
-  FuncSignature* func_sig;
-  FuncType* func_type;
-  Global* global;
-  Import* import;
-  Limits limits;
-  Literal* literal;
-  Memory* memory;
-  Module* module;
-  ModuleField* module_field;
-  ModuleFieldList* module_fields;
-  ScriptModule* script_module;
-  Script* script;
-  std::string* string;
-  Table* table;
-  TextVector* texts;
-  TryExpr* try_expr;
-  TypeVector* types;
-  uint32_t u32;
-  uint64_t u64;
-  Var* var;
-  VarVector* vars;
-};
-
-struct WastParser {
-  Script* script;
-  ErrorHandler* error_handler;
-  int errors;
-  /* Cached pointers to reallocated parser buffers, so they don't leak. */
-  int16_t* yyssa;
-  YYSTYPE* yyvsa;
-  YYLTYPE* yylsa;
-  WastParseOptions* options;
-};
+class ErrorHandler;
+struct Location;
+class WastLexer;
 
-int WastLexerLex(union Token*,
-                 struct Location*,
-                 WastLexer*,
-                 struct WastParser*);
-void WABT_PRINTF_FORMAT(4, 5) WastParserError(struct Location*,
-                                              WastLexer*,
-                                              struct WastParser*,
-                                              const char*,
-                                              ...);
+// TODO(binji): Move this somewhere else.
 void WastFormatError(ErrorHandler*,
-                     const struct Location*,
+                     const Location*,
                      WastLexer*,
                      const char* format,
                      va_list);
```