[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v4 22/26] qidl: add lexer library (based on QC parser)
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [PATCH v4 22/26] qidl: add lexer library (based on QC parser) |
Date: |
Tue, 16 Oct 2012 09:26:38 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20121009 Thunderbird/16.0 |
Il 12/10/2012 23:11, Michael Roth ha scritto:
> Adds an abstract Lexer class to handle tokenization via a
> peek/pop/peekline/popline interface, along with an implementation for C
> based on the lexer from qc.git
>
> Reviewed-by: Paolo Bonzini <address@hidden>
> Signed-off-by: Michael Roth <address@hidden>
Hmm, this does not print a filename and line, which makes it quite bad
from the usability PoV.
Can you squash in the following please?
diff --git a/scripts/lexer.py b/scripts/lexer.py
index 96c6c1a..f457292 100644
--- a/scripts/lexer.py
+++ b/scripts/lexer.py
@@ -16,16 +16,27 @@
class Input(object):
def __init__(self, fp):
self.fp = fp
+ self.filename = fp.name
+ self.lineno = 0
self.line = None
self.offset = 0
self.is_eof = False
self.__fill_buf()
+ def __repr__(self):
+ return "%s:%d" % (str(self.filename), self.lineno)
+
def __fill_buf(self):
if not self.line and not self.is_eof:
self.line = self.fp.readline()
if not self.line:
self.is_eof = True
+ else:
+ self.lineno = self.lineno + 1
+
+ def set_next_line(self, filename, lineno):
+ self.filename = filename
+ self.lineno = lineno - 1
def peek(self):
if self.is_eof:
@@ -101,12 +112,12 @@ class Lexer(object):
def pop_expected(self, type_expected=None, value_expected=None):
self.__ensure_token()
if self.current_type != type_expected:
- raise Exception("expected '%s', got %s %s" %
- (type_expected, self.current_type, self.current_value))
+ raise Exception("%s: expected '%s', got %s %s" %
+ (self.input, type_expected, self.current_type, self.current_value))
if value_expected != None:
if self.current_value != value_expected:
- raise Exception("expected '%s', got %s" %
- (value_expected, self.current_value))
+ raise Exception("%s: expected '%s', got %s" %
+ (self.input, value_expected, self.current_value))
return self.pop()
def check_token(self, type_expected, value_expected=None):
@@ -300,7 +311,11 @@ class CLexer(Lexer):
token += ch
self.input.pop()
ch = self.input.peek()
- return ('directive', token)
+ if token[1] == ' ':
+ tokens = token.split()
+ self.input.set_next_line(tokens[2][1:-1], int(tokens[1]))
+ else:
+ return ('directive', token)
else:
return ('unknown', ch)
return (None, None)
plus perhaps something to generate errors with locations from qidl_parser.py.
Paolo
> ---
> scripts/lexer.py | 306 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 306 insertions(+)
> create mode 100644 scripts/lexer.py
>
> diff --git a/scripts/lexer.py b/scripts/lexer.py
> new file mode 100644
> index 0000000..96c6c1a
> --- /dev/null
> +++ b/scripts/lexer.py
> @@ -0,0 +1,306 @@
> +#
> +# QEMU Lexer Library
> +#
> +# Copyright IBM, Corp. 2012
> +#
> +# Authors:
> +# Anthony Liguori <address@hidden>
> +# Michael Roth <address@hidden>
> +#
> +# This work is licensed under the terms of the GNU GPLv2 or later.
> +# See the COPYING file in the top-level directory.
> +#
> +# The lexer code is based off of:
> +# http://www.lysator.liu.se/c/ANSI-C-grammar-l.html
> +
> +class Input(object):
> + def __init__(self, fp):
> + self.fp = fp
> + self.line = None
> + self.offset = 0
> + self.is_eof = False
> + self.__fill_buf()
> +
> + def __fill_buf(self):
> + if not self.line and not self.is_eof:
> + self.line = self.fp.readline()
> + if not self.line:
> + self.is_eof = True
> +
> + def peek(self):
> + if self.is_eof:
> + return ""
> + return self.line[self.offset]
> +
> + def pop(self):
> + if self.is_eof:
> + return ""
> + ch = self.line[self.offset]
> + self.offset += 1
> + if self.offset == len(self.line):
> + self.offset = 0
> + self.line = None
> + self.__fill_buf()
> + return ch
> +
> + def peek_line(self):
> + return self.line
> +
> + def pop_line(self):
> + line = self.line
> + self.line = None
> + self.offset = 0
> + self.__fill_buf()
> + return line
> +
> + def eof(self):
> + return self.is_eof
> +
> +class Lexer(object):
> + def __init__(self, input, ignored_types=[]):
> + self.input = input
> + self.ignored_types = ignored_types
> + self.current_type = None
> + self.current_value = None
> +
> + def get_token(self):
> + raise NotImplemented("derived classes must implement this method")
> +
> + def __ensure_token(self):
> + while self.current_type == None and not self.input.eof():
> + t, v = self.get_token()
> + if t not in self.ignored_types:
> + self.current_type = t
> + self.current_value = v
> +
> + def peek(self):
> + self.__ensure_token()
> + return self.current_value
> +
> + def peek_line(self):
> + self.__ensure_token()
> + return self.input.peek_line()
> +
> + def peek_type(self):
> + self.__ensure_token()
> + return self.current_type
> +
> + def pop(self):
> + self.__ensure_token()
> + v = self.current_value
> + self.current_type = None
> + self.current_value = None
> + return v
> +
> + def pop_line(self):
> + self.__ensure_token()
> + self.current_type = None
> + self.current_value = None
> + return self.input.pop_line()
> +
> + def pop_expected(self, type_expected=None, value_expected=None):
> + self.__ensure_token()
> + if self.current_type != type_expected:
> + raise Exception("expected '%s', got %s %s" %
> + (type_expected, self.current_type, self.current_value))
> + if value_expected != None:
> + if self.current_value != value_expected:
> + raise Exception("expected '%s', got %s" %
> + (value_expected, self.current_value))
> + return self.pop()
> +
> + def check_token(self, type_expected, value_expected=None):
> + self.__ensure_token()
> + if self.current_type != type_expected:
> + return False
> + if value_expected != None:
> + if self.current_value != value_expected:
> + return False
> + return True
> +
> + def eof(self):
> + self.__ensure_token()
> + return self.current_type == None
> +
> +def in_range(ch, start, end):
> + if ch >= start and ch <= end:
> + return True
> + return False
> +
> +# D [0-9]
> +# L [a-zA-Z_]
> +# H [a-fA-F0-9]
> +# E [Ee][+-]?{D}+
> +# FS (f|F|l|L)
> +# IS (u|U|l|L)*
> +
> +def is_D(ch):
> + return in_range(ch, '0', '9')
> +
> +def is_L(ch):
> + return in_range(ch, 'a', 'z') or in_range(ch, 'A', 'Z') or ch == '_'
> +
> +def is_H(ch):
> + return in_range(ch, 'a', 'f') or in_range(ch, 'A', 'F') or is_D(ch)
> +
> +def is_FS(ch):
> + return ch in 'fFlL'
> +
> +def is_IS(ch):
> + return ch in 'uUlL'
> +
> +class CLexer(Lexer):
> + def __init__(self, input, ignored_types=[]):
> + super(CLexer, self).__init__(input, ignored_types)
> +
> + # used internally, external users should use
> + # CLexer.peek()/peek_type()/pop() instead
> + def get_token(self):
> + token = ''
> + while not self.input.eof():
> + ch = self.input.peek()
> +
> + if is_L(ch):
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + while is_L(ch) or is_D(ch):
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + if token in [ 'auto', 'break', 'case', 'const', 'continue',
> + 'default', 'do', 'else', 'enum', 'extern',
> + 'for', 'goto', 'if', 'register', 'return',
> + 'signed', 'sizeof',
> + 'static', 'struct', 'typedef', 'union',
> + 'unsigned', 'volatile', 'while' ]:
> + return (token, token)
> + else:
> + return ('symbol', token)
> + elif ch == "'":
> + token += ch
> + self.input.pop()
> +
> + ch = self.input.peek()
> + if ch == '\\':
> + token += ch
> + self.input.pop()
> + token += self.input.pop()
> + else:
> + token += ch
> + token += self.input.pop()
> + return ('literal', token)
> + elif ch == '"':
> + token += ch
> + self.input.pop()
> +
> + ch = self.input.peek()
> + while ch not in ['', '"']:
> + token += ch
> + self.input.pop()
> + if ch == '\\':
> + token += self.input.pop()
> + ch = self.input.peek()
> + token += ch
> + self.input.pop()
> + return ('literal', token)
> + elif ch in '.><+-*/%&^|!;{},:=()[]~?':
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + tmp_token = token + ch
> + if tmp_token in ['<:']:
> + return ('operator', '[')
> + elif tmp_token in [':>']:
> + return ('operator', ']')
> + elif tmp_token in ['<%']:
> + return ('operator', '{')
> + elif tmp_token in ['%>']:
> + return ('operator', '}')
> + elif tmp_token == '//':
> + token = tmp_token
> + ch = self.input.peek()
> + while ch != '\n' and ch != '':
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + return ('comment', token)
> + elif tmp_token == '/*':
> + token = tmp_token
> + self.input.pop()
> +
> + ch = self.input.peek()
> + while True:
> + while ch != '*':
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + if ch == '/':
> + token += ch
> + self.input.pop()
> + break
> + return ('comment', token)
> + elif tmp_token in [ '+=', '-=', '*=', '/=', '%=', '&=', '^=',
> + '|=', '>>', '<<', '++', '--', '->', '&&',
> + '||', '<=', '>=', '==', '!=' ]:
> + return ('operator', tmp_token)
> + else:
> + return ('operator', token)
> + elif ch == '0':
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + if ch in 'xX':
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + while is_H(ch):
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + while is_IS(ch):
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + elif is_D(ch):
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + while is_D(ch):
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + return ('literal', token)
> + elif is_D(ch):
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + while is_D(ch):
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + return ('literal', token)
> + elif ch in ' \t\v\n\f':
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + while len(ch) and ch in ' \t\v\n\f':
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + return ('whitespace', token)
> + elif ch in '#':
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + while len(ch) and ch != '\n':
> + token += ch
> + self.input.pop()
> + ch = self.input.peek()
> + return ('directive', token)
> + else:
> + return ('unknown', ch)
> + return (None, None)
>
- [Qemu-devel] [PATCH v4 15/26] qapi: add open-coded visitor for struct tm types, (continued)
- [Qemu-devel] [PATCH v4 15/26] qapi: add open-coded visitor for struct tm types, Michael Roth, 2012/10/12
- [Qemu-devel] [PATCH v4 11/26] qapi: QmpInputVisitor, don't re-allocate memory in start_struct, Michael Roth, 2012/10/12
- [Qemu-devel] [PATCH v4 19/26] module additions for schema registration, Michael Roth, 2012/10/12
- [Qemu-devel] [PATCH v4 14/26] qapi: qapi.py, make json parser more robust, Michael Roth, 2012/10/12
- [Qemu-devel] [PATCH v4 17/26] qom-fuse: force single-threaded mode to avoid QMP races, Michael Roth, 2012/10/12
- [Qemu-devel] [PATCH v4 18/26] qom-fuse: workaround for truncated properties > 4096, Michael Roth, 2012/10/12
- [Qemu-devel] [PATCH v4 16/26] qapi: Improve existing docs and document annotated QAPI types, Michael Roth, 2012/10/12
- [Qemu-devel] [PATCH v4 20/26] qdev: move Property-related declarations to qdev-properties.h, Michael Roth, 2012/10/12
- [Qemu-devel] [PATCH v4 21/26] qidl: add documentation, Michael Roth, 2012/10/12
- [Qemu-devel] [PATCH v4 22/26] qidl: add lexer library (based on QC parser), Michael Roth, 2012/10/12
- Re: [Qemu-devel] [PATCH v4 22/26] qidl: add lexer library (based on QC parser),
Paolo Bonzini <=
- [Qemu-devel] [PATCH v4 23/26] qidl: add C parser (based on QC parser), Michael Roth, 2012/10/12
- [Qemu-devel] [PATCH v4 24/26] qidl: add QAPI-based code generator, Michael Roth, 2012/10/12
- Re: [Qemu-devel] [PATCH v4 24/26] qidl: add QAPI-based code generator, Paolo Bonzini, 2012/10/15
- Re: [Qemu-devel] [PATCH v4 24/26] qidl: add QAPI-based code generator, Paolo Bonzini, 2012/10/15
- Re: [Qemu-devel] [PATCH v4 24/26] qidl: add QAPI-based code generator, Michael Roth, 2012/10/15
- Re: [Qemu-devel] [PATCH v4 24/26] qidl: add QAPI-based code generator, Michael Roth, 2012/10/15
- Re: [Qemu-devel] [PATCH v4 24/26] qidl: add QAPI-based code generator, Paolo Bonzini, 2012/10/16
- Re: [Qemu-devel] [PATCH v4 24/26] qidl: add QAPI-based code generator, Michael Roth, 2012/10/18
- Re: [Qemu-devel] [PATCH v4 24/26] qidl: add QAPI-based code generator, Paolo Bonzini, 2012/10/19
[Qemu-devel] [PATCH v4 25/26] qidl: qidl.h, definitions for qidl annotations, Michael Roth, 2012/10/12