commit-gnue
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

r5733 - in trunk/gnue-common/src/external: . plex


From: jcater
Subject: r5733 - in trunk/gnue-common/src/external: . plex
Date: Fri, 16 Apr 2004 18:41:03 -0500 (CDT)

Author: jcater
Date: 2004-04-16 18:41:02 -0500 (Fri, 16 Apr 2004)
New Revision: 5733

Added:
   trunk/gnue-common/src/external/plex/
   trunk/gnue-common/src/external/plex/Actions.py
   trunk/gnue-common/src/external/plex/DFA.py
   trunk/gnue-common/src/external/plex/Errors.py
   trunk/gnue-common/src/external/plex/Lexicons.py
   trunk/gnue-common/src/external/plex/Machines.py
   trunk/gnue-common/src/external/plex/Regexps.py
   trunk/gnue-common/src/external/plex/Scanners.py
   trunk/gnue-common/src/external/plex/Timing.py
   trunk/gnue-common/src/external/plex/Traditional.py
   trunk/gnue-common/src/external/plex/Transitions.py
   trunk/gnue-common/src/external/plex/__init__.py
   trunk/gnue-common/src/external/plex/test_tm.py
Log:
added plex

Added: trunk/gnue-common/src/external/plex/Actions.py
===================================================================
--- trunk/gnue-common/src/external/plex/Actions.py      2004-04-16 23:38:32 UTC 
(rev 5732)
+++ trunk/gnue-common/src/external/plex/Actions.py      2004-04-16 23:41:02 UTC 
(rev 5733)
@@ -0,0 +1,109 @@
+#=======================================================================
+#
+#   Python Lexical Analyser
+#
+#   Actions for use in token specifications
+#
+#=======================================================================
+
+class Action:
+
+  def same_as(self, other):
+    return self is other
+
+
+class Return(Action):
+  """
+  Internal Plex action which causes |value| to
+  be returned as the value of the associated token
+  """
+
+  value = None
+
+  def __init__(self, value):
+    self.value = value
+
+  def perform(self, token_stream, text):
+    return self.value
+
+  def same_as(self, other):
+    return isinstance(other, Return) and self.value == other.value
+
+  def __repr__(self):
+    return "Return(%s)" % repr(self.value)
+
+
+class Call(Action):
+  """
+  Internal Plex action which causes a function to be called.
+  """
+
+  function = None
+
+  def __init__(self, function):
+    self.function = function
+
+  def perform(self, token_stream, text):
+    return self.function(token_stream, text)
+
+  def __repr__(self):
+    return "Call(%s)" % self.function.__name__
+
+  def same_as(self, other):
+    return isinstance(other, Call) and self.function is other.function
+
+
+class Begin(Action):
+  """
+  Begin(state_name) is a Plex action which causes the Scanner to
+  enter the state |state_name|. See the docstring of Plex.Lexicon
+  for more information.
+  """
+
+  state_name = None
+
+  def __init__(self, state_name):
+    self.state_name = state_name
+
+  def perform(self, token_stream, text):
+    token_stream.begin(self.state_name)
+
+  def __repr__(self):
+    return "Begin(%s)" % (self.state_name,)
+
+  def same_as(self, other):
+    return isinstance(other, Begin) and self.state_name == other.state_name
+
+
+class Ignore(Action):
+  """
+  IGNORE is a Plex action which causes its associated token
+  to be ignored. See the docstring of Plex.Lexicon  for more
+  information.
+  """
+  def perform(self, token_stream, text):
+    return None
+
+  def __repr__(self):
+    return "IGNORE"
+
+IGNORE = Ignore()
+IGNORE.__doc__ = Ignore.__doc__
+
+class Text(Action):
+  """
+  TEXT is a Plex action which causes the text of a token to
+  be returned as the value of the token. See the docstring of
+  Plex.Lexicon  for more information.
+  """
+
+  def perform(self, token_stream, text):
+    return text
+
+  def __repr__(self):
+    return "TEXT"
+
+TEXT = Text()
+TEXT.__doc__ = Text.__doc__
+
+

Added: trunk/gnue-common/src/external/plex/DFA.py
===================================================================
--- trunk/gnue-common/src/external/plex/DFA.py  2004-04-16 23:38:32 UTC (rev 
5732)
+++ trunk/gnue-common/src/external/plex/DFA.py  2004-04-16 23:41:02 UTC (rev 
5733)
@@ -0,0 +1,156 @@
+#=======================================================================
+#
+#   Python Lexical Analyser
+#
+#   Converting NFA to DFA
+#
+#=======================================================================
+
+import Machines
+from Machines import LOWEST_PRIORITY
+from Transitions import TransitionMap
+
+def nfa_to_dfa(old_machine, debug = None):
+  """
+  Given a nondeterministic Machine, return a new equivalent
+  Machine which is deterministic.
+  """
+  # We build a new machine whose states correspond to sets of states
+  # in the old machine. Initially we add a new state corresponding to
+  # the epsilon-closure of each initial old state. Then we give transitions
+  # to each new state which are the union of all transitions out of any
+  # of the corresponding old states. The new state reached on a given
+  # character is the one corresponding to the set of states reachable
+  # on that character from any of the old states. As new combinations of
+  # old states are created, new states are added as needed until closure
+  # is reached.
+  new_machine = Machines.FastMachine()
+  state_map = StateMap(new_machine)
+  # Seed the process using the initial states of the old machine.
+  # Make the corresponding new states into initial states of the new
+  # machine with the same names.
+  for (key, old_state) in old_machine.initial_states.items():
+    new_state = state_map.old_to_new(epsilon_closure(old_state))
+    new_machine.make_initial_state(key, new_state)
+  # Tricky bit here: we add things to the end of this list while we're
+  # iterating over it. The iteration stops when closure is achieved.
+  for new_state in new_machine.states:
+    transitions = TransitionMap()
+    for old_state in state_map.new_to_old(new_state).keys():
+      for event, old_target_states in old_state.transitions.items():
+        if event and old_target_states:
+          transitions.add_set(event, set_epsilon_closure(old_target_states))
+    for event, old_states in transitions.items():
+      new_machine.add_transitions(new_state, event, 
state_map.old_to_new(old_states))
+  if debug:
+    debug.write("\n===== State Mapping =====\n")
+    state_map.dump(debug)
+  return new_machine
+
+def set_epsilon_closure(state_set):
+  """
+  Given a set of states, return the union of the epsilon
+  closures of its member states.
+  """
+  result = {}
+  for state1 in state_set.keys():
+    for state2 in epsilon_closure(state1).keys():
+      result[state2] = 1
+  return result
+
+def epsilon_closure(state):
+  """
+  Return the set of states reachable from the given state
+  by epsilon moves.
+  """
+  # Cache the result
+  result = state.epsilon_closure
+  if result is None:
+    result = {}
+    state.epsilon_closure = result
+    add_to_epsilon_closure(result, state)
+  return result
+
+def add_to_epsilon_closure(state_set, state):
+  """
+  Recursively add to |state_set| states reachable from the given state
+  by epsilon moves.
+  """
+  if not state_set.get(state, 0):
+    state_set[state] = 1
+    state_set_2 = state.transitions.get_epsilon()
+    if state_set_2:
+      for state2 in state_set_2.keys():
+        add_to_epsilon_closure(state_set, state2)
+
+class StateMap:
+  """
+  Helper class used by nfa_to_dfa() to map back and forth between
+  sets of states from the old machine and states of the new machine.
+  """
+  new_machine     = None # Machine
+  old_to_new_dict = None # {(old_state,...) : new_state}
+  new_to_old_dict = None # {id(new_state) : old_state_set}
+
+  def __init__(self, new_machine):
+    self.new_machine = new_machine
+    self.old_to_new_dict = {}
+    self.new_to_old_dict= {}
+
+  def old_to_new(self, old_state_set):
+    """
+    Return the state of the new machine corresponding to the
+    set of old machine states represented by |state_set|. A new
+    state will be created if necessary. If any of the old states
+    are accepting states, the new state will be an accepting state
+    with the highest priority action from the old states.
+    """
+    key = self.make_key(old_state_set)
+    new_state = self.old_to_new_dict.get(key, None)
+    if not new_state:
+      action = self.highest_priority_action(old_state_set)
+      new_state = self.new_machine.new_state(action)
+      self.old_to_new_dict[key] = new_state
+      self.new_to_old_dict[id(new_state)] = old_state_set
+      #for old_state in old_state_set.keys():
+        #new_state.merge_actions(old_state)
+    return new_state
+  
+  def highest_priority_action(self, state_set):
+    best_action = None
+    best_priority = LOWEST_PRIORITY
+    for state in state_set.keys():
+      priority = state.action_priority
+      if priority > best_priority:
+        best_action = state.action
+        best_priority = priority
+    return best_action
+  
+#      def old_to_new_set(self, old_state_set):
+#              """
+#              Return the new state corresponding to a set of old states as
+#              a singleton set.
+#              """
+#              return {self.old_to_new(old_state_set):1}
+
+  def new_to_old(self, new_state):
+    """Given a new state, return a set of corresponding old states."""
+    return self.new_to_old_dict[id(new_state)]
+
+  def make_key(self, state_set):
+    """
+    Convert a set of states into a uniquified
+    sorted tuple suitable for use as a dictionary key.
+    """
+    lst = state_set.keys()
+    lst.sort()
+    return tuple(lst)
+
+  def dump(self, file):
+    from Transitions import state_set_str
+    for new_state in self.new_machine.states:
+      old_state_set = self.new_to_old_dict[id(new_state)]
+      file.write("   State %s <-- %s\n" % (
+        new_state['number'], state_set_str(old_state_set)))
+    
+

Added: trunk/gnue-common/src/external/plex/Errors.py
===================================================================
--- trunk/gnue-common/src/external/plex/Errors.py       2004-04-16 23:38:32 UTC 
(rev 5732)
+++ trunk/gnue-common/src/external/plex/Errors.py       2004-04-16 23:41:02 UTC 
(rev 5733)
@@ -0,0 +1,52 @@
+#=======================================================================
+#
+#   Python Lexical Analyser
+#
+#   Exception classes
+#
+#=======================================================================
+
+import exceptions
+
+class PlexError(exceptions.Exception):
+  message = ""
+
+class PlexTypeError(PlexError, TypeError):
+  pass
+
+class PlexValueError(PlexError, ValueError):
+  pass
+
+class InvalidRegex(PlexError):
+  pass
+
+class InvalidToken(PlexError):
+
+  def __init__(self, token_number, message):
+    PlexError.__init__(self, "Token number %d: %s" % (token_number, message))
+
+class InvalidScanner(PlexError):
+  pass
+
+class AmbiguousAction(PlexError):
+  message = "Two tokens with different actions can match the same string"
+
+  def __init__(self):
+    pass
+
+class UnrecognizedInput(PlexError):
+  scanner = None
+  position = None
+  state_name = None
+
+  def __init__(self, scanner, state_name):
+    self.scanner = scanner
+    self.position = scanner.position()
+    self.state_name = state_name
+
+  def __str__(self):
+    return ("'%s', line %d, char %d: Token not recognised in state %s" 
+            % (self.position + (repr(self.state_name),)))
+
+
+

Added: trunk/gnue-common/src/external/plex/Lexicons.py
===================================================================
--- trunk/gnue-common/src/external/plex/Lexicons.py     2004-04-16 23:38:32 UTC 
(rev 5732)
+++ trunk/gnue-common/src/external/plex/Lexicons.py     2004-04-16 23:41:02 UTC 
(rev 5733)
@@ -0,0 +1,192 @@
+#=======================================================================
+#
+#   Python Lexical Analyser
+#
+#   Lexical Analyser Specification
+#
+#=======================================================================
+
+import types
+
+import Actions
+import DFA
+import Errors
+import Machines
+import Regexps
+
+# debug_flags for Lexicon constructor
+DUMP_NFA = 1
+DUMP_DFA = 2
+
+class State:
+  """
+  This class is used as part of a Plex.Lexicon specification to
+  introduce a user-defined state.
+
+  Constructor:
+
+     State(name, token_specifications)
+  """
+
+  name = None
+  tokens = None
+
+  def __init__(self, name, tokens):
+    self.name = name
+    self.tokens = tokens
+
+class Lexicon:
+  """
+  Lexicon(specification) builds a lexical analyser from the given
+  |specification|. The specification consists of a list of
+  specification items. Each specification item may be either:
+
+     1) A token definition, which is a tuple:
+
+           (pattern, action)
+
+        The |pattern| is a regular axpression built using the
+        constructors defined in the Plex module.
+
+        The |action| is the action to be performed when this pattern
+        is recognised (see below).
+
+     2) A state definition:
+
+           State(name, tokens)
+
+        where |name| is a character string naming the state,
+        and |tokens| is a list of token definitions as
+        above. The meaning and usage of states is described
+        below.
+
+  Actions
+  -------
+
+  The |action| in a token specication may be one of three things:
+
+     1) A function, which is called as follows:
+
+           function(scanner, text)
+
+        where |scanner| is the relevant Scanner instance, and |text|
+        is the matched text. If the function returns anything
+        other than None, that value is returned as the value of the
+        token. If it returns None, scanning continues as if the IGNORE
+        action were specified (see below).
+
+      2) One of the following special actions:
+
+         IGNORE means that the recognised characters will be treated as
+                white space and ignored. Scanning will continue until
+                the next non-ignored token is recognised before returning.
+
+         TEXT   causes the scanned text itself to be returned as the
+                value of the token.
+
+      3) Any other value, which is returned as the value of the token.
+
+  States
+  ------
+
+  At any given time, the scanner is in one of a number of states.
+  Associated with each state is a set of possible tokens. When scanning,
+  only tokens associated with the current state are recognised.
+
+  There is a default state, whose name is the empty string. Token
+  definitions which are not inside any State definition belong to
+  the default state.
+
+  The initial state of the scanner is the default state. The state can
+  be changed in one of two ways:
+
+     1) Using Begin(state_name) as the action of a token.
+
+     2) Calling the begin(state_name) method of the Scanner.
+
+  To change back to the default state, use '' as the state name.
+  """
+
+  machine = None # Machine
+  tables = None # StateTableMachine
+
+  def __init__(self, specifications, debug = None, debug_flags = 7, timings = 
None):
+    if type(specifications) <> types.ListType:
+      raise Errors.InvalidScanner("Scanner definition is not a list")
+    if timings:
+      from Timing import time
+      total_time = 0.0
+      time1 = time()
+    nfa = Machines.Machine()
+    default_initial_state = nfa.new_initial_state('')
+    token_number = 1
+    for spec in specifications:
+      if isinstance(spec, State):
+        user_initial_state = nfa.new_initial_state(spec.name)
+        for token in spec.tokens:
+          self.add_token_to_machine(
+            nfa, user_initial_state, token, token_number)
+          token_number = token_number + 1
+      elif type(spec) == types.TupleType:
+        self.add_token_to_machine(
+          nfa, default_initial_state, spec, token_number)
+        token_number = token_number + 1
+      else:
+        raise Errors.InvalidToken(
+          token_number,
+          "Expected a token definition (tuple) or State instance")
+    if timings:
+      time2 = time()
+      total_time = total_time + (time2 - time1)
+      time3 = time()
+    if debug and (debug_flags & 1):
+      debug.write("\n============= NFA ===========\n")
+      nfa.dump(debug)
+    dfa = DFA.nfa_to_dfa(nfa, debug = (debug_flags & 3) == 3 and debug)
+    if timings:
+      time4 = time()
+      total_time = total_time + (time4 - time3)
+    if debug and (debug_flags & 2):
+      debug.write("\n============= DFA ===========\n")
+      dfa.dump(debug)
+    if timings:
+      timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
+      timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
+      timings.write("TOTAL            : %5.2f\n" % total_time)
+    self.machine = dfa
+
+  def add_token_to_machine(self, machine, initial_state, token_spec, 
token_number):
+    try:
+      (re, action_spec) = self.parse_token_definition(token_spec)
+      # Disabled this -- matching empty strings can be useful
+      #if re.nullable:
+      #  raise Errors.InvalidToken(
+      #    token_number, "Pattern can match 0 input symbols")
+      if isinstance(action_spec, Actions.Action):
+        action = action_spec
+      elif callable(action_spec):
+        action = Actions.Call(action_spec)
+      else:
+        action = Actions.Return(action_spec)
+      final_state = machine.new_state()
+      re.build_machine(machine, initial_state, final_state, 
+                       match_bol = 1, nocase = 0)
+      final_state.set_action(action, priority = -token_number)
+    except Errors.PlexError, e:
+      raise e.__class__("Token number %d: %s" % (token_number, e))
+
+  def parse_token_definition(self, token_spec):
+    if type(token_spec) <> types.TupleType:
+      raise Errors.InvalidToken("Token definition is not a tuple")
+    if len(token_spec) <> 2:
+      raise Errors.InvalidToken("Wrong number of items in token definition")
+    pattern, action = token_spec
+    if not isinstance(pattern, Regexps.RE):
+      raise Errors.InvalidToken("Pattern is not an RE instance")
+    return (pattern, action)
+
+  def get_initial_state(self, name):
+    return self.machine.get_initial_state(name)
+
+
+

Added: trunk/gnue-common/src/external/plex/Machines.py
===================================================================
--- trunk/gnue-common/src/external/plex/Machines.py     2004-04-16 23:38:32 UTC 
(rev 5732)
+++ trunk/gnue-common/src/external/plex/Machines.py     2004-04-16 23:41:02 UTC 
(rev 5733)
@@ -0,0 +1,326 @@
+#=======================================================================
+#
+#   Python Lexical Analyser
+#
+#   Classes for building NFAs and DFAs
+#
+#=======================================================================
+
+import string
+import sys
+from sys import maxint
+from types import TupleType
+
+from Transitions import TransitionMap
+
+LOWEST_PRIORITY = -sys.maxint
+
+class Machine:
+  """A collection of Nodes representing an NFA or DFA."""
+  states = None         # [Node]
+  next_state_number = 1
+  initial_states = None # {(name, bol): Node}
+
+  def __init__(self):
+    self.states = []
+    self.initial_states = {}
+
+  def __del__(self):
+    #print "Destroying", self ###
+    for state in self.states:
+      state.destroy()
+
+  def new_state(self):
+    """Add a new state to the machine and return it."""
+    s = Node()
+    n = self.next_state_number
+    self.next_state_number = n + 1
+    s.number = n
+    self.states.append(s)
+    return s
+
+  def new_initial_state(self, name):
+    state = self.new_state()
+    self.make_initial_state(name, state)
+    return state
+
+  def make_initial_state(self, name, state):
+    self.initial_states[name] = state
+
+  def get_initial_state(self, name):
+    return self.initial_states[name]
+  
+  def dump(self, file):
+    file.write("Plex.Machine:\n")
+    if self.initial_states is not None:
+      file.write("   Initial states:\n")
+      for (name, state) in self.initial_states.items():
+        file.write("      '%s': %d\n" % (name, state.number))
+    for s in self.states:
+      s.dump(file)
+
+class Node:
+  """A state of an NFA or DFA."""
+  transitions = None       # TransitionMap
+  action = None            # Action
+  action_priority = None   # integer
+  number = 0               # for debug output
+  epsilon_closure = None   # used by nfa_to_dfa()
+
+  def __init__(self):
+    # Preinitialise the list of empty transitions, because
+    # the nfa-to-dfa algorithm needs it
+    #self.transitions = {'':[]}
+    self.transitions = TransitionMap()
+    self.action_priority = LOWEST_PRIORITY
+
+  def destroy(self):
+    #print "Destroying", self ###
+    self.transitions = None
+    self.action = None
+    self.epsilon_closure = None
+
+  def add_transition(self, event, new_state):
+    self.transitions.add(event, new_state)
+  
+  def link_to(self, state):
+    """Add an epsilon-move from this state to another state."""
+    self.add_transition('', state)
+
+  def set_action(self, action, priority):
+    """Make this an accepting state with the given action. If 
+    there is already an action, choose the action with highest
+    priority."""
+    if priority > self.action_priority:
+      self.action = action
+      self.action_priority = priority
+
+  def get_action(self):
+    return self.action
+
+  def get_action_priority(self):
+    return self.action_priority
+
+#      def merge_actions(self, other_state):
+#              """Merge actions of other state into this state according
+#    to their priorities."""
+#              action = other_state.get_action()
+#              priority = other_state.get_action_priority()
+#              self.set_action(action, priority)
+
+  def is_accepting(self):
+    return self.action is not None
+
+  def __str__(self):
+    return "State %d" % self.number
+
+  def dump(self, file):
+    import string
+    # Header
+    file.write("   State %d:\n" % self.number)
+    # Transitions
+#              self.dump_transitions(file)
+    self.transitions.dump(file)
+    # Action
+    action = self.action
+    priority = self.action_priority
+    if action is not None:
+      file.write("      %s [priority %d]\n" % (action, priority))
+  
+
+class FastMachine:
+  """
+  FastMachine is a deterministic machine represented in a way that
+  allows fast scanning.
+  """
+  initial_states = None # {state_name:state}
+  states = None         # [state]
+                        # where state = {event:state, 'else':state, 
'action':Action}
+  next_number = 1       # for debugging
+  
+  new_state_template = {
+    '':None, 'bol':None, 'eol':None, 'eof':None, 'else':None
+  }
+  
+  def __init__(self, old_machine = None):
+    self.initial_states = initial_states = {}
+    self.states = []
+    if old_machine:
+      self.old_to_new = old_to_new = {}
+      for old_state in old_machine.states:
+        new_state = self.new_state()
+        old_to_new[old_state] = new_state
+      for name, old_state in old_machine.initial_states.items():
+        initial_states[name] = old_to_new[old_state]
+      for old_state in old_machine.states:
+        new_state = old_to_new[old_state]
+        for event, old_state_set in old_state.transitions.items():
+          if old_state_set:
+            new_state[event] = old_to_new[old_state_set.keys()[0]]
+          else:
+            new_state[event] = None
+        new_state['action'] = old_state.action
+  
+  def __del__(self):
+    for state in self.states:
+      state.clear()
+  
+  def new_state(self, action = None):
+    number = self.next_number
+    self.next_number = number + 1
+    result = self.new_state_template.copy()
+    result['number'] = number
+    result['action'] = action
+    self.states.append(result)
+    return result
+  
+  def make_initial_state(self, name, state):
+    self.initial_states[name] = state
+  
+  def add_transitions(self, state, event, new_state):
+    if type(event) == TupleType:
+      code0, code1 = event
+      if code0 == -maxint:
+        state['else'] = new_state
+      elif code1 <> maxint:
+        while code0 < code1:
+          state[chr(code0)] = new_state
+          code0 = code0 + 1
+    else:
+      state[event] = new_state
+  
+  def get_initial_state(self, name):
+    return self.initial_states[name]
+  
+  def dump(self, file):
+    file.write("Plex.FastMachine:\n")
+    file.write("   Initial states:\n")
+    for name, state in self.initial_states.items():
+      file.write("      %s: %s\n" % (repr(name), state['number']))
+    for state in self.states:
+      self.dump_state(state, file)
+
+  def dump_state(self, state, file):
+    import string
+    # Header
+    file.write("   State %d:\n" % state['number'])
+    # Transitions
+    self.dump_transitions(state, file)
+    # Action
+    action = state['action']
+    if action is not None:
+      file.write("      %s\n" % action)
+  
+  def dump_transitions(self, state, file):
+    chars_leading_to_state = {}
+    special_to_state = {}
+    for (c, s) in state.items():
+      if len(c) == 1:
+        chars = chars_leading_to_state.get(id(s), None)
+        if chars is None:
+          chars = []
+          chars_leading_to_state[id(s)] = chars
+        chars.append(c)
+      elif len(c) <= 4:
+        special_to_state[c] = s
+    ranges_to_state = {}
+    for state in self.states:
+      char_list = chars_leading_to_state.get(id(state), None)
+      if char_list:
+        ranges = self.chars_to_ranges(char_list)
+        ranges_to_state[ranges] = state
+    ranges_list = ranges_to_state.keys()
+    ranges_list.sort()
+    for ranges in ranges_list:
+      key = self.ranges_to_string(ranges)
+      state = ranges_to_state[ranges]
+      file.write("      %s --> State %d\n" % (key, state['number']))
+    for key in ('bol', 'eol', 'eof', 'else'):
+      state = special_to_state.get(key, None)
+      if state:
+        file.write("      %s --> State %d\n" % (key, state['number']))
+
+  def chars_to_ranges(self, char_list):
+    char_list.sort()
+    i = 0
+    n = len(char_list)
+    result = []
+    while i < n:
+      c1 = ord(char_list[i])
+      c2 = c1
+      i = i + 1
+      while i < n and ord(char_list[i]) == c2 + 1:
+        i = i + 1
+        c2 = c2 + 1
+      result.append((chr(c1), chr(c2)))
+    return tuple(result)
+  
+  def ranges_to_string(self, range_list):
+    return string.join(map(self.range_to_string, range_list), ",")
+  
+  def range_to_string(self, (c1, c2)):
+    if c1 == c2:
+      return repr(c1)
+    else:
+      return "%s..%s" % (repr(c1), repr(c2))
+##
+## (Superseded by Machines.FastMachine)
+##
+## class StateTableMachine:
+##   """
+##   StateTableMachine is an alternative representation of a Machine
+##   that can be run more efficiently.
+##   """
+##   initial_states = None # {state_name:state_index}
+##   states = None # [([state] indexed by char code, Action)] 
+  
+##   special_map = {'bol':256, 'eol':257, 'eof':258}
+  
+##   def __init__(self, m):
+##     """
+##     Initialise StateTableMachine from Machine |m|.
+##     """
+##     initial_states = self.initial_states = {}
+##     states = self.states = [None]
+##     old_to_new = {}
+##     i = 1
+##     for old_state in m.states:
+##       new_state = ([0] * 259, old_state.get_action())
+##       states.append(new_state)
+##       old_to_new[old_state] = i # new_state
+##       i = i + 1
+##     for name, old_state in m.initial_states.items():
+##       initial_states[name] = old_to_new[old_state]
+##     for old_state in m.states:
+##       new_state_index = old_to_new[old_state]
+##       new_table = states[new_state_index][0]
+##       transitions = old_state.transitions
+##       for c, old_targets in transitions.items():
+##         if old_targets:
+##           old_target = old_targets[0]
+##           new_target_index = old_to_new[old_target]
+##           if len(c) == 1:
+##             a = ord(c)
+##           else:
+##             a = self.special_map[c]
+##           new_table[a] = states[new_target_index]
+
+##   def dump(self, f):
+##     f.write("Plex.StateTableMachine:\n")
+##     f.write("    Initial states:\n")
+##     for name, index in self.initial_states.items():
+##       f.write("        %s: State %d\n" % (
+##         repr(name), id(self.states[index])))
+##     for i in xrange(1, len(self.states)):
+##       table, action = self.states[i]
+##       f.write("    State %d:" % i)
+##       if action:
+##         f.write("%s" % action)
+##       f.write("\n")
+##       f.write("        %s\n" % map(id,table))
+      
+      
+      
+      
+
+

Added: trunk/gnue-common/src/external/plex/Regexps.py
===================================================================
--- trunk/gnue-common/src/external/plex/Regexps.py      2004-04-16 23:38:32 UTC 
(rev 5732)
+++ trunk/gnue-common/src/external/plex/Regexps.py      2004-04-16 23:41:02 UTC 
(rev 5733)
@@ -0,0 +1,557 @@
+#=======================================================================
+#
+#       Python Lexical Analyser
+#
+#       Regular Expressions
+#
+#=======================================================================
+
+import array
+import string
+import types
+from sys import maxint
+
+import Errors
+
+#
+#       Constants
+#
+
+BOL = 'bol'
+EOL = 'eol'
+EOF = 'eof'
+
+nl_code = ord('\n')
+
+#
+#       Helper functions
+#
+
+def chars_to_ranges(s):
+  """
+  Return a list of character codes consisting of pairs
+  [code1a, code1b, code2a, code2b,...] which cover all
+  the characters in |s|.
+  """
+  char_list = list(s)
+  char_list.sort()
+  i = 0
+  n = len(char_list)
+  result = []
+  while i < n:
+    code1 = ord(char_list[i])
+    code2 = code1 + 1
+    i = i + 1
+    while i < n and code2 >= ord(char_list[i]):
+      code2 = code2 + 1
+      i = i + 1
+    result.append(code1)
+    result.append(code2)
+  return result
+
+def uppercase_range(code1, code2):
+  """
+  If the range of characters from code1 to code2-1 includes any
+  lower case letters, return the corresponding upper case range.
+  """
+  code3 = max(code1, ord('a'))
+  code4 = min(code2, ord('z') + 1)
+  if code3 < code4:
+    d = ord('A') - ord('a')
+    return (code3 + d, code4 + d)
+  else:
+    return None
+
+def lowercase_range(code1, code2):
+  """
+  If the range of characters from code1 to code2-1 includes any
+  upper case letters, return the corresponding lower case range.
+  """
+  code3 = max(code1, ord('A'))
+  code4 = min(code2, ord('Z') + 1)
+  if code3 < code4:
+    d = ord('a') - ord('A')
+    return (code3 + d, code4 + d)
+  else:
+    return None
+
+def CodeRanges(code_list):
+  """
+  Given a list of codes as returned by chars_to_ranges, return
+  an RE which will match a character in any of the ranges.
+  """
+  re_list = []
+  for i in xrange(0, len(code_list), 2):
+    re_list.append(CodeRange(code_list[i], code_list[i + 1]))
+  return apply(Alt, tuple(re_list))
+
+def CodeRange(code1, code2):
+  """
+  CodeRange(code1, code2) is an RE which matches any character
+  with a code |c| in the range |code1| <= |c| < |code2|.
+  """
+  if code1 <= nl_code < code2:
+    return Alt(RawCodeRange(code1, nl_code), 
+               RawNewline, 
+               RawCodeRange(nl_code + 1, code2))
+  else:
+    return RawCodeRange(code1, code2)
+
+#
+#       Abstract classes
+#
+
+class RE:
+  """RE is the base class for regular expression constructors.
+  The following operators are defined on REs:
+
+     re1 + re2          is an RE which matches |re1| followed by |re2|
+     re1 | re2          is an RE which matches either |re1| or |re2|
+  """
+
+  nullable = 1 # True if this RE can match 0 input symbols
+  match_nl = 1 # True if this RE can match a string ending with '\n'
+  str = None    # Set to a string to override the class's __str__ result
+  
+  def build_machine(self, machine, initial_state, final_state, 
+                    match_bol, nocase):
+    """
+    This method should add states to |machine| to implement this
+    RE, starting at |initial_state| and ending at |final_state|.
+    If |match_bol| is true, the RE must be able to match at the
+    beginning of a line. If nocase is true, upper and lower case
+    letters should be treated as equivalent.
+    """
+    raise exceptions.UnimplementedMethod("%s.build_machine not implemented" % 
+      self.__class__.__name__)
+  
+  def build_opt(self, m, initial_state, c):
+    """
+    Given a state |s| of machine |m|, return a new state
+    reachable from |s| on character |c| or epsilon.
+    """
+    s = m.new_state()
+    initial_state.link_to(s)
+    initial_state.add_transition(c, s)
+    return s
+
+  def __add__(self, other):
+    return Seq(self, other)
+
+  def __or__(self, other):
+    return Alt(self, other)
+
+  def __str__(self):
+    if self.str:
+      return self.str
+    else:
+      return self.calc_str()
+
+  def check_re(self, num, value):
+    if not isinstance(value, RE):
+      self.wrong_type(num, value, "Plex.RE instance")
+
+  def check_string(self, num, value):
+    if type(value) <> type(''):
+      self.wrong_type(num, value, "string")
+  
+  def check_char(self, num, value):
+    self.check_string(num, value)
+    if len(value) <> 1:
+      raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s."
+        "Expected a string of length 1, got: %s" % (
+          num, self.__class__.__name__, repr(value)))
+
+  def wrong_type(self, num, value, expected):
+    if type(value) == types.InstanceType:
+        got = "%s.%s instance" % (
+          value.__class__.__module__, value.__class__.__name__)
+    else:
+      got = type(value).__name__
+    raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
+                    "(expected %s, got %s" % (
+                      num, self.__class__.__name__, expected, got))
+  
+#
+#       Primitive RE constructors
+#       -------------------------
+#
+#       These are the basic REs from which all others are built.
+#
+
+## class Char(RE):
+##      """
+##      Char(c) is an RE which matches the character |c|.
+##      """
+  
+##      nullable = 0
+  
+##      def __init__(self, char):
+##              self.char = char
+##              self.match_nl = char == '\n'
+    
+##      def build_machine(self, m, initial_state, final_state, match_bol, 
nocase):
+##              c = self.char
+##              if match_bol and c <> BOL:
+##                      s1 = self.build_opt(m, initial_state, BOL)
+##              else:
+##                      s1 = initial_state
+##              if c == '\n' or c == EOF:
+##                      s1 = self.build_opt(m, s1, EOL)
+##              if len(c) == 1:
+##                      code = ord(self.char)
+##                      s1.add_transition((code, code+1), final_state)
+##                      if nocase and is_letter_code(code):
+##                              code2 = other_case_code(code)
+##                              s1.add_transition((code2, code2+1), 
final_state)
+##              else:
+##                      s1.add_transition(c, final_state)
+
+##      def calc_str(self):
+##              return "Char(%s)" % repr(self.char)
+
+def Char(c):
+  """
+  Char(c) is an RE which matches the character |c|.
+  """
+  if len(c) == 1:
+    result = CodeRange(ord(c), ord(c) + 1)
+  else:
+    result = SpecialSymbol(c)
+  result.str = "Char(%s)" % repr(c)
+  return result
+
+class RawCodeRange(RE):
+  """
+  RawCodeRange(code1, code2) is a low-level RE which matches any character
+  with a code |c| in the range |code1| <= |c| < |code2|, where the range
+  does not include newline. For internal use only.
+  """
+  nullable = 0
+  match_nl = 0
+  range = None                                  # (code, code)
+  uppercase_range = None # (code, code) or None
+  lowercase_range = None # (code, code) or None
+  
+  def __init__(self, code1, code2):
+    self.range = (code1, code2)
+    self.uppercase_range = uppercase_range(code1, code2)
+    self.lowercase_range = lowercase_range(code1, code2)
+  
+  def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+    if match_bol:
+      initial_state = self.build_opt(m, initial_state, BOL)
+    initial_state.add_transition(self.range, final_state)
+    if nocase:
+      if self.uppercase_range:
+        initial_state.add_transition(self.uppercase_range, final_state)
+      if self.lowercase_range:
+        initial_state.add_transition(self.lowercase_range, final_state)
+  
+  def calc_str(self):
+    return "CodeRange(%d,%d)" % (self.code1, self.code2)
+
+class _RawNewline(RE):
+  """
+  RawNewline is a low-level RE which matches a newline character.
+  For internal use only.
+  """
+  nullable = 0
+  match_nl = 1
+
+  def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+    if match_bol:
+      initial_state = self.build_opt(m, initial_state, BOL)
+    s = self.build_opt(m, initial_state, EOL)
+    s.add_transition((nl_code, nl_code + 1), final_state)
+
+RawNewline = _RawNewline()
+
+
+class SpecialSymbol(RE):
+  """
+  SpecialSymbol(sym) is an RE which matches the special input
+  symbol |sym|, which is one of BOL, EOL or EOF.
+  """
+  nullable = 0
+  match_nl = 0
+  sym = None
+
+  def __init__(self, sym):
+    self.sym = sym
+
+  def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+    # Sequences 'bol bol' and 'bol eof' are impossible, so only need
+    # to allow for bol if sym is eol
+    if match_bol and self.sym == EOL:
+      initial_state = self.build_opt(m, initial_state, BOL)
+    initial_state.add_transition(self.sym, final_state)
+
+
+class Seq(RE):
+  """Seq(re1, re2, re3...) is an RE which matches |re1| followed by
+  |re2| followed by |re3|..."""
+
+  def __init__(self, *re_list):
+    nullable = 1
+    for i in xrange(len(re_list)):
+      re = re_list[i]
+      self.check_re(i, re)
+      nullable = nullable and re.nullable
+    self.re_list = re_list
+    self.nullable = nullable
+    i = len(re_list)
+    match_nl = 0
+    while i:
+      i = i - 1
+      re = re_list[i]
+      if re.match_nl:
+        match_nl = 1
+        break
+      if not re.nullable:
+        break
+    self.match_nl = match_nl
+    
+  def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+    re_list = self.re_list
+    if len(re_list) == 0:
+      initial_state.link_to(final_state)
+    else:
+      s1 = initial_state
+      n = len(re_list)
+      for i in xrange(n):
+        if i < n - 1:
+          s2 = m.new_state()
+        else:
+          s2 = final_state
+        re = re_list[i]
+        re.build_machine(m, s1, s2, match_bol, nocase)
+        s1 = s2
+        match_bol = re.match_nl or (match_bol and re.nullable)
+
+  def calc_str(self):
+    return "Seq(%s)" % string.join(map(str, self.re_list), ",")
+
+
+class Alt(RE):
+  """Alt(re1, re2, re3...) is an RE which matches either |re1| or
+  |re2| or |re3|..."""
+
+  def __init__(self, *re_list):
+    self.re_list = re_list
+    nullable = 0
+    match_nl = 0
+    nullable_res = []
+    non_nullable_res = []
+    i = 1
+    for re in re_list:
+      self.check_re(i, re)
+      if re.nullable:
+        nullable_res.append(re)
+        nullable = 1
+      else:
+        non_nullable_res.append(re)
+      if re.match_nl:
+        match_nl = 1
+      i = i + 1
+    self.nullable_res = nullable_res
+    self.non_nullable_res = non_nullable_res
+    self.nullable = nullable
+    self.match_nl = match_nl
+
+  def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+    for re in self.nullable_res:
+      re.build_machine(m, initial_state, final_state, match_bol, nocase)
+    if self.non_nullable_res:
+      if match_bol:
+        initial_state = self.build_opt(m, initial_state, BOL)
+      for re in self.non_nullable_res:
+        re.build_machine(m, initial_state, final_state, 0, nocase)
+
+  def calc_str(self):
+    return "Alt(%s)" % string.join(map(str, self.re_list), ",")
+
+
+class Rep1(RE):
+  """Rep1(re) is an RE which matches one or more repetitions of |re|."""
+
+  def __init__(self, re):
+    self.check_re(1, re)
+    self.re = re
+    self.nullable = re.nullable
+    self.match_nl = re.match_nl
+
+  def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+    s1 = m.new_state()
+    s2 = m.new_state()
+    initial_state.link_to(s1)
+    self.re.build_machine(m, s1, s2, match_bol or self.re.match_nl, nocase)
+    s2.link_to(s1)
+    s2.link_to(final_state)
+
+  def calc_str(self):
+    return "Rep1(%s)" % self.re
+
+
+class SwitchCase(RE):
+  """
+  SwitchCase(re, nocase) is an RE which matches the same strings as RE, 
+  but treating upper and lower case letters according to |nocase|. If
+  |nocase| is true, case is ignored, otherwise it is not.
+  """
+  re = None
+  nocase = None
+
+  def __init__(self, re, nocase):
+    self.re = re
+    self.nocase = nocase
+    self.nullable = re.nullable
+    self.match_nl = re.match_nl
+
+  def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+    self.re.build_machine(m, initial_state, final_state, match_bol, 
+                          self.nocase)
+
+  def calc_str(self):
+    if self.nocase:
+      name = "NoCase"
+    else:
+      name = "Case"
+    return "%s(%s)" % (name, self.re)
+
+#
+#       Composite RE constructors
+#       -------------------------
+#
+#       These REs are defined in terms of the primitive REs.
+#
+
+Empty = Seq()
+Empty.__doc__ = \
+  """
+  Empty is an RE which matches the empty string.
+  """
+Empty.str = "Empty"
+
+def Str1(s):
+  """
+  Str1(s) is an RE which matches the literal string |s|.
+  """
+  result = apply(Seq, tuple(map(Char, s)))
+  result.str = "Str(%s)" % repr(s)
+  return result
+
+def Str(*strs):
+  """
+  Str(s) is an RE which matches the literal string |s|.
+  Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|...
+  """
+  if len(strs) == 1:
+    return Str1(strs[0])
+  else:
+    result = apply(Alt, tuple(map(Str1, strs)))
+    result.str = "Str(%s)" % string.join(map(repr, strs), ",")
+    return result
+
+def Any(s):
+  """
+  Any(s) is an RE which matches any character in the string |s|.
+  """
+  #result = apply(Alt, tuple(map(Char, s)))
+  result = CodeRanges(chars_to_ranges(s))
+  result.str = "Any(%s)" % repr(s)
+  return result
+
+def AnyBut(s):
+  """
+  AnyBut(s) is an RE which matches any character (including
+  newline) which is not in the string |s|.
+  """
+  ranges = chars_to_ranges(s)
+  ranges.insert(0, -maxint)
+  ranges.append(maxint)
+  result = CodeRanges(ranges)
+  result.str = "AnyBut(%s)" % repr(s)
+  return result
+
+AnyChar = AnyBut("")
+AnyChar.__doc__ = \
+  """
+  AnyChar is an RE which matches any single character (including a newline).
+  """
+AnyChar.str = "AnyChar"
+
+def Range(s1, s2 = None):
+  """
+  Range(c1, c2) is an RE which matches any single character in the range
+  |c1| to |c2| inclusive.
+  Range(s) where |s| is a string of even length is an RE which matches
+  any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,...
+  """
+  if s2:
+    result = CodeRange(ord(s1), ord(s2) + 1)
+    result.str = "Range(%s,%s)" % (s1, s2)
+  else:
+    ranges = []
+    for i in range(0, len(s1), 2):
+      ranges.append(CodeRange(ord(s1[i]), ord(s1[i+1]) + 1))
+    result = apply(Alt, tuple(ranges))
+    result.str = "Range(%s)" % repr(s1)
+  return result
+
+def Opt(re):
+  """
+  Opt(re) is an RE which matches either |re| or the empty string.
+  """
+  result = Alt(re, Empty)
+  result.str = "Opt(%s)" % re
+  return result
+
+def Rep(re):
+  """
+  Rep(re) is an RE which matches zero or more repetitions of |re|.
+  """
+  result = Opt(Rep1(re))
+  result.str = "Rep(%s)" % re
+  return result
+
+def NoCase(re):
+  """
+  NoCase(re) is an RE which matches the same strings as RE, but treating
+  upper and lower case letters as equivalent.
+  """
+  return SwitchCase(re, nocase = 1)
+
+def Case(re):
+  """
+  Case(re) is an RE which matches the same strings as RE, but treating
+  upper and lower case letters as distinct, i.e. it cancels the effect 
+  of any enclosing NoCase().
+  """
+  return SwitchCase(re, nocase = 0)
+
+#
+#       RE Constants
+#
+  
+Bol = Char(BOL)
+Bol.__doc__ = \
+  """
+  Bol is an RE which matches the beginning of a line.
+  """
+Bol.str = "Bol"
+
+Eol = Char(EOL)
+Eol.__doc__ = \
+  """
+  Eol is an RE which matches the end of a line.
+  """
+Eol.str = "Eol"
+
+Eof = Char(EOF)
+Eof.__doc__ = \
+  """
+  Eof is an RE which matches the end of the file.
+  """
+Eof.str = "Eof"
+

Added: trunk/gnue-common/src/external/plex/Scanners.py
===================================================================
--- trunk/gnue-common/src/external/plex/Scanners.py     2004-04-16 23:38:32 UTC 
(rev 5732)
+++ trunk/gnue-common/src/external/plex/Scanners.py     2004-04-16 23:41:02 UTC 
(rev 5733)
@@ -0,0 +1,377 @@
+#=======================================================================
+#
+#   Python Lexical Analyser
+#
+#
+#   Scanning an input stream
+#
+#=======================================================================
+
+import Errors
+from Regexps import BOL, EOL, EOF
+
+class Scanner:
+  """
+  A Scanner is used to read tokens from a stream of characters
+  using the token set specified by a Plex.Lexicon.
+
+  Constructor:
+
+    Scanner(lexicon, stream, name = '')
+
+      See the docstring of the __init__ method for details.
+
+  Methods:
+
+    See the docstrings of the individual methods for more
+    information.
+
+    read() --> (value, text)
+      Reads the next lexical token from the stream.
+
+    position() --> (name, line, col)
+      Returns the position of the last token read using the
+      read() method.
+    
+    begin(state_name)
+      Causes scanner to change state.
+    
+    produce(value [, text])
+      Causes return of a token value to the caller of the
+      Scanner.
+
+  """
+
+  lexicon = None        # Lexicon
+  stream = None         # file-like object
+  name = ''
+  buffer = ''
+  buf_start_pos = 0     # position in input of start of buffer
+  next_pos = 0          # position in input of next char to read
+  cur_pos = 0           # position in input of current char
+  cur_line = 1          # line number of current char
+  cur_line_start = 0    # position in input of start of current line
+  start_pos = 0         # position in input of start of token
+  start_line = 0        # line number of start of token
+  start_col = 0         # position in line of start of token
+  text = None           # text of last token read
+  initial_state = None  # Node
+  state_name = ''       # Name of initial state
+  queue = None          # list of tokens to be returned
+  trace = 0
+
+  def __init__(self, lexicon, stream, name = ''):
+    """
+    Scanner(lexicon, stream, name = '')
+
+      |lexicon| is a Plex.Lexicon instance specifying the lexical tokens
+      to be recognised.
+
+      |stream| can be a file object or anything which implements a
+      compatible read() method.
+
+      |name| is optional, and may be the name of the file being
+      scanned or any other identifying string.
+    """
+    self.lexicon = lexicon
+    self.stream = stream
+    self.name = name
+    self.queue = []
+    self.initial_state = None
+    self.begin('')
+    self.next_pos = 0
+    self.cur_pos = 0
+    self.cur_line_start = 0
+    self.cur_char = BOL
+    self.input_state = 1
+
+  def read(self):
+    """
+    Read the next lexical token from the stream and return a
+    tuple (value, text), where |value| is the value associated with
+    the token as specified by the Lexicon, and |text| is the actual
+    string read from the stream. Returns (None, '') on end of file.
+    """
+    queue = self.queue
+    while not queue:
+      self.text, action = self.scan_a_token()
+      if action is None:
+        self.produce(None)
+        self.eof()
+      else:
+        value = action.perform(self, self.text)
+        if value is not None:
+          self.produce(value)
+    result = queue[0]
+    del queue[0]
+    return result
+
+  def scan_a_token(self):
+    """
+    Read the next input sequence recognised by the machine
+    and return (text, action). Returns ('', None) on end of
+    file.
+    """
+    self.start_pos = self.cur_pos
+    self.start_line = self.cur_line
+    self.start_col = self.cur_pos - self.cur_line_start
+#              if self.trace:
+#                      action = self.run_machine()
+#              else:
+#                      action = self.run_machine_inlined()
+    action = self.run_machine_inlined()
+    if action:
+      if self.trace:
+        print "Scanner: read: Performing", action, "%d:%d" % (
+          self.start_pos, self.cur_pos)
+      base = self.buf_start_pos
+      text = self.buffer[self.start_pos - base : self.cur_pos - base]
+      return (text, action)
+    else:
+      if self.cur_pos == self.start_pos:
+        if self.cur_char == EOL:
+          self.next_char()
+        if not self.cur_char or self.cur_char == EOF:
+          return ('', None)
+      raise Errors.UnrecognizedInput(self, self.state_name)
+  
+  def run_machine(self):
+    """
+    Run the machine until no more transitions are possible.
+    """
+    self.state = self.initial_state
+    self.backup_state = None
+    while self.transition():
+      pass
+    return self.back_up()
+  
+  def run_machine_inlined(self):
+    """
+    Inlined version of run_machine for speed.
+    """
+    state = self.initial_state
+    cur_pos = self.cur_pos
+    cur_line = self.cur_line
+    cur_line_start = self.cur_line_start
+    cur_char = self.cur_char
+    input_state = self.input_state
+    next_pos = self.next_pos
+    buffer = self.buffer
+    buf_start_pos = self.buf_start_pos
+    buf_len = len(buffer)
+    backup_state = None
+    trace = self.trace
+    while 1:
+      if trace: #TRACE#
+        print "State %d, %d/%d:%s -->" % ( #TRACE#
+          state['number'], input_state, cur_pos, repr(cur_char)),  #TRACE#
+      # Begin inlined self.save_for_backup()
+      #action = state.action address@hidden
+      action = state['action'] address@hidden
+      if action:
+        backup_state = (
+          action, cur_pos, cur_line, cur_line_start, cur_char, input_state, 
next_pos)
+      # End inlined self.save_for_backup()
+      c = cur_char
+      #new_state = state.new_state(c) address@hidden
+      new_state = state.get(c, -1) address@hidden
+      if new_state == -1: address@hidden
+        new_state = c and state.get('else') address@hidden
+      if new_state:
+        if trace: #TRACE#
+          print "State %d" % new_state['number']  #TRACE#
+        state = new_state
+        # Begin inlined: self.next_char()
+        if input_state == 1:
+          cur_pos = next_pos
+          # Begin inlined: c = self.read_char()
+          buf_index = next_pos - buf_start_pos
+          if buf_index < buf_len:
+            c = buffer[buf_index]
+            next_pos = next_pos + 1
+          else:
+            discard = self.start_pos - buf_start_pos
+            data = self.stream.read(0x1000)
+            buffer = self.buffer[discard:] + data
+            self.buffer = buffer
+            buf_start_pos = buf_start_pos + discard
+            self.buf_start_pos = buf_start_pos
+            buf_len = len(buffer)
+            buf_index = buf_index - discard
+            if data:
+              c = buffer[buf_index]
+              next_pos = next_pos + 1
+            else:
+              c = ''
+          # End inlined: c = self.read_char()
+          if c == '\n':
+            cur_char = EOL
+            input_state = 2
+          elif not c:
+            cur_char = EOL
+            input_state = 4
+          else:
+            cur_char = c
+        elif input_state == 2:
+          cur_char = '\n'
+          input_state = 3
+        elif input_state == 3:
+          cur_line = cur_line + 1
+          cur_line_start = cur_pos = next_pos
+          cur_char = BOL
+          input_state = 1
+        elif input_state == 4:
+          cur_char = EOF
+          input_state = 5
+        else: # input_state = 5
+          cur_char = ''
+        # End inlined self.next_char()
+      else: # not new_state
+        if trace: #TRACE#
+          print "blocked"  #TRACE#
+        # Begin inlined: action = self.back_up()
+        if backup_state:
+          (action, cur_pos, cur_line, cur_line_start, 
+            cur_char, input_state, next_pos) = backup_state
+        else:
+          action = None
+        break # while 1
+        # End inlined: action = self.back_up()
+    self.cur_pos = cur_pos
+    self.cur_line = cur_line
+    self.cur_line_start = cur_line_start
+    self.cur_char = cur_char
+    self.input_state = input_state
+    self.next_pos       = next_pos
+    if trace: #TRACE#
+      if action: #TRACE#
+        print "Doing", action #TRACE#
+    return action
+    
+#      def transition(self):
+#              self.save_for_backup()
+#              c = self.cur_char
+#              new_state = self.state.new_state(c)
+#              if new_state:
+#                      if self.trace:
+#                              print "Scanner: read: State %d: %s --> State 
%d" % (
+#                                      self.state.number, repr(c), 
new_state.number)
+#                      self.state = new_state
+#                      self.next_char()
+#                      return 1
+#              else:
+#                      if self.trace:
+#                              print "Scanner: read: State %d: %s --> blocked" 
% (
+#                                      self.state.number, repr(c))
+#                      return 0
+  
+#      def save_for_backup(self):
+#              action = self.state.get_action()
+#              if action:
+#                      if self.trace:
+#                              print "Scanner: read: Saving backup point at", 
self.cur_pos
+#                      self.backup_state = (
+#                              action, self.cur_pos, self.cur_line, 
self.cur_line_start, 
+#                              self.cur_char, self.input_state, self.next_pos)
+  
+#      def back_up(self):
+#              backup_state = self.backup_state
+#              if backup_state:
+#                      (action, self.cur_pos, self.cur_line, 
self.cur_line_start, 
+#                              self.cur_char, self.input_state, self.next_pos) 
= backup_state
+#                      if self.trace:
+#                              print "Scanner: read: Backing up to", 
self.cur_pos
+#                      return action
+#              else:
+#                      return None
+  
+  def next_char(self):
+    input_state = self.input_state
+    if self.trace:
+      print "Scanner: next:", " "*20, "[%d] %d" % (input_state, self.cur_pos),
+    if input_state == 1:
+      self.cur_pos = self.next_pos
+      c = self.read_char()
+      if c == '\n':
+        self.cur_char = EOL
+        self.input_state = 2
+      elif not c:
+        self.cur_char = EOL
+        self.input_state = 4
+      else:
+        self.cur_char = c
+    elif input_state == 2:
+      self.cur_char = '\n'
+      self.input_state = 3
+    elif input_state == 3:
+      self.cur_line = self.cur_line + 1
+      self.cur_line_start = self.cur_pos = self.next_pos
+      self.cur_char = BOL
+      self.input_state = 1
+    elif input_state == 4:
+      self.cur_char = EOF
+      self.input_state = 5
+    else: # input_state = 5
+      self.cur_char = ''
+    if self.trace:
+      print "--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char))
+    
+#      def read_char(self):
+#              """
+#    Get the next input character, filling the buffer if necessary.
+#    Returns '' at end of file.
+#    """
+#              next_pos = self.next_pos
+#              buf_index = next_pos - self.buf_start_pos
+#              if buf_index == len(self.buffer):
+#                      discard = self.start_pos - self.buf_start_pos
+#                      data = self.stream.read(0x1000)
+#                      self.buffer = self.buffer[discard:] + data
+#                      self.buf_start_pos = self.buf_start_pos + discard
+#                      buf_index = buf_index - discard
+#                      if not data:
+#                              return ''
+#              c = self.buffer[buf_index]
+#              self.next_pos = next_pos + 1
+#              return c
+  
+  def position(self):
+    """
+    Return a tuple (name, line, col) representing the location of
+    the last token read using the read() method. |name| is the
+    name that was provided to the Scanner constructor; |line|
+    is the line number in the stream (1-based); |col| is the
+    position within the line of the first character of the token
+    (0-based).
+    """
+    return (self.name, self.start_line, self.start_col)
+
+  def begin(self, state_name):
+    """Set the current state of the scanner to the named state."""
+    self.initial_state = (
+      self.lexicon.get_initial_state(state_name))
+    self.state_name = state_name
+
+  def produce(self, value, text = None):
+    """
+    Called from an action procedure, causes |value| to be returned
+    as the token value from read(). If |text| is supplied, it is
+    returned in place of the scanned text.
+
+    produce() can be called more than once during a single call to an action
+    procedure, in which case the tokens are queued up and returned one
+    at a time by subsequent calls to read(), until the queue is empty,
+    whereupon scanning resumes.
+    """
+    if text is None:
+      text = self.text
+    self.queue.append((value, text))
+
+  def eof(self):
+    """
+    Override this method if you want something to be done at
+    end of file.
+    """
+
+# For backward compatibility:
+setattr(Scanner, "yield", Scanner.produce)

Added: trunk/gnue-common/src/external/plex/Timing.py
===================================================================
--- trunk/gnue-common/src/external/plex/Timing.py       2004-04-16 23:38:32 UTC 
(rev 5732)
+++ trunk/gnue-common/src/external/plex/Timing.py       2004-04-16 23:41:02 UTC 
(rev 5733)
@@ -0,0 +1,22 @@
+#
+#   Get time in platform-dependent way
+#
+
+import os
+from sys import platform, exit, stderr
+
+if platform == 'mac':
+  import MacOS
+  def time():
+    return MacOS.GetTicks() / 60.0
+  timekind = "real"
+elif hasattr(os, 'times'):
+  def time():
+    t = os.times()
+    return t[0] + t[1]
+  timekind = "cpu"
+else:
+  stderr.write(
+    "Don't know how to get time on platform %s\n" % repr(platform))
+  exit(1)
+

Added: trunk/gnue-common/src/external/plex/Traditional.py
===================================================================
--- trunk/gnue-common/src/external/plex/Traditional.py  2004-04-16 23:38:32 UTC 
(rev 5732)
+++ trunk/gnue-common/src/external/plex/Traditional.py  2004-04-16 23:41:02 UTC 
(rev 5733)
@@ -0,0 +1,154 @@
+#=======================================================================
+#
+#   Python Lexical Analyser
+#
+#   Traditional Regular Expression Syntax
+#
+#=======================================================================
+
+from Regexps import *
+from Errors import PlexError
+
+class RegexpSyntaxError(PlexError):
+  pass
+  
+def re(s):
+  """
+  Convert traditional string representation of regular expression |s|
+  into Plex representation.
+  """
+  return REParser(s).parse_re()
+
+class REParser:
+
+  def __init__(self, s):
+    self.s = s
+    self.i = -1
+    self.end = 0
+    self.next()
+  
+  def parse_re(self):
+    re = self.parse_alt()
+    if not self.end:
+      self.error("Unexpected %s" % repr(self.c))
+    return re
+  
+  def parse_alt(self):
+    """Parse a set of alternative regexps."""
+    re = self.parse_seq()
+    if self.c == '|':
+      re_list = [re]
+      while self.c == '|':
+        self.next()
+        re_list.append(self.parse_seq())
+      re = apply(Alt, tuple(re_list))
+    return re
+      
+  def parse_seq(self):
+    """Parse a sequence of regexps."""
+    re_list = []
+    while not self.end and not self.c in "|)":
+      re_list.append(self.parse_mod())
+    return apply(Seq, tuple(re_list))
+  
+  def parse_mod(self):
+    """Parse a primitive regexp followed by *, +, ? modifiers."""
+    re = self.parse_prim()
+    while not self.end and self.c in "*+?":
+      if self.c == '*':
+        re = Rep(re)
+      elif self.c == '+':
+        re = Rep1(re)
+      else: # self.c == '?'
+        re = Opt(re)
+      self.next()
+    return re
+
+  def parse_prim(self):
+    """Parse a primitive regexp."""
+    c = self.get()
+    if c == '.':
+      re = AnyBut("\n")
+    elif c == '^':
+      re = Bol
+    elif c == '$':
+      re = Eol
+    elif c == '(':
+      re = self.parse_alt()
+      self.expect(')')
+    elif c == '[':
+      re = self.parse_charset()
+      self.expect(']')
+    else:
+      if c == '\\':
+        c = self.get()
+      re = Char(c)
+    return re
+  
+  def parse_charset(self):
+    """Parse a charset. Does not include the surrounding []."""
+    char_list = []
+    invert = 0
+    if self.c == '^':
+      invert = 1
+      self.next()
+    if self.c == ']':
+      char_list.append(']')
+      self.next()
+    while not self.end and self.c <> ']':
+      c1 = self.get()
+      if self.c == '-' and self.lookahead(1) <> ']':
+        self.next()
+        c2 = self.get()
+        for a in xrange(ord(c1), ord(c2) + 1):
+          char_list.append(chr(a))
+      else:
+        char_list.append(c1)
+    chars = string.join(char_list, "")
+    if invert:
+      return AnyBut(chars)
+    else:
+      return Any(chars)
+  
+  def next(self):
+    """Advance to the next char."""
+    s = self.s
+    i = self.i = self.i + 1
+    if i < len(s):
+      self.c = s[i]
+    else:
+      self.c = ''
+      self.end = 1
+    
+  def get(self):
+    if self.end:
+      self.error("Premature end of string")
+    c = self.c
+    self.next()
+    return c
+    
+  def lookahead(self, n):
+    """Look ahead n chars."""
+    j = self.i + n
+    if j < len(self.s):
+      return self.s[j]
+    else:
+      return ''
+
+  def expect(self, c):
+    """
+    Expect to find character |c| at current position.
+    Raises an exception otherwise.
+    """
+    if self.c == c:
+      self.next()
+    else:
+      self.error("Missing %s" % repr(c))
+  
+  def error(self, mess):
+    """Raise exception to signal syntax error in regexp."""
+    raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
+      repr(self.s), self.i, mess))
+  
+  
+

Added: trunk/gnue-common/src/external/plex/Transitions.py
===================================================================
--- trunk/gnue-common/src/external/plex/Transitions.py  2004-04-16 23:38:32 UTC 
(rev 5732)
+++ trunk/gnue-common/src/external/plex/Transitions.py  2004-04-16 23:41:02 UTC 
(rev 5733)
@@ -0,0 +1,253 @@
+#
+#   Plex - Transition Maps
+#
+#   This version represents state sets direcly as dicts
+#   for speed.
+#
+
+from copy import copy
+import string
+from sys import maxint
+from types import TupleType
+
+class TransitionMap:
+  """
+  A TransitionMap maps an input event to a set of states.
+  An input event is one of: a range of character codes, 
+  the empty string (representing an epsilon move), or one 
+  of the special symbols BOL, EOL, EOF.
+  
+  For characters, this implementation compactly represents 
+  the map by means of a list:
+  
+    [code_0, states_0, code_1, states_1, code_2, states_2,
+      ..., code_n-1, states_n-1, code_n]
+    
+  where |code_i| is a character code, and |states_i| is a 
+  set of states corresponding to characters with codes |c|
+  in the range |code_i| <= |c| <= |code_i+1|.
+  
+  The following invariants hold:
+    n >= 1
+    code_0 == -maxint
+    code_n == maxint
+    code_i < code_i+1 for i in 0..n-1
+    states_0 == states_n-1
+  
+  Mappings for the special events '', BOL, EOL, EOF are
+  kept separately in a dictionary.
+  """
+  
+  map = None     # The list of codes and states
+  special = None # Mapping for special events
+  
+  def __init__(self, map = None, special = None):
+    if not map:
+      map = [-maxint, {}, maxint]
+    if not special:
+      special = {}
+    self.map = map
+    self.special = special
+    #self.check() ###
+  
+  def add(self, event, new_state,
+    TupleType = TupleType):
+    """
+    Add transition to |new_state| on |event|.
+    """
+    if type(event) == TupleType:
+      code0, code1 = event
+      i = self.split(code0)
+      j = self.split(code1)
+      map = self.map
+      while i < j:
+        map[i + 1][new_state] = 1
+        i = i + 2
+    else:
+      self.get_special(event)[new_state] = 1
+
+  def add_set(self, event, new_set,
+    TupleType = TupleType):
+    """
+    Add transitions to the states in |new_set| on |event|.
+    """
+    if type(event) == TupleType:
+      code0, code1 = event
+      i = self.split(code0)
+      j = self.split(code1)
+      map = self.map
+      while i < j:
+        map[i + 1].update(new_set)
+        i = i + 2
+    else:
+      self.get_special(event).update(new_set)
+  
+  def get_epsilon(self,
+    None = None):
+    """
+    Return the mapping for epsilon, or None.
+    """
+    return self.special.get('', None)
+  
+  def items(self,
+    len = len):
+    """
+    Return the mapping as a list of ((code1, code2), state_set) and
+    (special_event, state_set) pairs.
+    """
+    result = []
+    map = self.map
+    else_set = map[1]
+    i = 0
+    n = len(map) - 1
+    code0 = map[0]
+    while i < n:
+      set = map[i + 1]
+      code1 = map[i + 2]
+      if set or else_set:
+        result.append(((code0, code1), set))
+      code0 = code1
+      i = i + 2
+    for event, set in self.special.items():
+      if set:
+        result.append((event, set))
+    return result
+  
+  # ------------------- Private methods --------------------
+
+  def split(self, code,
+    len = len, maxint = maxint):
+    """
+    Search the list for the position of the split point for |code|, 
+    inserting a new split point if necessary. Returns index |i| such 
+    that |code| == |map[i]|.
+    """
+    # We use a funky variation on binary search.
+    map = self.map
+    hi = len(map) - 1
+    # Special case: code == map[-1]
+    if code == maxint:
+      return hi
+    # General case
+    lo = 0
+    # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2
+    while hi - lo >= 4:
+      # Find midpoint truncated to even index
+      mid = ((lo + hi) / 2) & ~1
+      if code < map[mid]:
+        hi = mid
+      else:
+        lo = mid
+    # map[lo] <= code < map[hi] and hi - lo == 2
+    if map[lo] == code:
+      return lo
+    else:
+      map[hi:hi] = [code, map[hi - 1].copy()]
+      #self.check() ###
+      return hi
+  
+  def get_special(self, event):
+    """
+    Get state set for special event, adding a new entry if necessary.
+    """
+    special = self.special
+    set = special.get(event, None)
+    if not set:
+      set = {}
+      special[event] = set
+    return set
+
+  # --------------------- Conversion methods -----------------------
+  
+  def __str__(self):
+    map_strs = []
+    map = self.map
+    n = len(map)
+    i = 0
+    while i < n:
+      code = map[i]
+      if code == -maxint:
+        code_str = "-inf"
+      elif code == maxint:
+        code_str = "inf"
+      else:
+        code_str = str(code)
+      map_strs.append(code_str)
+      i = i + 1
+      if i < n:
+        map_strs.append(state_set_str(map[i]))
+      i = i + 1
+    special_strs = {}
+    for event, set in self.special.items():
+      special_strs[event] = state_set_str(set)
+    return "[%s]+%s" % (
+      string.join(map_strs, ","), 
+      special_strs
+    )
+  
+  # --------------------- Debugging methods -----------------------
+  
+  def check(self):
+    """Check data structure integrity."""
+    if not self.map[-3] < self.map[-1]:
+      print self
+      assert 0
+  
+  def dump(self, file):
+    map = self.map
+    i = 0
+    n = len(map) - 1
+    while i < n:
+      self.dump_range(map[i], map[i + 2], map[i + 1], file)
+      i = i + 2
+    for event, set in self.special.items():
+      if set:
+        if not event:
+          event = 'empty'
+        self.dump_trans(event, set, file)
+  
+  def dump_range(self, code0, code1, set, file):
+    if set:
+      if code0 == -maxint:
+        if code1 == maxint:
+          k = "any"
+        else:
+          k = "< %s" % self.dump_char(code1)
+      elif code1 == maxint:
+        k = "> %s" % self.dump_char(code0 - 1)
+      elif code0 == code1 - 1:
+        k = self.dump_char(code0)
+      else:
+        k = "%s..%s" % (self.dump_char(code0), 
+          self.dump_char(code1 - 1))
+      self.dump_trans(k, set, file)
+  
+  def dump_char(self, code):
+    if 0 <= code <= 255:
+      return repr(chr(code))
+    else:
+      return "chr(%d)" % code
+  
+  def dump_trans(self, key, set, file):
+    file.write("      %s --> %s\n" % (key, self.dump_set(set)))
+  
+  def dump_set(self, set):
+    return state_set_str(set)
+
+#
+#   State set manipulation functions
+#
+
+#def merge_state_sets(set1, set2):
+#              for state in set2.keys():
+#                      set1[state] = 1
+
+def state_set_str(set):
+  state_list = set.keys()
+  str_list = []
+  for state in state_list:
+    str_list.append("S%d" % state.number)
+  return "[%s]" % string.join(str_list, ",")
+  
+    
+

Added: trunk/gnue-common/src/external/plex/__init__.py
===================================================================
--- trunk/gnue-common/src/external/plex/__init__.py     2004-04-16 23:38:32 UTC 
(rev 5732)
+++ trunk/gnue-common/src/external/plex/__init__.py     2004-04-16 23:41:02 UTC 
(rev 5733)
@@ -0,0 +1,40 @@
+#=======================================================================
+#
+#   Python Lexical Analyser
+#
+#=======================================================================
+
+"""
+The Plex module provides lexical analysers with similar capabilities
+to GNU Flex. The following classes and functions are exported;
+see the attached docstrings for more information.
+
+   Scanner          For scanning a character stream under the
+                    direction of a Lexicon.
+
+   Lexicon          For constructing a lexical definition
+                    to be used by a Scanner.
+
+   Str, Any, AnyBut, AnyChar, Seq, Alt, Opt, Rep, Rep1,
+   Bol, Eol, Eof, Empty
+
+                    Regular expression constructors, for building pattern
+                    definitions for a Lexicon.
+
+   State            For defining scanner states when creating a
+                    Lexicon.
+
+   TEXT, IGNORE, Begin
+
+                    Actions for associating with patterns when
+        creating a Lexicon.
+"""
+
+from Actions import TEXT, IGNORE, Begin
+from Lexicons import Lexicon, State
+from Regexps import RE, Seq, Alt, Rep1, Empty, Str, Any, AnyBut, AnyChar, Range
+from Regexps import Opt, Rep, Bol, Eol, Eof, Case, NoCase
+from Scanners import Scanner
+
+
+

Added: trunk/gnue-common/src/external/plex/test_tm.py
===================================================================
--- trunk/gnue-common/src/external/plex/test_tm.py      2004-04-16 23:38:32 UTC 
(rev 5732)
+++ trunk/gnue-common/src/external/plex/test_tm.py      2004-04-16 23:41:02 UTC 
(rev 5733)
@@ -0,0 +1,24 @@
+import sys
+sys.stderr = sys.stdout
+
+from TransitionMaps import TransitionMap
+
+m = TransitionMap()
+print m
+
+def add(c, s):
+  print
+  print "adding", repr(c), "-->", repr(s)
+  m.add_transition(c, s)
+  print m
+  print "keys:", m.keys()
+
+add('a','alpha')
+add('e', 'eta')
+add('f', 'foo')
+add('i', 'iota')
+add('i', 'imp')
+add('eol', 'elephant')
+
+
+





reply via email to

[Prev in Thread] Current Thread [Next in Thread]