#!@PYTHON@
#
# yyout2grammar.py
# This file is part of LilyPond, the GNU music typesetter.
#
# Copyright (C) 2005 by Carl D. Sorensen
#
# LilyPond is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# LilyPond is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LilyPond. If not, see <http://www.gnu.org/licenses/>.
# Convert from bison output file parser.output to
# Grammar and alphabetic index.
# Drops all of the state information.
# Converts \\ to \
# Eliminates the @ variables created when {} is placed
# in the middle of a rule
# Reassembles grammar rules fragmented by the insertion of @ rules
# Eliminates all of the c-code stuff
# Wraps lines that are longer than 78 characters for improved
# formatting
#
# to create input file, run
# bison -v parser.yy
# this will create a file parser.output
# then run
# yyout2grammar.py parser.output your_output_file
#
import sys
import re
# for string recognition and modification ___________________________
mainrulestart_re = re.compile(r' \w*: ')    # regular nonterminal on left
# NOTE(review): the auxitem_re and auxfirstcol_re patterns below were
# corrupted by a mailing-list address scrubber ("address@hidden"); they are
# reconstructed here from their own comments and from the names bison gives
# its generated helper nonterminals ($@N / @N) -- confirm against an
# upstream copy of this script.
auxitem_re = re.compile(r' \$?@\d+')        # bison-generated nonterminal on right
intro_re = re.compile(r'(.*[:|])\s')        # rule head (or '|') introducing a line
keyword_re = re.compile(r'(\S+)\s')         # next whitespace-delimited item
indent4_re = re.compile(r'\A    \S')        # an indent of 4 spaces (run of spaces
                                            # restored; one space would never match
                                            # bison's 4-space continuation lines)
auxfirstcol_re = re.compile(r'\A[$@]')      # nonterminal starting with $ or @
def rulesplice(rulestartmatch):
    """Return a '| ' continuation padded to the width of the matched rule head.

    Used as a re.sub callback: the repeated 'name: ' head of a continuing
    rule is replaced by an equally wide, right-aligned '| '.
    """
    width = rulestartmatch.end(0) - rulestartmatch.start(0)
    return "| ".rjust(width)
def strip_backslash(input_string):
    """Collapse the doubled backslashes bison writes back to single ones."""
    return "\\".join(input_string.split("\\\\"))
# for writing_out our modified summary ______________________________
# - grammar rule section: break overlong rule lines
# - other sections: their output 'lines' already contain suitable breaks
def write_line(output_line, output_file):
    """Write output_line to output_file, breaking overlong rule lines.

    A line longer than 78 characters that opens with a rule head (or '|'
    continuation, per intro_re) is split so each component item gets its
    own line, indented 3 columns past the head.  Anything else is passed
    through unchanged.  Doubled backslashes are collapsed first.
    """
    max_line_length = 78
    indent_value = 3
    output_line = strip_backslash(output_line)
    intro = intro_re.match(output_line)  # has a rule start and/or component?
    if not (intro and len(output_line) > max_line_length):
        # short line, or no recognizable head: pass through verbatim
        output_file.write(output_line)
        return
    # too many items inline: emit the head, then one component per line
    head = intro.group(1)
    output_file.write(head + " ")
    indent_column = len(head)
    remainder = output_line[indent_column:]
    keyword = keyword_re.search(remainder)
    while keyword:
        output_file.write(keyword.group(1) + " \n")
        remainder = remainder[keyword.end(1):]
        keyword = keyword_re.search(remainder)
        if keyword:
            output_file.write("".rjust(indent_column + indent_value))
# for reading_in the summary produced by bison ______________________
# bison may need to insert an aux rule while processing a main rule, and so
# the component lines of a grammar rule may arrive in instalments
#
# rule_lines accumulates these instalments until the rule is complete
# seeded with one empty string so getmainrule's rule_lines[0].find has an
# element to probe on the very first call; writing it out prints nothing
rule_lines = [""] # a nonprinting way to avoid the initial [0].find erroring
def keeprule():
    """Flush the accumulated rule instalments to out_file.

    Bison's generated $@N helper items are elided from each line, the
    accumulator is reset, and a blank separator line is re-inserted.
    """
    global rule_lines
    for accumulated in rule_lines:
        write_line(auxitem_re.sub('', accumulated), out_file)
    rule_lines = []  # forget the rule just written
    write_line("\n", out_file)  # reinsert vertical separator
def getmainrule(rulestarthit):
    """Accumulate the main-rule instalment (re)starting at the global inline.

    rulestarthit is mainrulestart_re's match against `inline`.  If the
    matched rule head is absent from the rule currently buffered in
    rule_lines, the previous rule is complete and is flushed via
    keeprule(); otherwise this instalment continues the same rule and its
    repeated head is spliced into an aligned '|' continuation.  Component
    lines are then read until a blank line -- or EOF -- ends the
    instalment, leaving `inline` holding "\n" (or '' at EOF).
    """
    global rule_lines, inline
    if rule_lines[0].find(rulestarthit.group(0)) < 0:  # mismatch
        # we have reached a different rule, so the previous one is complete
        keeprule()  # write out the completed rule, eliding any auxitems
    else:  # a component continuing what is already accumulated
        inline = mainrulestart_re.sub(rulesplice, inline)  # hide the breach
    rule_lines.append(inline)
    # now see if there are any more components
    inline = in_file.readline()
    # the '' test guards against EOF: readline returns '' forever there,
    # and the original "!= \n" condition then looped without terminating
    while inline != "\n" and inline != '':
        rule_lines.append(inline)
        inline = in_file.readline()
    # leaving inline found white (or empty at EOF)
    return
def skipto_nonwhite(_inline):
    """Read past blank lines; return the first line that is not just "\\n".

    EOF (readline returning '') also stops the scan, since '' != "\\n".
    """
    current = _inline
    while current == "\n":
        current = in_file.readline()  # advance
    return current
def skipto_white(_inline):
    """Read past non-blank lines; return the first blank line ("\\n") or ''."""
    current = _inline
    while current not in ('', "\n"):
        current = in_file.readline()  # advance
    return current
# script entry point ________________________________________________
# if sys.argv is lengthened by dummy entries, uncomment this one-liner:
# sys.argv.append(''); del sys.argv[sys.argv.index(''):] # prune args of ''
if len(sys.argv) != 3:
print "Usage: yyout2grammar.py parser_output_file grammar_file."
else:
in_name = sys.argv[1]
out_name = sys.argv[2]
print "input file name", in_name
print "output file name", out_name
in_file = open(in_name, 'r')
out_file = open(out_name, 'w')
# skip preliminary material from file e.g. unused Terminals
inline = in_file.readline()
while inline != '' and not inline.startswith("Grammar"):
inline = in_file.readline()
# have skipped all preliminary lines
write_line(inline + "\n", out_file) # write the Grammar header
inline = skipto_nonwhite(in_file.readline())
# leaving inline at first entry of Grammar section
while inline != '' and not inline.startswith("Terminals"):
# generally expecting inline to be holding a rule start
# but this is not required initially; the loop will phaselock
mainrulestarthit = re.search(mainrulestart_re, inline)
if mainrulestarthit: # found a main rule (re)starting
getmainrule(mainrulestarthit) # a nonwhite section, maybe partial
else: # found maybe an aux rule start, or just an intermediate line
# so discard lines until reach the next probable rule start:
inline = skipto_white(in_file.readline())
inline = skipto_nonwhite(in_file.readline())
# past end of Grammar section
keeprule() # flush the rule buffer; it is waiting for further instalments
write_line("\n\n", out_file)
if inline == '': # readline encountered EOF recently
sys.exit("\nGrammar section aborted at EOF\n") # don't know if
# have processed all Grammar lines
write_line(inline + "\n", out_file) # write the Terminals header
inline = skipto_nonwhite(in_file.readline())
# leaving inline at first entry of Terminals section
index_items = []
index_item = inline
inline = in_file.readline()
while inline != '' and not inline.startswith("Nonterminals"):
while re.search(indent4_re, inline): # at a continuation line
index_item = index_item + inline # concatenate it (includes \n)
inline = in_file.readline()
# reached unindented, so index_item group is complete
index_items.append(index_item) # note it
index_item = inline
inline = in_file.readline()
# past end of Terminals section
index_items.sort(lambda x, y: cmp(x.lower(), y.lower())) # alphabetic
for index_item in index_items:
write_line(index_item, out_file)
write_line('\n\n', out_file)
if inline == '': # readline encountered EOF recently
sys.exit("\nTerminals section aborted at EOF\n") # don't know if
# have processed all Terminals lines
write_line(inline + "\n", out_file) # write the Nonterminals header
inline = skipto_nonwhite(in_file.readline())
# leaving inline at first entry of Nonterminals section
index_items = []
index_item = inline
inline = in_file.readline()
while inline != '' and inline != "\n": # at a nonwhite line
while re.search(indent4_re, inline): # at a continuation line
index_item = index_item + inline # concatenate it (includes \n)
inline = in_file.readline()
# reached unindented, so index_item group is complete, but
if not re.search(auxfirstcol_re, index_item): # only main nonterminals
index_items.append(index_item) # are noted
index_item = inline
inline = in_file.readline()
# past end of (wholly nonwhite) Nonterminals section
index_items.sort(lambda x, y: cmp(x.lower(), y.lower())) # alphabetic
for index_item in index_items:
write_line(index_item, out_file)
write_line('\n\n', out_file)
if inline == '': # readline encountered EOF recently
sys.exit("\nNonterminals section aborted at EOF\n") # don't know if
# have processed all Nonterminals lines
# end of script _____________________________________________________