#
# yyout2grammar.py
# This file is part of LilyPond, the GNU music typesetter.
#
# Copyright (C) 2005 by Carl D. Sorensen
#
# LilyPond is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# LilyPond is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LilyPond.  If not, see <http://www.gnu.org/licenses/>.

# Convert from bison output file parser.output to
# Grammar and alphabetic index.
# Drops all of the state information.
# Converts \\ to \
# Eliminates the @ variables created when {} is placed
#   in the middle of a rule
# Reassembles grammar rules fragmented by the insertion of @ rules
# Eliminates all of the c-code stuff
# Wraps lines that are longer than 78 characters for improved
#   formatting
#
# to create input file, run
#   bison -v parser.yy
# this will create a file parser.output
# then run
#   yyout2grammar.py parser.output your_output_file
#

import sys
import re

# for string recognition and modification ___________________________

mainrulestart_re = re.compile(r' \w*: ')    # regular nonterminal on left
auxitem_re = re.compile(r' \S*@\d+')        # bison-generated nonterminal on right
intro_re = re.compile(r'(.*[:|])\s')
keyword_re = re.compile(r'(\S+)\s')
indent4_re = re.compile(r'\A    \S')        # an indent of 4 spaces
auxfirstcol_re = re.compile(r'\A[$@]')      # nonterminal starting with $ or @


def rulesplice(rulestartmatch):
    chars = len(rulestartmatch.group(0))
    return "| ".rjust(chars)


def strip_backslash(input_string):      # as inserted by bison in its summary
    return input_string.replace('\\\\', '\\')   # double to single


# for writing_out our modified summary ______________________________
# - grammar rule section: break overlong rule lines
# - other sections: their output 'lines' already contain suitable breaks

def write_line(output_line, output_file):
    max_line_length = 78
    indent_value = 3
    output_line = strip_backslash(output_line)
    intro = intro_re.match(output_line)  # if has a rule start and/or component
    if len(output_line) > max_line_length and intro:  # too many items inline
        # give each component item its own line:
        output_file.write(intro.group(1) + " ")
        indent_column = len(intro.group(1))
        output_line = output_line[indent_column:]
        keyword = keyword_re.search(output_line)
        while keyword:
            output_file.write(keyword.group(1) + " \n")
            output_line = output_line[keyword.end(1):]
            keyword = keyword_re.search(output_line)
            if keyword:
                output_file.write("".rjust(indent_column + indent_value))
    else:
        output_file.write(output_line)
    return


# for reading_in the summary produced by bison ______________________
# bison may need to insert an aux rule while processing a main rule, and so
# the component lines of a grammar rule may arrive in instalments
#
# rule_lines accumulates these instalments until the rule is complete

rule_lines = [""]   # a nonprinting way to avoid the initial [0].find erroring


def keeprule():
    global rule_lines
    for rule_line in rule_lines:    # move the rule out, but with some items
        write_line(auxitem_re.sub('', rule_line), out_file)    # elided
    rule_lines = []                 # and forget it
    write_line("\n", out_file)      # reinsert vert separator
    return

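# Illustration: a hypothetical fragment of bison's parser.output, showing why
# the reassembly below is needed.  The rule numbers and names are invented and
# the exact layout depends on the bison version; only the general shape
# matters here.
#
#    20 music: simple_music
#
#    21 $@1: /* empty */
#
#    22 music: '{' $@1 music_list '}'
#
# The interposed $@1 rule splits the group of 'music' alternatives into
# instalments.  keeprule() above elides the ' $@1' items from the accumulated
# lines, and getmainrule() below, on seeing 'music:' restart, replaces the
# repeated rule start with a right-justified '| ' so the alternatives read as
# one continuous rule.
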
def getmainrule(rulestarthit):
    global rule_lines, inline
    if rule_lines[0].find(rulestarthit.group(0)) < 0:   # mismatch
        # we have reached a different rule, so the previous one is complete
        keeprule()      # write out the completed rule, eliding any auxitems
    else:
        # have found a component continuing what is already accumulated
        inline = mainrulestart_re.sub(rulesplice, inline)   # hide the breach
    rule_lines.append(inline)
    # now see if there are any more components
    inline = in_file.readline()
    while inline != '' and inline != "\n":
        rule_lines.append(inline)
        inline = in_file.readline()
    # leaving inline white (or at EOF)
    return


def skipto_nonwhite(_inline):
    while _inline == "\n":
        _inline = in_file.readline()    # advance
    return _inline


def skipto_white(_inline):
    while _inline != '' and _inline != "\n":
        _inline = in_file.readline()    # advance
    return _inline


# script entry point ________________________________________________
# if sys.argv is lengthened by dummy entries, uncomment this one-liner:
# sys.argv.append(''); del sys.argv[sys.argv.index(''):]   # prune args of ''

if len(sys.argv) != 3:
    print "Usage: yyout2grammar.py parser_output_file grammar_file."
else:
    in_name = sys.argv[1]
    out_name = sys.argv[2]
    print "input file name", in_name
    print "output file name", out_name

    in_file = open(in_name, 'r')
    out_file = open(out_name, 'w')

    # skip preliminary material from file e.g. unused Terminals
    inline = in_file.readline()
    while inline != '' and not inline.startswith("Grammar"):
        inline = in_file.readline()
    # have skipped all preliminary lines

    write_line(inline + "\n", out_file)     # write the Grammar header
    inline = skipto_nonwhite(in_file.readline())
    # leaving inline at first entry of Grammar section
    while inline != '' and not inline.startswith("Terminals"):
        # generally expecting inline to be holding a rule start
        # but this is not required initially; the loop will phaselock
        mainrulestarthit = re.search(mainrulestart_re, inline)
        if mainrulestarthit:
            # found a main rule (re)starting
            getmainrule(mainrulestarthit)   # a nonwhite section, maybe partial
        else:
            # found maybe an aux rule start, or just an intermediate line
            # so discard lines until reach the next probable rule start:
            inline = skipto_white(in_file.readline())
        inline = skipto_nonwhite(in_file.readline())

    # past end of Grammar section
    keeprule()  # flush the rule buffer; it is waiting for further instalments
    write_line("\n\n", out_file)
    if inline == '':    # readline encountered EOF recently
        sys.exit("\nGrammar section aborted at EOF\n")   # don't know if
        # have processed all Grammar lines

    write_line(inline + "\n", out_file)     # write the Terminals header
    inline = skipto_nonwhite(in_file.readline())
    # leaving inline at first entry of Terminals section
    index_items = []
    index_item = inline
    inline = in_file.readline()
    while inline != '' and not inline.startswith("Nonterminals"):
        while re.search(indent4_re, inline):    # at a continuation line
            index_item = index_item + inline    # concatenate it (includes \n)
            inline = in_file.readline()
        # reached unindented, so index_item group is complete
        index_items.append(index_item)          # note it
        index_item = inline
        inline = in_file.readline()

    # past end of Terminals section
    index_items.sort(lambda x, y: cmp(x.lower(), y.lower()))   # alphabetic
    for index_item in index_items:
        write_line(index_item, out_file)
    write_line('\n\n', out_file)
    if inline == '':    # readline encountered EOF recently
        sys.exit("\nTerminals section aborted at EOF\n")   # don't know if
        # have processed all Terminals lines

    write_line(inline + "\n", out_file)     # write the Nonterminals header
    inline = skipto_nonwhite(in_file.readline())
    # leaving inline at first entry of Nonterminals section
    index_items = []
    index_item = inline
    inline = in_file.readline()
    while inline != '' and inline != "\n":      # at a nonwhite line
        while re.search(indent4_re, inline):    # at a continuation line
            index_item = index_item + inline    # concatenate it (includes \n)
            inline = in_file.readline()
        # reached unindented, so index_item group is complete, but
        if not re.search(auxfirstcol_re, index_item):   # only main nonterminals
            index_items.append(index_item)              # are noted
        index_item = inline
        inline = in_file.readline()

    # past end of (wholly nonwhite) Nonterminals section
    index_items.sort(lambda x, y: cmp(x.lower(), y.lower()))   # alphabetic
    for index_item in index_items:
        write_line(index_item, out_file)
    write_line('\n\n', out_file)
    if inline == '':    # readline encountered EOF recently
        sys.exit("\nNonterminals section aborted at EOF\n")   # don't know if
        # have processed all Nonterminals lines

# end of script _____________________________________________________
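
# Illustration: a hypothetical entry from bison's Nonterminals section, with
# an invented name and invented rule numbers; the real layout depends on the
# bison version.
#
# music_list (312)
#     on left: 45 46, on right: 47 52
#
# Each unindented line plus its 4-space-indented continuation lines is
# collected above as one index_item; entries whose name starts with $ or @
# (bison's internal rules) are skipped, and the remaining items are sorted
# case-insensitively before being written to the output file.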