diff options
Diffstat (limited to 'util/newconfig')
-rw-r--r-- | util/newconfig/LICENSE | 18 | ||||
-rw-r--r-- | util/newconfig/Makefile | 31 | ||||
-rw-r--r-- | util/newconfig/NOTES | 46 | ||||
-rw-r--r-- | util/newconfig/config.g | 2580 | ||||
-rw-r--r-- | util/newconfig/parsedesc.g | 196 | ||||
-rw-r--r-- | util/newconfig/test.config | 6 | ||||
-rw-r--r-- | util/newconfig/yapps2.py | 779 | ||||
-rw-r--r-- | util/newconfig/yapps2.tex | 1225 | ||||
-rw-r--r-- | util/newconfig/yappsrt.py | 172 |
9 files changed, 0 insertions, 5053 deletions
diff --git a/util/newconfig/LICENSE b/util/newconfig/LICENSE deleted file mode 100644 index 64f38b89f2..0000000000 --- a/util/newconfig/LICENSE +++ /dev/null @@ -1,18 +0,0 @@ -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/util/newconfig/Makefile b/util/newconfig/Makefile deleted file mode 100644 index 3328380569..0000000000 --- a/util/newconfig/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -ALL: $(shell echo *.g | sed s/\\.g/\\.py/g ) - -%.py: %.g yapps2.py yappsrt.py Makefile - python yapps2.py $< - -DOC: yapps2.ps yapps2.pdf manual/index.html - -yapps2.ps: yapps2.dvi - dvips -q yapps2.dvi -o yapps2.ps - -yapps2.pdf: yapps2.ps - ps2pdf yapps2.ps - -yapps2.dvi: yapps2.tex - latex yapps2.tex - -manual/index.html: yapps2.aux yapps2.tex - rm manual/yapps2.css - latex2html -dir 'manual' -mkdir -lcase_tags -font_size 12pt -split 4 -toc_depth 4 -html_version 4.0,unicode,table -t 'Yapps 2.0 Manual' -address 'Amit J Patel, amitp@cs.stanford.edu' -info 0 -show_section_numbers -up_title 'Yapps Page' -up_url 'http://theory.stanford.edu/~amitp/yapps/' -strict -image_type png yapps2.tex - echo '@import url("http://www-cs-students.stanford.edu/~amitp/amitp.css");' > manual/yapps2-new.css - echo 'hr { display:none; }' >> manual/yapps2-new.css - echo 'h1 br, h2 br { display:none; }' >>manual/yapps2-new.css - cat manual/yapps2.css >> manual/yapps2-new.css - rm manual/yapps2.css - mv manual/yapps2-new.css manual/yapps2.css - -DISTRIB: - cd ..; zip -u yapps2.zip yapps2/{LICENSE,yapps2.py,yappsrt.py,parsedesc.g,examples/*.g,NOTES,yapps2.tex,Makefile,manual/*.html,manual/*.css,manual/*.png} - -clean: - rm -f config.py yappsrt.pyc parsedesc.py diff --git a/util/newconfig/NOTES b/util/newconfig/NOTES deleted file mode 100644 index 325e76a479..0000000000 --- a/util/newconfig/NOTES +++ /dev/null @@ -1,46 +0,0 @@ -April 14, 2002: - -I haven't worked on Yapps for a while, mainly because I spent all my energy -on trying to graduate. Now that I've finished school, I have several projects -I want to start working on again, including Yapps. - -Notes for myself: - -Add a debugging mode that helps you understand how the grammar - is constructed and how things are being parsed -Look into an English output mode that would use natural language - to describe a grammar -Optimize unused variables -Add a convenience to automatically gather up the values returned - from subpatterns, put them into a list, and return them -Improve the documentation -Write some larger examples -Get rid of old-style regex support -Use SRE's lex support to speed up lexing (this may be hard given that - yapps allows for context-sensitive lexers) -Look over Dan Connoly's experience with Yapps (bugs, frustrations, etc.) - and see what improvements could be made -Add something to pretty-print the grammar (without the actions) -Maybe conditionals? Follow this rule only if <condition> holds. - But this would be useful mainly when multiple rules match, and we - want the first matching rule. The conditional would mean we skip to - the next rule. Maybe this is part of the attribute grammar system, - where rule X<0> can be specified separately from X<N>. -Convenience functions that could build return values for all rules - without specifying the code for each rule individually -Patterns (abstractions over rules) -- for example, comma separated values - have a certain rule pattern that gets replicated all over the place -"Gather" mode that simply outputs the return values for certain nodes. - For example, if you just want all expressions, you could ask yapps - to gather the results of the 'expr' rule into a list. This would - ignore all the higher level structure. -Look at everyone's Yapps grammars, and come up with larger examples - http://www.w3.org/2000/10/swap/SemEnglish.g - http://www.w3.org/2000/10/swap/kifExpr.g - http://www.w3.org/2000/10/swap/rdfn3.g -It would be nice if you could feed text into Yapps (push model) instead - of Yapps reading text out of a string (pull model). However, I think - that would make the resulting parser code mostly unreadable - (like yacc, etc.). Coroutines/stacklesspython may be the answer. - - diff --git a/util/newconfig/config.g b/util/newconfig/config.g deleted file mode 100644 index fbe33bf96f..0000000000 --- a/util/newconfig/config.g +++ /dev/null @@ -1,2580 +0,0 @@ -# -*- python -*- -import sys -import os -import re -import string -import types - -import traceback - -warnings = 0 -errors = 0 - -target_dir = '' -target_name = '' -treetop = '' -full_mainboard_path = '' -mainboard_path = '' -global_options = {} -global_options_by_order = [] -global_option_values = {} -global_uses_options = {} -global_exported_options = [] -romimages = {} -buildroms = [] -pciroms = [] -rommapping = {} -curimage = 0 -bootblocksize = 0 -alloptions = 0 # override uses at top level - -local_path = re.compile(r'^\.') -include_pattern = re.compile(r'%%([^%]+)%%') - -# the cpu type for this mainboard -cpu_type = 0 - -# ----------------------------------------------------------------------------- -# Utility Classes -# ----------------------------------------------------------------------------- - -class stack: - """Used to keep track of the current part or dir""" - class __stack_iter: - def __init__ (self, stack): - self.index = 0 - self.len = len(stack) - self.stack = stack - - def __iter__ (self): - return self - - def next (self): - if (self.index < self.len): - s = self.stack[self.index] - self.index = self.index + 1 - return s - raise StopIteration - - def __init__ (self): - self.stack = [] - - def __len__ (self): - return len(self.stack) - - def __getitem__ (self, i): - return self.stack[i] - - def __iter__ (self): - return self.__stack_iter(self.stack) - - def push(self, part): - self.stack.append(part) - - def pop(self): - try: - return self.stack.pop() - except IndexError: - return 0 - - def tos(self): - try: - return self.stack[-1] - except IndexError: - return 0 - - def empty(self): - return (len(self.stack) == 0) -partstack = stack() -dirstack = stack() - -class debug_info: - none = 0 - gencode = 1 - dumptree = 2 - object = 3 - dict = 4 - statement = 5 - dump = 6 - gengraph = 7 - - def __init__(self, *level): - self.__level = level - - def setdebug(self, *level): - self.__level = level - - def level(self, level): - return level in self.__level - - def info(self, level, str): - if level in self.__level: - print str - -global debug -debug = debug_info(debug_info.dumptree) -debug = debug_info(debug_info.object) - -# ----------------------------------------------------------------------------- -# Error Handling -# ----------------------------------------------------------------------------- - -class location: - """Used to keep track of our current location while parsing - configuration files""" - class __place: - def __init__(self, file, line, command): - self.file = file - self.line = line - self.command = command - def next_line(self, command): - self.line = self.line + 1 - self.command = command - def at(self): - return "%s:%d" % (self.file, self.line) - - def __init__ (self): - self.stack = stack() - - def __str__ (self): - s = '' - for p in self.stack: - if (s == ''): - s = p.at() - else: - s = s + '\n' + p.at() - return s - - def file(self): - return self.stack.tos().file - - def line(self): - return self.stack.tos().line - - def command(self): - return self.stack.tos().command - - def push(self, file): - self.stack.push(self.__place(os.path.normpath(file), 0, "")) - - def pop(self): - self.stack.pop() - - def next_line(self, command): - self.stack.tos().next_line(command) - - def at(self): - return self.stack.tos().at() -loc = location() - -def error(string): - """Print error message""" - global errors, loc - errors = errors + 1 - print "===> ERROR: %s" % string - print "%s" % loc - -def fatal(string): - """Print error message and exit""" - error(string) - exitiferrors() - -def warning(string): - """Print warning message""" - global warnings, loc - warnings = warnings + 1 - print "===> WARNING: %s" % string - -def notice(string): - """Print notice message""" - #print "===> NOTE: %s" % string - -def exitiferrors(): - """Exit parser if an error has been encountered""" - if (errors != 0): - sys.exit(1) - -def safe_open(file, mode): - try: - return open(file, mode) - except IOError: - fatal("Could not open file \"%s\"" % file) - -# ----------------------------------------------------------------------------- -# Main classes -# ----------------------------------------------------------------------------- - -class romimage: - """A rom image is the ultimate goal of coreboot""" - def __init__ (self, name): - # name of this rom image - self.name = name - - # set by 'arch' directive - self.arch = '' - - # set by 'payload' directive - self.payload = '' - - # set by 'init' directive - self.initfile = '' - - # make rules added by 'makerule' directive - self.makebaserules = {} - - # object files added by 'object' directive - self.objectrules = {} - - # init object files added by 'initobject' directive - self.initobjectrules = {} - - # driver files added by 'driver' directive - self.driverrules = {} - - # smm object files added by 'smmobject' directive - self.smmobjectrules = {} - - # loader scripts added by 'ldscript' directive - self.ldscripts = [] - - # user defines added by 'makedefine' directive - self.userdefines = [] - - # files to be included in crt0.S - self.initincludes = {} - - # as above, but order is preserved - self.initincludesorder = [] - - # transitional flag to support old crtinclude format - self.useinitincludes = 0 - - # instance counter for parts - self.partinstance = 0 - - # chip config files included by the 'config' directive - self.configincludes = {} - - # root of part tree - self.root = 0 - - # name of target directory specified by 'target' directive - self.target_dir = '' - - # option values used in rom image - self.values = {} - - # exported options - self.exported_options = [] - - # Last device built - self.last_device = 0 - - def getname(self): - return self.name - - def getvalues(self): - return self.values - - def setarch(self, arch): - self.arch = arch - - def setpayload(self, payload): - global rommapping - self.payload = payload - - rommapping[self.name] = payload - - def setinitfile(self, initfile): - self.initfile = initfile - - def getinitfile(self): - return self.initfile - - def addmakerule(self, id): - o = getdict(self.makebaserules, id) - if (o): - warning("rule %s previously defined" % id) - o = makerule(id) - setdict(self.makebaserules, id, o) - - def getmakerules(self): - return self.makebaserules - - def getmakerule(self, id): - o = getdict(self.makebaserules, id) - if (o): - return o - fatal("No such make rule \"%s\"" % id) - - def addmakeaction(self, id, str): - o = getdict(self.makebaserules, id) - if (o): - a = dequote(str) - o.addaction(a) - return - fatal("No such rule \"%s\" for addmakeaction" % id) - - def addmakedepend(self, id, str): - o = getdict(self.makebaserules, id) - if (o): - a = dequote(str) - o.adddependency(a) - return - fatal("No such rule \"%s\" for addmakedepend" % id) - - def addmakeobject(self, file, obj): - source = topify(obj[1]) - type = obj[2] - if (type == 'S'): - # for .S, .o depends on .s - file.write("%s: %s.s\n" % (obj[0], obj[3])) - file.write("\t$(CC) -c $(CONFIG_CPU_OPT) -o $@ $<\n") - # and .s depends on .S - file.write("%s.s: %s\n" % (obj[3], source)) - # Note: next 2 lines are ONE output line! - file.write("\t$(CPP) $(CPPFLAGS) $< ") - file.write(">$@.new && mv $@.new $@\n") - else: - file.write("%s: %s\n" % (obj[0], source)) - file.write("\t$(CC) -c $(CFLAGS) -o $@ $<\n") - - # this is called with an an object name. - # the easiest thing to do is add this object to the current - # component. - # such kludgery. If the name starts with '.' then make the - # dependency be on ./thing.x gag me. - def addobjectdriver(self, dict, object_name): - global dirstack - suffix = object_name[-2:] - if (suffix == '.o'): - suffix = '.c' - base = object_name[:-2] - type = object_name[-1:] - if (object_name[0] == '.'): - source = base + suffix - object = base + '.o' - rel_base = base - else: - rel_base = re.sub(treetop, "", os.path.join(dirstack.tos(), base)) - source = "$(TOP)" + rel_base + suffix - if (rel_base[0] == '/'): - rel_base = re.sub("^/", "", rel_base) - object = rel_base + '.o' - - debug.info(debug.object, "add object %s source %s" % (object, source)) - l = getdict(dict, rel_base) - if (l): - warning("object/driver %s previously defined" % rel_base) - setdict(dict, rel_base, [object, source, type, rel_base]) - - def addinitobjectrule(self, name): - self.addobjectdriver(self.initobjectrules, name) - - def addobjectrule(self, name): - self.addobjectdriver(self.objectrules, name) - - def adddriverrule(self, name): - self.addobjectdriver(self.driverrules, name) - - def addsmmobjectrule(self, name): - self.addobjectdriver(self.smmobjectrules, name) - - def getinitobjectrules(self): - return self.initobjectrules - - def getinitobjectrule(self, name): - o = getdict(self.initobjectrules, name) - if (o): - return o - fatal("No such init object rule \"%s\"" % name) - - def getobjectrules(self): - return self.objectrules - - def getobjectrule(self, name): - o = getdict(self.objectrules, name) - if (o): - return o - fatal("No such object rule \"%s\"" % name) - - def getdriverrules(self): - return self.driverrules - - def getdriverrule(self, name): - o = getdict(self.driverrules, name) - if (o): - return o - fatal("No such driver rule \"%s\"" % name) - - def getsmmobjectrules(self): - return self.smmobjectrules - - def getsmmobjectrule(self, name): - o = getdict(self.smmobjectrules, name) - if (o): - return o - fatal("No such smm object rule \"%s\"" % name) - - def addldscript(self, path): - self.ldscripts.append(path) - - def getldscripts(self): - return self.ldscripts - - def adduserdefine(self, str): - self.userdefines.append(str) - - def getuserdefines(self): - return self.userdefines - - def addinitinclude(self, str, path): - if (str != 0): - self.useinitincludes = 1 - - debug.info(debug.object, "ADDCRT0: %s -> %s" % (str, path)) - o = getdict(self.initincludes, path) - if (o): - warning("init include for %s previously defined" % path) - o = initinclude(str, path) - setdict(self.initincludes, path, o) - self.initincludesorder.append(path) - - def getinitincludes(self): - return self.initincludesorder - - def getinitinclude(self, path): - o = getdict(self.initincludes, path) - if (o): - return o - fatal("No such init include \"%s\"" % path) - - def addconfiginclude(self, part, path): - setdict(self.configincludes, part, path) - - def getconfigincludes(self): - return self.configincludes - - def getincludefilename(self): - if (self.useinitincludes): - return "crt0.S" - else: - return "crt0_includes.h" - - def newformat(self): - return self.useinitincludes - - def numparts(self): - return self.partinstance - - def newpartinstance(self): - i = self.partinstance - self.partinstance = self.partinstance + 1 - return i - - def setroot(self, part): - self.root = part - - def getroot(self): - return self.root - - def settargetdir(self, path): - self.targetdir = path - - def gettargetdir(self): - return self.targetdir - -class buildrom: - """A buildrom statement""" - def __init__ (self, filename, size, roms): - self.name = filename - self.size = size - self.roms = roms - - def __len__ (self): - return len(self.roms) - - def __getitem__(self,i): - return self.roms[i] - -class pci_rom: - """A pci_rom statement""" - def __init__ (self, filename, vendor, device): - self.name = filename - self.pci_vid = vendor - self.pci_did = device - -class initinclude: - """include file for initialization code""" - def __init__ (self, str, path): - self.string = str - self.path = path - - def getstring(self): - return self.string - - def getpath(self): - return self.path - -class makerule: - """Rule to be included in Makefile""" - def __init__ (self, target): - self.target = target - self.dependency = [] - self.actions = [] - - def addaction(self, action): - self.actions.append(action) - - def adddependency(self, dependency): - self.dependency.append(dependency) - - def gtarget(self): - return self.target - - def gdependency(self): - return self.dependency - - def gaction(self): - return self.actions - -class option: - """Configuration option""" - def __init__ (self, name): - self.name = name # name of option - self.loc = 0 # current location - self.used = 0 # option has been used - # it is undefined) - self.comment = '' # description of option - self.exportable = 0 # option is able to be exported - self.format = '%s' # option print format - self.write = [] # parts that can set this option - - def where(self): - return self.loc - - def setcomment(self, comment, loc): - if (self.comment != ''): - print "%s: " % self.name - print "Attempt to modify comment at %s" % loc - return - self.comment = comment - - def setexportable(self): - self.exportable = 1 - - def setnoexport(self): - self.exportable = 0 - - def setformat(self, fmt): - self.format = fmt - - def getformat(self): - return self.format - - def setused(self): - if (self.exportable): - self.exported = 1 - self.used = 1 - - def setwrite(self, part): - self.write.append(part) - - def isexportable(self): - return self.exportable - - def iswritable(self, part): - return (part in self.write) - -class option_value: - """Value of a configuration option. The option has a default - value which can be changed at any time. Once an option has been - set the default value is no longer used.""" - def __init__(self, name, prev): - self.name = name - self.value = '' - self.set = 0 - if (prev): - self.value = prev.value - self.set = prev.set - - - def setvalue(self, value): - if ((self.set & 2) == 2): - warning("Changing option %s" % self.name) - else: - self.set |= 2 - self.value = value - - def setdefault(self, value): - if ((self.set & 1) == 1): - notice("Changing default value of %s" % self.name) - - if ((self.set & 2) == 0): - self.value = value - self.set |= 1 - - def contents(self): - return self.value - - def isset(self): - return (self.set & 2) == 2 - - -class partobj: - """A configuration part""" - def __init__ (self, image, dir, parent, part, type_name, instance_name, chip_or_device): - if (parent): - debug.info(debug.object, "partobj dir %s parent %s part %s" \ - % (dir, parent.instance_name, part)) - else: - debug.info(debug.object, "partobj dir %s part %s" \ - % (dir, part)) - - # romimage that is configuring this part - self.image = image - - # links for static device tree - self.children = 0 - self.prev_sibling = 0 - self.next_sibling = 0 - self.prev_device = 0 - self.next_device = 0 - self.chip_or_device = chip_or_device - - # list of init code files - self.initcode = [] - - # initializers for static device tree - self.registercode = {} - - # part name - self.part = part - - # type name of this part - self.type_name = type_name - - # object files needed to build this part - self.objects = [] - - # directory containg part files - self.dir = dir - - # instance number, used to distinguish anonymous - # instances of this part - self.instance = image.newpartinstance() - debug.info(debug.object, "INSTANCE %d" % self.instance) - - # Options used by this part - self.uses_options = {} - - # Name of chip config file (0 if not needed) - self.chipconfig = 0 - - # Flag to indicate that we have generated type - # definitions for this part (only want to do it once) - self.done_types = 0 - - # Path to the device - self.path = "" - - # Resources of the device - self.resoruce = "" - self.resources = 0 - - # Enabled state of the device - self.enabled = 1 - - # Flag if I am a dumplicate device - self.dup = 0 - - # If no instance name is supplied then generate - # a unique name - if (instance_name == 0): - self.instance_name = self.type_name + \ - "_dev%d" % self.instance - self.chipinfo_name = "%s_info_%d" \ - % (self.type_name, self.instance) - else: - self.instance_name = instance_name - self.chipinfo_name = "%s_info_%d" % (self.instance_name, self.instance) - - # Link this part into the device list - if (self.chip_or_device == 'device'): - if (image.last_device): - image.last_device.next_device = self - self.prev_device = image.last_device - image.last_device = self - - # Link this part into the tree - if (parent and (part != 'arch')): - debug.info(debug.gencode, "add to parent") - self.parent = parent - # add current child as my sibling, - # me as the child. - if (parent.children): - debug.info(debug.gencode, "add %s (%d) as sibling" % (parent.children.dir, parent.children.instance)) - youngest = parent.children - while(youngest.next_sibling): - youngest = youngest.next_sibling - youngest.next_sibling = self - self.prev_sibling = youngest - else: - parent.children = self - else: - self.parent = self - - - def info(self): - return "%s: %s" % (self.part, self.type) - def type(self): - return self.chip_or_device - - def readable_name(self): - name = "" - name = "%s_%d" % (self.type_name, self.instance) - if (self.chip_or_device == 'chip'): - name = "%s %s %s" % (name, self.part, self.dir) - else: - name = "%s %s" % (name, self.path) - return name - - def graph_name(self): - name = "{ {_dev%d|" % self.instance - if (self.part): - name = "%s%s" % (name, self.part) - else: - name = "%s%s" % (name, self.chip_or_device) - if (self.type_name): - name = "%s}|%s}" % (name, self.type_name) - else: - name = "%s}|%s}" % (name, self.parent.type_name) - return name - - def dumpme(self, lvl): - """Dump information about this part for debugging""" - print "%d: %s" % (lvl, self.readable_name()) - print "%d: part %s" % (lvl, self.part) - print "%d: instance %d" % (lvl, self.instance) - print "%d: chip_or_device %s" % (lvl, self.chip_or_device) - print "%d: dir %s" % (lvl,self.dir) - print "%d: type_name %s" % (lvl,self.type_name) - print "%d: parent: %s" % (lvl, self.parent.readable_name()) - if (self.children): - print "%d: child %s" % (lvl, self.children.readable_name()) - if (self.next_sibling): - print "%d: siblings %s" % (lvl, self.next_sibling.readable_name()) - print "%d: initcode " % lvl - for i in self.initcode: - print "\t%s" % i - print "%d: registercode " % lvl - for f, v in self.registercode.items(): - print "\t%s = %s" % (f, v) - print "\n" - - def firstchilddevice(self): - """Find the first device in the children link.""" - kid = self.children - while (kid): - if (kid.chip_or_device == 'device'): - return kid - else: - kid = kid.children - return 0 - - def firstparentdevice(self): - """Find the first device in the parent link.""" - parent = self.parent - while (parent and (parent.parent != parent) and (parent.chip_or_device != 'device')): - parent = parent.parent - if ((parent.parent != parent) and (parent.chip_or_device != 'device')): - parent = 0 - while(parent and (parent.dup == 1)): - parent = parent.prev_sibling - if (not parent): - fatal("Device %s has no device parent; this is a config file error" % self.readable_name()) - return parent - - def firstparentdevicelink(self): - """Find the first device in the parent link and record which link it is.""" - link = 0 - parent = self.parent - while (parent and (parent.parent != parent) and (parent.chip_or_device != 'device')): - parent = parent.parent - if ((parent.parent != parent) and (parent.chip_or_device != 'device')): - parent = 0 - while(parent and (parent.dup == 1)): - parent = parent.prev_sibling - link = link + 1 - if (not parent): - fatal("Device %s has no device parent; this is a config file error" % self.readable_name()) - return link - - - def firstparentchip(self): - """Find the first chip in the parent link.""" - parent = self.parent - while (parent): - if ((parent.parent == parent) or (parent.chip_or_device == 'chip')): - return parent - else: - parent = parent.parent - fatal("Device %s has no chip parent; this is a config file error" % self.readable_name()) - - def firstsiblingdevice(self): - """Find the first device in the sibling link.""" - sibling = self.next_sibling - while(sibling and (sibling.path == self.path)): - sibling = sibling.next_sibling - if ((not sibling) and (self.parent.chip_or_device == 'chip')): - sibling = self.parent.next_sibling - while(sibling): - if (sibling.chip_or_device == 'device'): - return sibling - else: - sibling = sibling.children - return 0 - - def gencode(self, file, pass_num): - """Generate static initalizer code for this part. Two passes - are used - the first generates type information, and the second - generates instance information""" - if (pass_num == 0): - if (self.chip_or_device == 'chip'): - return; - else: - if (self.instance): - file.write("struct device %s;\n" \ - % self.instance_name) - else: - file.write("struct device dev_root;\n") - return - # This is pass the second, which is pass number 1 - # this is really just a case statement ... - - if (self.chip_or_device == 'chip'): - if (self.chipconfig): - debug.info(debug.gencode, "gencode: chipconfig(%d)" % \ - self.instance) - file.write("struct %s_config %s" % (self.type_name ,\ - self.chipinfo_name)) - if (self.registercode): - file.write("\t= {\n") - for f, v in self.registercode.items(): - file.write( "\t.%s = %s,\n" % (f, v)) - file.write("};\n") - else: - file.write(";") - file.write("\n") - - if (self.instance == 0): - self.instance_name = "dev_root" - file.write("struct device **last_dev_p = &%s.next;\n" % (self.image.last_device.instance_name)) - file.write("struct device dev_root = {\n") - file.write("\t.ops = &default_dev_ops_root,\n") - file.write("\t.bus = &dev_root.link[0],\n") - file.write("\t.path = { .type = DEVICE_PATH_ROOT },\n") - file.write("\t.enabled = 1,\n\t.links = 1,\n") - file.write("\t.on_mainboard = 1,\n") - file.write("\t.link = {\n\t\t[0] = {\n") - file.write("\t\t\t.dev=&dev_root,\n\t\t\t.link = 0,\n") - file.write("\t\t\t.children = &%s,\n" % self.firstchilddevice().instance_name) - file.write("\t\t},\n") - file.write("\t},\n") - if (self.chipconfig): - file.write("\t.chip_ops = &%s_ops,\n" % self.type_name) - file.write("\t.chip_info = &%s_info_%s,\n" % (self.type_name, self.instance)) - file.write("\t.next = &%s,\n" % self.firstchilddevice().instance_name) - file.write("};\n") - return - - # Don't print duplicate devices, just print their children - if (self.dup): - return - - file.write("struct device %s = {\n" % self.instance_name) - file.write("\t.ops = 0,\n") - file.write("\t.bus = &%s.link[%d],\n" % \ - (self.firstparentdevice().instance_name, \ - self.firstparentdevicelink())) - file.write("\t.path = {%s},\n" % self.path) - file.write("\t.enabled = %d,\n" % self.enabled) - file.write("\t.on_mainboard = 1,\n") - if (self.resources): - file.write("\t.resources = %d,\n" % self.resources) - file.write("\t.resource = {%s\n\t },\n" % self.resource) - file.write("\t.link = {\n"); - links = 0 - bus = self - while(bus and (bus.path == self.path)): - child = bus.firstchilddevice() - if (child or (bus != self) or (bus.next_sibling and (bus.next_sibling.path == self.path))): - file.write("\t\t[%d] = {\n" % links) - file.write("\t\t\t.link = %d,\n" % links) - file.write("\t\t\t.dev = &%s,\n" % self.instance_name) - if (child): - file.write("\t\t\t.children = &%s,\n" %child.instance_name) - file.write("\t\t},\n") - links = links + 1 - if (1): - bus = bus.next_sibling - else: - bus = 0 - file.write("\t},\n") - file.write("\t.links = %d,\n" % (links)) - sibling = self.firstsiblingdevice(); - if (sibling): - file.write("\t.sibling = &%s,\n" % sibling.instance_name) - chip = self.firstparentchip() - if (chip and chip.chipconfig): - file.write("\t.chip_ops = &%s_ops,\n" % chip.type_name) - file.write("\t.chip_info = &%s_info_%s,\n" % (chip.type_name, chip.instance)) - if (self.next_device): - file.write("\t.next=&%s\n" % self.next_device.instance_name) - file.write("};\n") - return - - def addinit(self, code): - """Add init file to this part""" - self.initcode.append(code) - - def addconfig(self, path): - """Add chip config file to this part""" - self.chipconfig = os.path.join(self.dir, path) - self.image.addconfiginclude(self.type_name, self.chipconfig) - - def addregister(self, field, value): - """Register static initialization information""" - if (self.chip_or_device != 'chip'): - fatal("Only chips can have register values") - field = dequote(field) - value = dequote(value) - setdict(self.registercode, field, value) - - def set_enabled(self, enabled): - self.enabled = enabled - - def start_resources(self): - self.resource = "" - self.resources = 0 - - def end_resources(self): - self.resource = "%s" % (self.resource) - - def add_resource(self, type, index, value): - """ Add a resource to a device """ - self.resource = "%s\n\t\t{ .flags=%s, .index=0x%x, .base=0x%x}," % (self.resource, type, index, value) - self.resources = self.resources + 1 - - def set_path(self, path): - self.path = path - if (self.prev_sibling and (self.prev_sibling.path == self.path)): - self.dup = 1 - if (self.prev_device): - self.prev_device.next_device = self.next_device - if (self.next_device): - self.next_device.prev_device = self.prev_device - if (self.image.last_device == self): - self.image.last_device = self.prev_device - self.prev_device = 0 - self.next_device = 0 - - def addpcipath(self, slot, function): - """ Add a relative pci style path from our parent to this device """ - if ((slot < 0) or (slot > 0x1f)): - fatal("Invalid device id") - if ((function < 0) or (function > 7)): - fatal("Invalid pci function %s" % function ) - self.set_path(".type=DEVICE_PATH_PCI,{.pci={ .devfn = PCI_DEVFN(0x%x,%d)}}" % (slot, function)) - - def addpnppath(self, port, device): - """ Add a relative path to a pnp device hanging off our parent """ - if ((port < 0) or (port > 65536)): - fatal("Invalid port") - if ((device < 0) or (device > 0xffff)): - fatal("Invalid device") - self.set_path(".type=DEVICE_PATH_PNP,{.pnp={ .port = 0x%x, .device = 0x%x }}" % (port, device)) - - def addi2cpath(self, device): - """ Add a relative path to a i2c device hanging off our parent """ - if ((device < 0) or (device > 0x7f)): - fatal("Invalid device") - self.set_path(".type=DEVICE_PATH_I2C,{.i2c={ .device = 0x%x }}" % (device)) - - def addapicpath(self, apic_id): - """ Add a relative path to a cpu device hanging off our parent """ - if ((apic_id < 0) or (apic_id > 255)): - fatal("Invalid device") - self.set_path(".type=DEVICE_PATH_APIC,{.apic={ .apic_id = 0x%x }}" % (apic_id)) - - def addpci_domainpath(self, pci_domain): - """ Add a pci_domain number to a chip """ - if ((pci_domain < 0) or (pci_domain > 0xffff)): - fatal("Invalid pci_domain: 0x%x is out of the range 0 to 0xffff" % pci_domain) - self.set_path(".type=DEVICE_PATH_PCI_DOMAIN,{.pci_domain={ .domain = 0x%x }}" % (pci_domain)) - - def addapic_clusterpath(self, cluster): - """ Add an apic cluster to a chip """ - if ((cluster < 0) or (cluster > 15)): - fatal("Invalid apic cluster: %d is out of the range 0 to ff" % cluster) - self.set_path(".type=DEVICE_PATH_APIC_CLUSTER,{.apic_cluster={ .cluster = 0x%x }}" % (cluster)) - - def addcpupath(self, cpu_id): - """ Add a relative path to a cpu device hanging off our parent """ - if ((cpu_id < 0) or (cpu_id > 255)): - fatal("Invalid device") - self.set_path(".type=DEVICE_PATH_CPU,{.cpu={ .id = 0x%x }}" % (cpu_id)) - - - def addcpu_buspath(self, id): - """ Add a cpu_bus to a chip """ - if ((id < 0) or (id > 255)): - fatal("Invalid device") - self.set_path(".type=DEVICE_PATH_CPU_BUS,{.cpu_bus={ .id = 0x%x }}" % (id)) - - def usesoption(self, name): - """Declare option that can be used by this part""" - global global_options - o = getdict(global_options, name) - if (o == 0): - fatal("can't use undefined option %s" % name) - o1 = getdict(self.uses_options, name) - if (o1): - return - setdict(self.uses_options, name, o) - exportoption(o, self.image.exported_options) - -# ----------------------------------------------------------------------------- -# statements -# ----------------------------------------------------------------------------- - -def getdict(dict, name): - if name not in dict.keys(): - debug.info(debug.dict, "Undefined: %s" % name) - return 0 - v = dict.get(name, 0) - debug.info(debug.dict, "getdict %s returning %s" % (name, v)) - return v - -def setdict(dict, name, value): - debug.info(debug.dict, "setdict sets %s to %s" % (name, value)) - if name in dict.keys(): - print "Duplicate in dict: %s" % name - dict[name] = value - -# options. -# to create an option, it has to not exist. -# When an option value is fetched, the fact that it was used is -# remembered. -# Legal things to do: -# set a default value, then set a real value before the option is used. -# set a value, try to set a default, default silently fails. -# Illegal: -# use the value, then try to set the value - -def newoption(name): - global global_options, global_options_by_order - o = getdict(global_options, name) - if (o): - fatal("option %s already defined" % name) - o = option(name) - setdict(global_options, name, o) - global_options_by_order.append(name) - -def newoptionvalue(name, image): - g = getdict(global_option_values, name) - v = option_value(name, g) - if (image): - setdict(image.getvalues(), name, v) - else: - setdict(global_option_values, name, v) - return v - -def getoptionvalue(name, op, image): - global global_option_values - #print "getoptionvalue name %s op %s image %s\n" % (name, op,image) - if (op == 0): - # we want to debug config files, not the config tool, so no: - # print_stack() - fatal("Option %s undefined (missing use command?)" % name) - if (image): - v = getdict(image.getvalues(), name) - else: - v = getdict(global_option_values, name) - return v - -def getoption(name, image): - """option must be declared before being used in a part - if we're not processing a part, then we must - be at the top level where all options are available""" - - global global_uses_options, alloptions, curimage - - #print "getoption: name %s image %s alloptions %s curimage %s\n\n" % (name, image, alloptions, curimage) - curpart = partstack.tos() - if (alloptions): - o = getdict(global_options, name) - elif (curpart): - o = getdict(curpart.uses_options, name) - if (o == 0): - print "curpart.uses_options is %s\n" % curpart.uses_options - else: - o = getdict(global_uses_options, name) - v = getoptionvalue(name, o, image) - if (v == 0): - v = getoptionvalue(name, o, 0) - if (v == 0): - fatal("No value for option %s" % name) - val = v.contents() - if (not (type(val) is types.StringType)): - return v.contents() - if (val == '' or val[0] != '{'): - return v.contents() - s = curimage - curimage = image - val = parse('delexpr', val) - curimage = s - exitiferrors() - return val - -def setoption(name, value, imp): - """Set an option from within a configuration file. Normally this - is only permitted in the target (top level) configuration file. - If 'imp' is true, then set an option implicitly (e.g. 'arch' - and 'mainboard' statements). Implicit options can be set anywhere - the statements are legal, but also performs an implicit 'uses' - for the option""" - - global loc, global_options, global_option_values, curimage - - curpart = partstack.tos() - if (not imp and curpart): - fatal("Options may only be set in target configuration file") - if (imp): - usesoption(name) - if (curpart): - o = getdict(curpart.uses_options, name) - else: - o = getdict(global_uses_options, name) - if (not o): - fatal("Attempt to set nonexistent option %s (missing USES?)" % name) - v = getoptionvalue(name, o, curimage) - if (v == 0): - v = newoptionvalue(name, curimage) - v.setvalue(value) - -def exportoption(op, exported_options): - if (not op.isexportable()): - return - if (not op in exported_options): - exported_options.append(op) - -def setdefault(name, value, isdef): - """Set the default value of an option from within a configuration - file. This is permitted from any configuration file, but will - result in a warning if the default is set more than once. - If 'isdef' is set, we're defining the option in Options.lb so - there is no need for 'uses'.""" - - global loc, global_options, curimage - - if (isdef): - o = getdict(global_options, name) - if (not o): - return - image = 0 - else: - curpart = partstack.tos() - if (curpart): - o = getdict(curpart.uses_options, name) - else: - o = getdict(global_uses_options, name) - if (not o): - fatal("Attempt to set default for nonexistent option %s (missing USES?)" % name) - image = curimage - - v = getoptionvalue(name, o, image) - if (v == 0): - v = newoptionvalue(name, image) - v.setdefault(value) - -def setnodefault(name): - global loc, global_options - o = getdict(global_options, name) - if (not o): - return - v = getdict(global_option_values, name) - if (v != 0): - warning("removing default for %s" % name) - del global_option_values[name] - -def setcomment(name, value): - global loc, global_options - o = getdict(global_options, name) - if (not o): - fatal("setcomment: %s not here" % name) - o.setcomment(value, loc) - -def setexported(name): - global global_options - o = getdict(global_options, name) - if (not o): - fatal("setexported: %s not here" % name) - o.setexportable() - global_exported_options.append(o) - -def setnoexport(name): - global global_options - o = getdict(global_options, name) - if (not o): - fatal("setnoexport: %s not here" % name) - o.setnoexport() - if (o in global_exported_options): - global_exported_options.remove(o) - -def setexportable(name): - global global_options - o = getdict(global_options, name) - if (not o): - fatal("setexportable: %s not here" % name) - o.setexportable() - -def setformat(name, fmt): - global global_options - o = getdict(global_options, name) - if (not o): - fatal("setformat: %s not here" % name) - o.setformat(fmt) - -def getformated(name, image): - global global_options, global_option_values - o = getdict(global_options, name) - v = getoption(name, image) - f = o.getformat() - return (f % v) - -def setwrite(name, part): - global global_options - o = getdict(global_options, name) - if (not o): - fatal("setwrite: %s not here" % name) - o.setwrite(part) - -def hasvalue(name, image): - global global_options - o = getdict(global_options, name) - if (o == 0): - return 0 - v = 0 - if (image): - v = getdict(image.getvalues(), name) - if (v == 0): - v = getdict(global_option_values, name) - return (v != 0) - -def isset(name, part): - global global_uses_options, global_option_values, curimage - if (part): - o = getdict(part.uses_options, name) - else: - o = getdict(global_uses_options, name) - if (o == 0): - return 0 - v = 0 - if (curimage): - v = getdict(curimage.getvalues(), name) - if (v == 0): - v = getdict(global_option_values, name) - return (v != 0 and v.isset()) - -def usesoption(name): - global global_options, global_uses_options - curpart = partstack.tos() - if (curpart): - curpart.usesoption(name) - return - o = getdict(global_options, name) - if (o == 0): - fatal("Can't use undefined option %s" % name) - o1 = getdict(global_uses_options, name) - if (o1): - return - setdict(global_uses_options, name, o) - exportoption(o, global_exported_options) - -def validdef(name, defval): - global global_options - o = getdict(global_options, name) - if (not o): - fatal("validdef: %s not here" % name) - if ((defval & 1) != 1): - fatal("Must specify default value for option %s" % name) - if ((defval & 2) != 2): - fatal("Must specify export for option %s" % name) - if ((defval & 4) != 4): - fatal("Must specify comment for option %s" % name) - -def loadoptions(path, file, rule): - file = os.path.join('src', path, file) - optionsfile = os.path.join(treetop, file) - fp = safe_open(optionsfile, 'r') - loc.push(file) - if (not parse(rule, fp.read())): - fatal("Could not parse file") - loc.pop() - -def addinit(path): - global curimage, dirstack - if (path[0] == '/'): - curimage.setinitfile(treetop + '/src/' + path) - else: - curimage.setinitfile(dirstack.tos() + '/' + path) - print "Adding init file: %s" % path - -def addconfig(path): - global partstack - curpart = partstack.tos() - curpart.addconfig(path) - -def addregister(field, value): - global partstack - curpart = partstack.tos() - curpart.addregister(field, value) - -def addcrt0include(path): - """we do the crt0include as a dictionary, so that if needed we - can trace who added what when. Also it makes the keys - nice and unique.""" - global curimage - curimage.addinitinclude(0, path) - -def addinitinclude(str, path): - global curimage - curimage.addinitinclude(dequote(str), path) - -def addldscript(path): - global curimage, dirstack - curdir = dirstack.tos() - if (path[0] == '/'): - fullpath = treetop + '/src/' + path - else: - fullpath = curdir + '/' + path - debug.info(debug.statement, "fullpath :%s: curdir :%s: path :%s:" % (fullpath, curdir, path)) - curimage.addldscript(fullpath) - -def payload(path): - global curimage - curimage.setpayload(path) - adduserdefine("PAYLOAD:=%s"%path) - -def startromimage(name): - global romimages, curimage, target_dir, target_name - curpart = partstack.tos() - print "Configuring ROMIMAGE %s Curimage %s" % (name, curimage) - print "Curpart is %s\n" % curpart - o = getdict(romimages, name) - if (o): - fatal("romimage %s previously defined" % name) - curimage = romimage(name) - curimage.settargetdir(os.path.join(target_dir, name)) - #o = partobj(curimage, target_dir, 0, 'board', target_name) - #curimage.setroot(o) - setdict(romimages, name, curimage) - dodir('/config', 'Config.lb') - -def endromimage(): - global curimage - global bootblocksize - mainboard() - imagesize = getoption("CONFIG_ROM_IMAGE_SIZE", curimage) - bootblocksize += imagesize - print "End ROMIMAGE" - curimage = 0 - #curpart = 0 - -def mainboardsetup(path): - global full_mainboard_path, mainboard_path - mainboard_path = os.path.join('mainboard', path) - loadoptions(mainboard_path, 'Options.lb', 'mainboardvariables') - full_mainboard_path = os.path.join(treetop, 'src', 'mainboard', path) - vendor = re.sub("/.*", "", path) - part_number = re.sub("[^/]*/", "", path) - setdefault('CONFIG_MAINBOARD', full_mainboard_path, 0) - setdefault('CONFIG_MAINBOARD_VENDOR', vendor, 0) - setdefault('CONFIG_MAINBOARD_PART_NUMBER', part_number, 0) - -def mainboard(): - global curimage, dirstack, partstack - file = 'Config.lb' - partdir = mainboard_path - srcdir = os.path.join(treetop, 'src') - fulldir = os.path.join(srcdir, partdir) - type_name = flatten_name(partdir) - newpart = partobj(curimage, fulldir, partstack.tos(), 'mainboard', \ - 'mainboard', 0, 'chip') - #print "Configuring PART %s" % (type) - partstack.push(newpart) - #print " new PART tos is now %s\n" %partstack.tos().info() - dirstack.push(fulldir) - loadoptions(mainboard_path, 'Options.lb', 'mainboardvariables') - # special case for 'cpu' parts. - # we could add a new function too, but this is rather trivial. - # if the part is a cpu, and we haven't seen it before, - # arrange to source the directory /cpu/'type' - doconfigfile(srcdir, partdir, file, 'cfgfile') - curimage.setroot(partstack.tos()) - partpop() - -def addbuildrom(filename, size, roms): - global buildroms - print "Build ROM size %d" % size - b = buildrom(filename, size, roms) - buildroms.append(b) - -def addpci_rom(filename, vendor, device): - global pciroms - print "Add PCI ROM %s" %filename - p = pci_rom(filename, vendor, device) - pciroms.append(p) - -def addinitobject(object_name): - global curimage - curimage.addinitobjectrule(object_name) - -def addobject(object_name): - global curimage - curimage.addobjectrule(object_name) - -def adddriver(driver_name): - global curimage - curimage.adddriverrule(driver_name) - -def addsmmobject(object_name): - global curimage - curimage.addsmmobjectrule(object_name) - -def target(name): - global target_dir, target_name - print "Configuring TARGET %s" % name - target_name = name - target_dir = os.path.join(os.path.dirname(loc.file()), name) - if not os.path.isdir(target_dir): - print "Creating directory %s" % target_dir - os.makedirs(target_dir) - print "Will place Makefile, crt0.S, etc. in %s" % target_dir - - -def cpudir(path): - global cpu_type - if (cpu_type and (cpu_type != path)): - fatal("Two different CPU types: %s and %s" % (cpu_type, path)) - srcdir = "/cpu/%s" % path - dodir(srcdir, "Config.lb") - cpu_type = path - -def devicepart(type): - global curimage, dirstack, partstack - newpart = partobj(curimage, 0, partstack.tos(), type, \ - '', 0, 'device') - #print "Configuring PART %s" % (type) - partstack.push(newpart) - #print " new PART tos is now %s\n" %partstack.tos().info() - # just push TOS, so that we can pop later. - dirstack.push(dirstack.tos()) - -def part(type, path, file, name): - global curimage, dirstack, partstack - partdir = os.path.join(type, path) - srcdir = os.path.join(treetop, 'src') - fulldir = os.path.join(srcdir, partdir) - type_name = flatten_name(partdir) - newpart = partobj(curimage, fulldir, partstack.tos(), type, \ - type_name, name, 'chip') - #print "Configuring PART %s, path %s" % (type, path) - partstack.push(newpart) - #print " new PART tos is now %s\n" %partstack.tos().info() - dirstack.push(fulldir) - # special case for 'cpu' parts. - # we could add a new function too, but this is rather trivial. - # if the part is a cpu, and we haven't seen it before, - # arrange to source the directory /cpu/'type' - if (type == 'cpu'): - cpudir(path) - else: - doconfigfile(srcdir, partdir, file, 'cfgfile') - -def partpop(): - global dirstack, partstack - curpart = partstack.tos() - if (curpart == 0): - fatal("Trying to pop non-existent part") - #print "End PART %s" % curpart.part - # Warn if options are used without being set in this part - for op in curpart.uses_options.keys(): - if (not isset(op, curpart)): - notice("Option %s using default value %s" % (op, getformated(op, curpart.image))) - oldpart = partstack.pop() - dirstack.pop() - #print "partstack.pop, TOS is now %s\n" % oldpart.info() - -def dodir(path, file): - """dodir is like part but there is no new part""" - global dirstack - # if the first char is '/', it is relative to treetop, - # else relative to curdir - # os.path.join screws up if the name starts with '/', sigh. - print "Configuring DIR %s" % os.path.join(path, file) - if (path[0] == '/'): - fullpath = os.path.join(treetop, 'src') - path = re.sub('^/*', '', path) - else: - fullpath = dirstack.tos() - debug.info(debug.statement, "DODIR: path %s, fullpath %s" % (path, fullpath)) - dirstack.push(os.path.join(fullpath, path)) - doconfigfile(fullpath, path, file, 'cfgfile') - dirstack.pop() - -def dofile(path): - """dofile is a simple include for single files""" - # if the first char is '/', it is relative to treetop, - # else relative to curdir - # os.path.join screws up if the name starts with '/', sigh. - if (path[0] == '/'): - fullpath = os.path.join(treetop, 'src') - path = re.sub('^/*', '', path) - else: - fullpath = dirstack.tos() - print "INCLUDE %s" %path - debug.info(debug.statement, "DOFILE: path %s, fullpath %s" % (path, fullpath)) - doconfigfile(fullpath, '', path, 'cfgfile') - -def lookup(name): - global curimage - return getoption(name, curimage) - -def addrule(id): - global curimage - curimage.addmakerule(id) - -def adduserdefine(str): - global curimage - curimage.adduserdefine(str) - -def addaction(id, str): - global curimage - curimage.addmakeaction(id, str) - -def adddep(id, str): - global curimage - curimage.addmakedepend(id, str) - -def setarch(my_arch): - """arch is 'different' ... darn it.""" - global curimage - print "SETTING CONFIG_ARCH %s\n" % my_arch - curimage.setarch(my_arch) - setdefault('CONFIG_ARCH', my_arch, 1) - part('arch', my_arch, 'Config.lb', 0) - -def doconfigfile(path, confdir, file, rule): - rname = os.path.join(confdir, file) - loc.push(rname) - fullpath = os.path.join(path, rname) - fp = safe_open(fullpath, 'r') - if (not parse(rule, fp.read())): - fatal("Could not parse file") - exitiferrors() - loc.pop() - -#============================================================================= -# MISC FUNCTIONS -#============================================================================= -def ternary(val, yes, no): - debug.info(debug.statement, "ternary %s" % expr) - debug.info(debug.statement, "expr %s a %d yes %d no %d"% (expr, a, yes, no)) - if (val == 0): - debug.info(debug.statement, "Ternary returns %d" % yes) - return yes - else: - debug.info(debug.statement, "Ternary returns %d" % no) - return no - -def tohex(name): - """atoi is in the python library, but not strtol? Weird!""" - return eval('int(%s)' % name) - -def IsInt(str): - """ Is the given string an integer?""" - try: - num = long(str) - return 1 - except ValueError: - return 0 - -def dequote(str): - a = re.sub("^\"", "", str) - a = re.sub("\"$", "", a) - # highly un-intuitive, need four \! - a = re.sub("\\\\\"", "\"", a) - return a - -def flatten_name(str): - a = re.sub("[/-]", "_", str) - return a - -def topify(path): - """If the first part of <path> matches treetop, replace - that part with $(TOP)""" - if path[0:len(treetop)] == treetop: - path = path[len(treetop):len(path)] - if (path[0:1] == "/"): - path = path[1:len(path)] - path = "$(TOP)/" + path - return path - -%% -# to make if work without 2 passses, we use an old hack from SIMD, the -# context bit. If the bit is 1, then ops get done, otherwise -# ops don't get done. From the top level, context is always -# 1. In an if, context depends on eval of the if condition - -parser Config: - ignore: r'\s+' - ignore: "#.*?\r?\n" - - # less general tokens should come first, otherwise they get matched - # by the re's - token ACTION: 'action' - token ADDACTION: 'addaction' - token ALWAYS: 'always' - token ARCH: 'arch' - token BUILDROM: 'buildrom' - token COMMENT: 'comment' - token CONFIG: 'config' - token CPU: 'cpu' - token CPU_BUS: 'cpu_bus' - token CHIP: 'chip' - token DEFAULT: 'default' - token DEFINE: 'define' - token DEPENDS: 'depends' - token DEVICE: 'device' - token DEVICE_ID: 'device_id' - token DIR: 'dir' - token DRIVER: 'driver' - token DRQ: 'drq' - token ELSE: 'else' - token END: 'end' - token EOF: '$' - token EQ: '=' - token EXPORT: 'export' - token FORMAT: 'format' - token IF: 'if' - token INIT: 'init' - token INITOBJECT: 'initobject' - token INITINCLUDE: 'initinclude' - token INCLUDE: 'include' - token IO: 'io' - token IRQ: 'irq' - token LDSCRIPT: 'ldscript' - token LOADOPTIONS: 'loadoptions' - token MAINBOARD: 'mainboard' - token MAINBOARDINIT: 'mainboardinit' - token MAKEDEFINE: 'makedefine' - token MAKERULE: 'makerule' - token MEM: 'mem' - token NEVER: 'never' - token NONE: 'none' - token NORTHBRIDGE: 'northbridge' - token OBJECT: 'object' - token OPTION: 'option' - token PAYLOAD: 'payload' - token PCI_ROM: 'pci_rom' - token PMC: 'pmc' - token PRINT: 'print' - token REGISTER: 'register' - token ROMIMAGE: 'romimage' - token SMMOBJECT: 'smmobject' - token SOUTHBRIDGE: 'southbridge' - token SUPERIO: 'superio' - token TARGET: 'target' - token USED: 'used' - token USES: 'uses' - token VENDOR_ID: 'vendor_id' - token WRITE: 'write' - token NUM: '[0-9]+' - token HEX_NUM: '[0-9a-fA-F]+' - token HEX_PREFIX: '0x' - # Why is path separate? Because paths to resources have to at least - # have a slash, we thinks - token PATH: r'[-a-zA-Z0-9_.][-a-zA-Z0-9/_.]+[-a-zA-Z0-9_.]+' - # Dir's on the other hand are abitrary - # this may all be stupid. - token RULE: r'[-a-zA-Z0-9_$()./]+[-a-zA-Z0-9_ $()./]+[-a-zA-Z0-9_$()./]+' - token DIRPATH: r'[-a-zA-Z0-9_$()./,]+' - token ID: r'[a-zA-Z_.]+[a-zA-Z0-9_.]*' - token DELEXPR: r'{([^}]+|\\.)*}' - token STR: r'"([^\\"]+|\\.)*"' - token RAWTEXT: r'.*' - token ON: 'on' - token OFF: 'off' - token PCI: 'pci' - token PNP: 'pnp' - token I2C: 'i2c' - token APIC: 'apic' - token APIC_CLUSTER: 'apic_cluster' - token CPU: 'cpu' - token CPU_BUS: 'cpu_bus' - token PCI_DOMAIN: 'pci_domain' - - - rule expr: logical {{ l = logical }} - ( "&&" logical {{ l = l and logical }} - | "[|][|]" logical {{ l = l or logical }} - )* {{ return l }} - - rule logical: factor {{ n = factor }} - ( "[+]" factor {{ n = n+factor }} - | "-" factor {{ n = n-factor }} - )* {{ return n }} - - rule factor: term {{ v = term }} - ( "[*]" term {{ v = v*term }} - | "/" term {{ v = v/term }} - | "<<" term {{ v = v << term }} - | ">=" term {{ v = (v < term)}} - )* {{ return v }} - - # A term is a number, variable, or an expression surrounded by parentheses - rule term: NUM {{ return long(NUM, 10) }} - | HEX_PREFIX HEX_NUM {{ return long(HEX_NUM, 16) }} - | ID {{ return lookup(ID) }} - | unop {{ return unop }} - | "\\(" expr "\\)" {{ return expr }} - - rule unop: "!" expr {{ return not(expr) }} - - rule partend<<C>>: (stmt<<C>>)* END {{ if (C): partpop()}} - - # This is needed because the legacy cpu command could not distinguish - # between cpu vendors. It should just be PATH, but getting this change - # into the source tree will be tricky... - # DO NOT USE ID AS IT MAY GO AWAY IN THE FUTURE - rule partid: ID {{ return ID }} - | PATH {{ return PATH }} - -# rule parttype: NORTHBRIDGE {{ return 'northbridge' }} -# | SUPERIO {{ return 'superio' }} -# | PMC {{ return 'pmc' }} -# | SOUTHBRIDGE {{ return 'southbridge' }} -# | CPU {{ return 'cpu' }} -# | CHIP {{ return '' }} -# - rule parttype: CHIP {{ return '' }} - - rule partdef<<C>>: {{ name = 0 }} - parttype partid - [ STR {{ name = dequote(STR) }} - ] {{ if (C): part(parttype, partid, 'Config.lb', name) }} - partend<<C>> - - rule arch<<C>>: ARCH ID {{ if (C): setarch(ID) }} - partend<<C>> - - rule mainboardinit<<C>>: - MAINBOARDINIT DIRPATH {{ if (C): addcrt0include(DIRPATH)}} - - rule initinclude<<C>>: - INITINCLUDE - STR - DIRPATH {{ if (C): addinitinclude(STR, DIRPATH)}} - - rule initobject<<C>>: - INITOBJECT DIRPATH {{ if (C): addinitobject(DIRPATH)}} - - rule object<<C>>: OBJECT DIRPATH {{ if (C): addobject(DIRPATH)}} - - rule driver<<C>>: DRIVER DIRPATH {{ if (C): adddriver(DIRPATH)}} - - rule smmobject<<C>>: - SMMOBJECT DIRPATH {{ if (C): addsmmobject(DIRPATH)}} - - - rule dir<<C>>: DIR DIRPATH {{ if (C): dodir(DIRPATH, 'Config.lb') }} - - rule default<<C>>: DEFAULT ID EQ value {{ if (C): setdefault(ID, value, 0) }} - - rule ldscript<<C>>: LDSCRIPT DIRPATH {{ if (C): addldscript(DIRPATH) }} - - rule iif<<C>>: IF ID {{ c = lookup(ID) }} - (stmt<<C and c>>)* - [ ELSE (stmt<<C and not c>>)* ] - END - - rule makerule<<C>>: MAKERULE RULE {{ if (C): addrule(RULE) }} - ( DEPENDS STR {{ if (C): adddep(RULE, STR) }} - | ACTION STR {{ if (C): addaction(RULE, STR) }} - )* - END - - rule makedefine<<C>>: - MAKEDEFINE RAWTEXT {{ if (C): adduserdefine(RAWTEXT) }} - - rule addaction<<C>>: - ADDACTION ID STR {{ if (C): addaction(ID, STR) }} - - rule init<<C>>: INIT DIRPATH {{ if (C): addinit(DIRPATH) }} - - rule field: STR {{ return STR }} - - rule register<<C>>: REGISTER field '=' STR {{ if (C): addregister(field, STR) }} - - rule enable<<C>>: {{ val = 1 }} - ( ON {{ val = 1 }} - | OFF {{ val = 0 }} - ) {{ if(C): partstack.tos().set_enabled(val) }} - - rule resource<<C>>: {{ type = "" }} - ( IO {{ type = "IORESOURCE_FIXED | IORESOURCE_ASSIGNED | IORESOURCE_IO" }} - | MEM {{ type = "IORESOURCE_FIXED | IORESOURCE_ASSIGNED | IORESOURCE_MEM" }} - | IRQ {{ type = "IORESOURCE_FIXED | IORESOURCE_ASSIGNED | IORESOURCE_IRQ" }} - | DRQ {{ type = "IORESOURCE_FIXED | IORESOURCE_ASSIGNED | IORESOURCE_DRQ" }} - ) - term '=' {{ index = term }} - term {{ value = term }} - {{ if (C): partstack.tos().add_resource(type, index, value) }} - - - rule resources<<C>>: {{ if (C): partstack.tos().start_resources() }} - ( resource<<C>> )* - {{ if (C): partstack.tos().end_resources() }} - - - rule pci<<C>>: PCI {{ if (C): devicepart('pci') }} - - HEX_NUM {{ slot = int(HEX_NUM,16) }} - '.' HEX_NUM {{ function = int(HEX_NUM, 16) }} - {{ if (C): partstack.tos().addpcipath(slot, function) }} - rule pci_domain<<C>>: - PCI_DOMAIN {{ if (C): devicepart('pci_domain') }} - HEX_NUM {{ pci_domain = int(HEX_NUM, 16) }} - {{ if (C): partstack.tos().addpci_domainpath(pci_domain) }} - - rule pnp<<C>>: PNP {{ if (C): devicepart('pnp') }} - HEX_NUM {{ port = int(HEX_NUM,16) }} - '.' HEX_NUM {{ device = int(HEX_NUM, 16) }} - {{ if (C): partstack.tos().addpnppath(port, device) }} - - rule i2c<<C>>: I2C {{ if (C): devicepart('i2c') }} - HEX_NUM {{ device = int(HEX_NUM, 16) }} - {{ if (C): partstack.tos().addi2cpath(device) }} - - rule apic<<C>>: APIC {{ if (C): devicepart('apic') }} - HEX_NUM {{ apic_id = int(HEX_NUM, 16) }} - {{ if (C): partstack.tos().addapicpath(apic_id) }} - - rule apic_cluster<<C>>: APIC_CLUSTER {{ if (C): devicepart('apic_cluster') }} - HEX_NUM {{ cluster = int(HEX_NUM, 16) }} - {{ if (C): partstack.tos().addapic_clusterpath(cluster) }} - - rule include<<C>>: INCLUDE DIRPATH {{ dofile(DIRPATH) }} - - rule cpu<<C>>: CPU {{ if (C): devicepart('cpu') }} - HEX_NUM {{ id = int(HEX_NUM, 16) }} - {{ if (C): partstack.tos().addcpupath(id) }} - - rule cpu_bus<<C>>: CPU_BUS {{ if (C): devicepart('cpu_bus') }} - HEX_NUM {{ id = int(HEX_NUM, 16) }} - {{ if (C): partstack.tos().addcpu_buspath(id) }} - - rule dev_path<<C>>: - pci<<C>> {{ return pci }} - | pci_domain<<C>> {{ return pci_domain }} - | pnp<<C>> {{ return pnp }} - | i2c<<C>> {{ return i2c }} - | apic<<C>> {{ return apic }} - | apic_cluster<<C>> {{ return apic_cluster }} - | cpu<<C>> {{ return cpu }} - | cpu_bus<<C>> {{ return cpu_bus }} - - rule prtval: expr {{ return str(expr) }} - | STR {{ return STR }} - - rule prtlist: prtval {{ el = "%(" + prtval }} - ( "," prtval {{ el = el + "," + prtval }} - )* {{ return el + ")" }} - - rule prtstmt<<C>>: PRINT STR {{ val = STR }} - [ "," prtlist {{ val = val + prtlist }} - ] {{ if (C): print eval(val) }} - - rule config<<C>>: CONFIG PATH {{ if (C): addconfig(PATH) }} - - rule device<<C>>: DEVICE dev_path<<C>> - enable<<C>> - resources<<C>> - partend<<C>> - - rule stmt<<C>>: arch<<C>> {{ return arch}} - | addaction<<C>> {{ return addaction }} - | config<<C>> {{ return config}} - | default<<C>> {{ return default}} - | dir<<C>> {{ return dir}} - | driver<<C>> {{ return driver }} - | iif<<C>> {{ return iif }} - | init<<C>> {{ return init }} - | initinclude<<C>> {{ return initinclude }} - | include<<C>> {{ return include }} - | initobject<<C>> {{ return initobject }} - | ldscript<<C>> {{ return ldscript}} - | mainboardinit<<C>> {{ return mainboardinit }} - | makedefine<<C>> {{ return makedefine }} - | makerule<<C>> {{ return makerule }} - | object<<C>> {{ return object }} - | option<<C>> {{ return option }} - | partdef<<C>> {{ return partdef }} - | prtstmt<<C>> {{ return prtstmt }} - | register<<C>> {{ return register }} - | device<<C>> {{ return device }} - | smmobject<<C>> {{ return smmobject }} - - # ENTRY for parsing Config.lb file - rule cfgfile: (uses<<1>>)* - (stmt<<1>>)* - EOF {{ return 1 }} - - rule usesid<<C>>: ID {{ if (C): usesoption(ID) }} - - rule uses<<C>>: USES (usesid<<C>>)+ - - rule mainboardvariables: (uses<<1>>)* - (default<<1>>)* - (option<<1>>)* - END {{ return 1}} - - rule value: STR {{ return dequote(STR) }} - | expr {{ return expr }} - | DELEXPR {{ return DELEXPR }} - - rule option<<C>>: OPTION ID EQ value {{ if (C): setoption(ID, value, 0) }} - - rule opif<<C>>: IF ID {{ c = lookup(ID) }} - (opstmt<<C and c>>)* - [ ELSE (opstmt<<C and not c>>)* ] - END - - rule opstmt<<C>>: option<<C>> - | opif<<C>> - | prtstmt<<C>> - - rule payload<<C>>: PAYLOAD DIRPATH {{ if (C): payload(DIRPATH) }} - - rule mainboard: - MAINBOARD PATH {{ mainboardsetup(PATH) }} - - rule romif<<C>>: IF ID {{ c = lookup(ID) }} - (romstmt<<C and c>>)* - [ ELSE (romstmt<<C and not c>>)* ] - END - - rule romstmt<<C>>: romif<<C>> - | option<<C>> - | payload<<C>> - - rule romimage: ROMIMAGE STR {{ startromimage(dequote(STR)) }} - (romstmt<<1>>)* - END {{ endromimage() }} - - rule roms: STR {{ s = '[' + STR }} - ( STR {{ s = s + "," + STR }} - )* {{ return eval(s + ']') }} - - rule buildrom: BUILDROM DIRPATH expr roms {{ addbuildrom(DIRPATH, expr, roms) }} - - rule pci_vid: VENDOR_ID EQ term {{ return term }} - - rule pci_did: DEVICE_ID EQ term {{ return term }} - - - rule pci_rom: PCI_ROM DIRPATH pci_vid pci_did {{ addpci_rom(DIRPATH, pci_vid, pci_did) }} - - rule romstmts: romimage - | buildrom - | opstmt<<1>> - | pci_rom - - # ENTRY for parsing root part - rule board: {{ loadoptions("config", "Options.lb", "options") }} - TARGET DIRPATH {{ target(DIRPATH) }} - mainboard - (romstmts)* - EOF {{ return 1 }} - - # ENTRY for parsing a delayed value - rule delexpr: "{" expr "}" EOF {{ return expr }} - - rule wrstr<<ID>>: STR {{ setwrite(ID, dequote(STR)) }} - - rule defstmts<<ID>>: {{ d = 0 }} - ( DEFAULT - ( value {{ setdefault(ID, value, 1) }} - | NONE {{ setnodefault(ID) }} - ) {{ d = d | 1 }} - | FORMAT STR {{ setformat(ID, dequote(STR)) }} - | EXPORT - ( ALWAYS {{ setexported(ID) }} - | USED {{ setexportable(ID) }} - | NEVER {{ setnoexport(ID) }} - ) {{ d = d | 2 }} - | COMMENT STR {{ setcomment(ID, dequote(STR)); d = d | 4 }} - | WRITE (wrstr<<ID>>)+ - )+ {{ return d }} - - rule define: DEFINE ID {{ newoption(ID) }} - defstmts<<ID>> END {{ validdef(ID, defstmts) }} - - # ENTRY for parsing Options.lb file - rule options: (define)* EOF {{ return 1 }} -%% - -#============================================================================= -# FILE OUTPUT -#============================================================================= -def writemakefileheader(file, fname): - file.write("# File: %s is autogenerated\n" % fname) - -def writemakefilefooter(file, fname): - file.write("\n\n%s: %s %s\n" - % (os.path.basename(fname), os.path.abspath(sys.argv[0]), top_config_file)) - file.write("\t(cd %s ; export PYTHONPATH=%s/util/newconfig ; python %s %s %s)\n\n" - % (os.getcwd(), treetop, sys.argv[0], sys.argv[1], sys.argv[2])) - -def writemakefilesettings(path): - """ Write Makefile.settings to seperate the settings - from the actual makefile creation.""" - - global treetop, target_dir - - filename = os.path.join(path, "Makefile.settings") - print "Creating", filename - file = safe_open(filename, 'w+') - writemakefileheader(file, filename) - file.write("TOP:=%s\n" % (treetop)) - file.write("TARGET_DIR:=%s\n" % target_dir) - writemakefilefooter(file, filename) - file.close() - -def writeimagesettings(image): - """Write Makefile.settings to seperate the settings - from the actual makefile creation.""" - - global treetop - global global_options_by_order - - filename = os.path.join(image.gettargetdir(), "Makefile.settings") - print "Creating", filename - file = safe_open(filename, 'w+') - writemakefileheader(file, filename) - file.write("TOP:=%s\n" % (treetop)) - file.write("TARGET_DIR:=%s\n" % (image.gettargetdir())) - file.write("\n") - exported = [] - for o in global_exported_options: - exported.append(o) - for o in image.exported_options: - if (not o in exported): - exported.append(o) - for o in exported: - file.write("export %s:=" % o.name) - if (hasvalue(o.name, image)): - file.write("%s" % getformated(o.name, image)) - file.write("\n") - file.write("\n") - file.write("export VARIABLES :=\n") - for o in exported: - file.write("export VARIABLES += %s\n" % o.name) - file.write("\n") - # writemakefilefooter(file,filename) - file.close() - -# write the romimage makefile -# let's try the Makefile -# first, dump all the -D stuff - -def writeimagemakefile(image): - makefilepath = os.path.join(image.gettargetdir(), "Makefile") - print "Creating", makefilepath - file = safe_open(makefilepath, 'w+') - writemakefileheader(file, makefilepath) - - # main rule - file.write("\nall: coreboot.rom\n\n") - file.write(".PHONY: all\n\n") - #file.write("include cpuflags\n") - # Putting "include cpuflags" in the Makefile has the problem that the - # cpuflags file would be generated _after_ we want to include it. - # Instead, let make do the work of computing CPUFLAGS: - file.write("# Get the value of TOP, VARIABLES, and several other variables.\n") - file.write("include Makefile.settings\n\n") - file.write("# Function to create an item like -Di586 or -DCONFIG_MAX_CPUS='1' or -Ui686\n") - file.write("D_item = $(shell echo '$(if $(subst undefined,,$(origin $1)),\\#define $1$(if $($1), $($1),),\\#undef $1)' >> settings.h)\n\n") - file.write("# Compute the value of CPUFLAGS here during make's first pass.\n") - file.write("CPUFLAGS := $(strip $(shell echo '/* autogenerated */' > settings.h)$(foreach _var_,$(VARIABLES),$(call D_item,$(_var_)))-include $(CURDIR)/settings.h)\n\n") - - for i in image.getuserdefines(): - file.write("%s\n" %i) - file.write("\n") - - # print out all the object dependencies - file.write("\n# object dependencies (objectrules:)\n") - file.write("INIT-OBJECTS :=\n") - file.write("OBJECTS :=\n") - file.write("DRIVER :=\n") - file.write("\nSOURCES :=\n") - for irule, init in image.getinitobjectrules().items(): - i_name = init[0] - i_source = init[1] - file.write("INIT-OBJECTS += %s\n" % (i_name)) - file.write("SOURCES += %s\n" % (i_source)) - - for objrule, obj in image.getobjectrules().items(): - obj_name = obj[0] - obj_source = obj[1] - file.write("OBJECTS += %s\n" % (obj_name)) - file.write("SOURCES += %s\n" % (obj_source)) - - for srule, smm in image.getsmmobjectrules().items(): - s_name = smm[0] - s_source = smm[1] - file.write("SMM-OBJECTS += %s\n" % (s_name)) - file.write("SOURCES += %s\n" % (s_source)) - - - # for chip_target.c - file.write("OBJECTS += static.o\n") - file.write("SOURCES += static.c\n") - - for driverrule, driver in image.getdriverrules().items(): - obj_name = driver[0] - obj_source = driver[1] - file.write("DRIVER += %s\n" % (obj_name)) - file.write("SOURCES += %s\n" % (obj_source)) - - # Print out all ldscript.ld dependencies. - file.write("\n# ldscript.ld dependencies:\n") - file.write("LDSUBSCRIPTS-1 := \n" ) - for script in image.getldscripts(): - file.write("LDSUBSCRIPTS-1 += %s\n" % topify(script)) - - # Print out the dependencies for crt0_includes.h - file.write("\n# Dependencies for crt0_includes.h\n") - file.write("CRT0_INCLUDES:=\n") - for inc in image.getinitincludes(): - if (local_path.match(inc)): - file.write("CRT0_INCLUDES += %s\n" % inc) - else: - file.write("CRT0_INCLUDES += $(TOP)/src/%s\n" % inc) - - # Print out the user defines. - file.write("\n# userdefines:\n") - - # Print out the base rules. - # Need to have a rule that counts on 'all'. - file.write("\n# mainrulelist:") - - # Print out any user rules. - file.write("\n# From makerule or docipl commands:\n") - - file.write("\n# initobjectrules:\n") - for irule, init in image.getinitobjectrules().items(): - image.addmakeobject(file, init); - - file.write("\n# objectrules (don't duplicate initobjects):\n") - for objrule, obj in image.getobjectrules().items(): - - if (getdict(image.getinitobjectrules(), obj[3])): - debug.info(debug.object, "skipping %s" % (obj[3])) - else: - image.addmakeobject(file, obj); - - for driverrule, driver in image.getdriverrules().items(): - source = topify(driver[1]) - file.write("%s: %s\n" % (driver[0], source)) - file.write("\t$(CC) -c $(CFLAGS) -o $@ $<\n") - #file.write("%s\n" % objrule[2]) - - file.write("\n# smmobjectrules:\n") - for irule, smm in image.getsmmobjectrules().items(): - image.addmakeobject(file, smm); - - # special rule for chip_target.c - file.write("static.o: static.c\n") - file.write("\t$(CC) -c $(CFLAGS) -o $@ $<\n") - - # Print out the rules that will make cause the files - # generated by NLBConfig.py to be remade if any dependencies change. - - file.write("\n# Remember the automatically generated files\n") - file.write("GENERATED:=\n") - for genfile in ['Makefile', - 'nsuperio.c', - 'static.c', - 'corebootDoc.config' ]: - file.write("GENERATED += %s\n" % genfile) - file.write("GENERATED += %s\n" % image.getincludefilename()) - - keys = global_options_by_order - keys.sort() - file.write("\necho:\n") - for key in keys: - file.write("\t@echo %s='$(%s)'\n"% (key,key)) - - for i, m in image.getmakerules().items(): - file.write("%s: " %i) - for i in m.dependency: - file.write("%s " % i) - file.write("\n") - for i in m.actions: - file.write("\t%s\n" % i) - writemakefilefooter(file, makefilepath) - file.close() - -# -def writemakefile(path): - global rommapping - global bootblocksize - makefilepath = os.path.join(path, "Makefile") - print "Creating", makefilepath - file = safe_open(makefilepath, 'w+') - writemakefileheader(file, makefilepath) - - # Hack to get the necessary settings (CONFIG_COMPRESSED_PAYLOAD_LZMA): - file.write("include %s/Makefile.settings\n\n" % romimages.keys()[0]) - - # main rule - file.write("\nall: ") - for i in buildroms: - file.write(" %s" % i.name) - file.write("\n") - - # cbfstool rules - file.write("\ncbfstool:\n\tmkdir -p cbfs\n\t$(MAKE) -C $(TOP)/util/cbfstool obj=$(shell pwd)/cbfs\n") - file.write("\ncbfstool-clean:\n\t$(MAKE) -C $(TOP)/util/cbfstool obj=$(shell pwd)/cbfs clean\n\n") - - file.write("include Makefile.settings\n\n") - for i, o in romimages.items(): - file.write("%s/coreboot.rom:\n" % o.getname()) - file.write("\t$(MAKE) -C %s coreboot.rom\n" % o.getname()) - file.write("clean: ") - for i in romimages.keys(): - file.write(" %s-clean" % i) - file.write(" base-clean") - file.write("\n\n") - for i, o in romimages.items(): - file.write("%s-clean:\n" % o.getname()) - file.write("\t(cd %s; $(MAKE) clean)\n\n" % o.getname()) - file.write("base-clean:\n") - file.write("\trm -f romcc*\n\n") - - file.write("CBFS_COMPRESS_FLAG:=\n") - file.write("CBFS_STAGE_COMPRESS_FLAG:=\n") - file.write("ifeq \"$(CONFIG_COMPRESSED_PAYLOAD_LZMA)\" \"1\"\nCBFS_COMPRESS_FLAG:=l\nendif\n\n") - file.write("ifeq \"$(CONFIG_COMPRESS)\" \"1\"\nCBFS_STAGE_COMPRESS_FLAG:=l\nendif\n\n") - - for i in buildroms: - file.write("%s: cbfstool" %(i.name)) - for j in i.roms: - file.write(" %s/coreboot.rom " % j) - file.write("\n") - - romsize = getoption("CONFIG_ROM_SIZE", image) - - file.write("\n\trm -f %s\n" %(i.name)) - - # build the bootblock here. - file.write("\n\tcat") - for j in i.roms: - file.write(" %s/coreboot.rom " % j) - file.write("> %s.bootblock\n\n" %i.name) - file.write("\t./cbfs/cbfstool %s create %s %s.bootblock\n" - %(i.name, romsize, i.name)) - for j in pciroms: - file.write("\t./cbfs/cbfstool %s add %s pci%04x,%04x.rom optionrom\n" % (i.name, j.name, j.pci_vid, j.pci_did)) - for j in i.roms: - #failover is a hack that will go away soon. - if (j != "failover") and (rommapping[j] != "/dev/null"): - file.write("\t./cbfs/cbfstool %s add-payload %s %s/payload $(CBFS_COMPRESS_FLAG)\n" % (i.name, rommapping[j], j,)) - if (j != "failover"): - file.write("\t./cbfs/cbfstool %s add-stage %s/coreboot_ram %s/coreboot_ram $(CBFS_STAGE_COMPRESS_FLAG)\n" % (i.name, j, j,)) - file.write("\tif [ -f %s/coreboot_apc ]; then ./cbfs/cbfstool %s add-stage %s/coreboot_apc %s/coreboot_apc $(CBFS_COMPRESS_FLAG); fi\n" % (j, i.name, j, j,)) - file.write("\t./cbfs/cbfstool %s print\n" % i.name) - file.write("\n") - - file.write(".PHONY: all clean cbfstool") - for i in romimages.keys(): - file.write(" %s-clean" % i) - for i, o in romimages.items(): - file.write(" %s/coreboot.rom" % o.getname()) - file.write("\n\n") - - writemakefilefooter(file, makefilepath) - file.close() - -def writeinitincludes(image): - global include_pattern - filepath = os.path.join(image.gettargetdir(), image.getincludefilename()) - print "Creating", filepath - outfile = safe_open(filepath, 'w+') - if (image.newformat()): - infile = safe_open(image.getinitfile(), 'r') - - line = infile.readline() - while (line): - p = include_pattern.match(line) - if (p): - for i in image.getinitincludes(): - inc = image.getinitinclude(i) - if (inc.getstring() == p.group(1)): - outfile.write("#include \"%s\"\n" % inc.getpath()) - else: - outfile.write(line) - line = infile.readline() - - infile.close() - else: - for i in image.getinitincludes(): - outfile.write("#include <%s>\n" % i) - outfile.close() - -def writeldoptions(image): - """Write ldoptions file.""" - filename = os.path.join(image.gettargetdir(), "ldoptions") - print "Creating", filename - file = safe_open(filename, 'w+') - for o in global_exported_options: - if (hasvalue(o.name, image) and IsInt(getoption(o.name, image))): - file.write("%s = %s;\n" % (o.name, getformated(o.name, image))) - for o in image.exported_options: - if (not o in global_exported_options and hasvalue(o.name, image) and IsInt(getoption(o.name, image))): - file.write("%s = %s;\n" % (o.name, getformated(o.name, image))) - file.close() - -def dumptree(part, lvl): - debug.info(debug.dumptree, "DUMPTREE ME is") - part.dumpme(lvl) - # dump the siblings -- actually are there any? not sure - # siblings are: - debug.info(debug.dumptree, "DUMPTREE SIBLINGS are") - kid = part.next_sibling - while (kid): - kid.dumpme(lvl) - kid = kid.next_sibling - # dump the kids - debug.info(debug.dumptree, "DUMPTREE KIDS are") - #for kid in part.children: - if (part.children): - dumptree(part.children, lvl+1) - kid = part.next_sibling - while (kid): - if (kid.children): - dumptree(kid.children, lvl + 1) - kid = kid.next_sibling - debug.info(debug.dumptree, "DONE DUMPTREE") - -def writecode(image): - filename = os.path.join(img_dir, "static.c") - print "Creating", filename - file = safe_open(filename, 'w+') - file.write("#include <device/device.h>\n") - file.write("#include <device/pci.h>\n") - for path in image.getconfigincludes().values(): - file.write("#include \"%s\"\n" % path) - file.write("\n/* pass 0 */\n") - gencode(image.getroot(), file, 0) - file.write("\n/* pass 1 */\n") - gencode(image.getroot(), file, 1) - file.close() - -def gencode(part, file, pass_num): - debug.info(debug.gencode, "GENCODE ME is") - part.gencode(file, pass_num) - # dump the siblings -- actually are there any? not sure - debug.info(debug.gencode, "GENCODE SIBLINGS are") - kid = part.next_sibling - while (kid): - kid.gencode(file, pass_num) - kid = kid.next_sibling - # now dump the children - debug.info(debug.gencode, "GENCODE KIDS are") - if (part.children): - gencode(part.children, file, pass_num) - kid = part.next_sibling - while (kid): - if (kid.children): - gencode(kid.children, file, pass_num) - kid = kid.next_sibling - debug.info(debug.gencode, "DONE GENCODE") - -def writegraph(image): - filename = os.path.join(img_dir, "static.dot") - print "Creating", filename - file = safe_open(filename, 'w+') - file.write("digraph devicetree {\n") - file.write(" rankdir=LR\n") - genranks(image.getroot(), file, 0) - gennodes(image.getroot(), file) - gengraph(image.getroot(), file) - file.write("}\n") - file.close() - -def genranks(part, file, level): - #file.write(" # Level %d\n" % level ) - file.write(" { rank = same; \"dev_%s_%d\"" % (part.type_name,part.instance )) - sib = part.next_sibling - while (sib): - file.write("; \"dev_%s_%d\"" % (sib.type_name, sib.instance)) - sib = sib.next_sibling - file.write("}\n" ) - # now dump the children - if (part.children): - genranks(part.children, file, level + 1) - - kid = part.next_sibling - while (kid): - if (kid.children): - genranks(kid.children, file, level + 1) - kid = kid.next_sibling - - -def gennodes(part, file): - file.write(" dev_%s_%d[shape=record, label=\"%s\"];\n" % (part.type_name,part.instance,part.graph_name() )) - sib = part.next_sibling - while (sib): - file.write(" dev_%s_%d[shape=record, label=\"%s\"];\n" % (sib.type_name,sib.instance,sib.graph_name() )) - sib = sib.next_sibling - # now dump the children - if (part.children): - gennodes(part.children, file) - - kid = part.next_sibling - while (kid): - if (kid.children): - gennodes(kid.children, file) - kid = kid.next_sibling - - -def gengraph(part, file): - if (part.parent != part): - file.write(" dev_%s_%d -> dev_%s_%d;\n" % \ - (part.parent.type_name, part.parent.instance, \ - part.type_name, part.instance )) - sib = part.next_sibling - while (sib): - file.write(" dev_%s_%d -> dev_%s_%d;\n" % \ - (sib.parent.type_name, sib.parent.instance, \ - sib.type_name, sib.instance )) - sib = sib.next_sibling - - kid = part.next_sibling - while (kid): - if (kid.children): - gengraph(kid.children, file) - kid = kid.next_sibling - - if (part.children): - gengraph(part.children, file) - -def verifyparse(): - """Add any run-time checks to verify that parsing the configuration - was successful""" - - for image in romimages.values(): - print("Verifying ROMIMAGE %s" % image.name) - if (image.newformat() and image.getinitfile() == ''): - fatal("An init file must be specified") - for op in image.exported_options: - if (getoptionvalue(op.name, op, image) == 0 and getoptionvalue(op.name, op, 0) == 0): - warning("Exported option %s has no value (check Options.lb)" % op.name); - print("Verifing global options") - for op in global_exported_options: - if (getoptionvalue(op.name, op, 0) == 0): - notice("Exported option %s has no value (check Options.lb)" % op.name); - -#============================================================================= -# MAIN PROGRAM -#============================================================================= -if __name__=='__main__': - from sys import argv - if (len(argv) < 3): - fatal("Args: <file> <path to coreboot>") - - top_config_file = os.path.abspath(sys.argv[1]) - - treetop = os.path.abspath(sys.argv[2]) - - # Now read in the customizing script... - loc.push(argv[1]) - fp = safe_open(argv[1], 'r') - if (not parse('board', fp.read())): - fatal("Could not parse file") - loc.pop() - - verifyparse() - - # no longer need to check if an options has been used - alloptions = 1 - - for image_name, image in romimages.items(): - if (debug.level(debug.dumptree)): - debug.info(debug.dumptree, "DEVICE TREE:") - dumptree(image.getroot(), 0) - - img_dir = image.gettargetdir() - if not os.path.isdir(img_dir): - print "Creating directory %s" % img_dir - os.makedirs(img_dir) - - for objrule, obj in image.getobjectrules().items(): - sub_dir = img_dir + '/' + os.path.dirname(obj[0]) - if not os.path.isdir(sub_dir): - print "Creating sub directory %s" % sub_dir - os.makedirs(sub_dir) - - for driverrule, driver in image.getdriverrules().items(): - sub_dir = img_dir + '/' + os.path.dirname(driver[0]) - if not os.path.isdir(sub_dir): - print "Creating sub directory %s" % sub_dir - os.makedirs(sub_dir) - - for srule, smm in image.getsmmobjectrules().items(): - sub_dir = img_dir + '/' + os.path.dirname(smm[0]) - if not os.path.isdir(sub_dir): - print "Creating sub directory %s" % sub_dir - os.makedirs(sub_dir) - - for irule, init in image.getinitobjectrules().items(): - sub_dir = img_dir + '/' + os.path.dirname(init[0]) - if not os.path.isdir(sub_dir): - print "Creating sub directory %s" % sub_dir - os.makedirs(sub_dir) - - if (debug.level(debug.dump)): - for i in image.getinitincludes(): - debug.info(debug.dump, "crt0include file %s" % i) - for i in image.getdriverrules().keys(): - debug.info(debug.dump, "driver file %s" % i) - for i in image.getldscripts(): - debug.info(debug.dump, "ldscript file %s" % i) - for i, m in image.getmakerules().items(): - debug.info(debug.dump, " makerule %s dep %s act %s" % (i, m.dependency, m.actions)) - - writecode(image) - writeimagesettings(image) - writeinitincludes(image) - writeimagemakefile(image) - writeldoptions(image) - writegraph(image) - - writemakefilesettings(target_dir) - writemakefile(target_dir) - - sys.exit(0) diff --git a/util/newconfig/parsedesc.g b/util/newconfig/parsedesc.g deleted file mode 100644 index 7113c6d6f3..0000000000 --- a/util/newconfig/parsedesc.g +++ /dev/null @@ -1,196 +0,0 @@ -###################################################################### -# The remainder of this file is from parsedesc.{g,py} - -def append(lst, x): - "Imperative append" - lst.append(x) - return lst - -def add_inline_token(tokens, str): - tokens.insert( 0, (str, eval(str, {}, {})) ) - return Terminal(str) - -def cleanup_choice(lst): - if len(lst) == 0: return Sequence([]) - if len(lst) == 1: return lst[0] - return apply(Choice, tuple(lst)) - -def cleanup_sequence(lst): - if len(lst) == 1: return lst[0] - return apply(Sequence, tuple(lst)) - -def cleanup_rep(node, rep): - if rep == 'star': return Star(node) - elif rep == 'plus': return Plus(node) - else: return node - -def resolve_name(tokens, id, args): - if id in map(lambda x: x[0], tokens): - # It's a token - if args: - print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args) - return Terminal(id) - else: - # It's a name, so assume it's a nonterminal - return NonTerminal(id, args) - -%% -parser ParserDescription: - option: "context-insensitive-scanner" - - ignore: "[ \t\r\n]+" - ignore: "#.*?\r?\n" - token END: "$" - token ATTR: "<<.+?>>" - token STMT: "{{.+?}}" - token ID: '[a-zA-Z_][a-zA-Z_0-9]*' - token STR: '[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"' - token LP: '\\(' - token RP: '\\)' - token LB: '\\[' - token RB: '\\]' - token OR: '[|]' - token STAR: '[*]' - token PLUS: '[+]' - token QUEST: '[?]' - token COLON: ':' - - rule Parser: "parser" ID ":" - Options - Tokens - Rules<<Tokens>> - END - {{ return Generator(ID,Options,Tokens,Rules) }} - - rule Options: {{ opt = {} }} - ( "option" ":" Str {{ opt[Str] = 1 }} )* - {{ return opt }} - - rule Tokens: {{ tok = [] }} - ( - "token" ID ":" Str {{ tok.append( (ID,Str) ) }} - | "ignore" ":" Str {{ tok.append( ('#ignore',Str) ) }} - )* - {{ return tok }} - - rule Rules<<tokens>>: - {{ rul = [] }} - ( - "rule" ID OptParam ":" ClauseA<<tokens>> - {{ rul.append( (ID,OptParam,ClauseA) ) }} - )* - {{ return rul }} - - rule ClauseA<<tokens>>: - ClauseB<<tokens>> - {{ v = [ClauseB] }} - ( OR ClauseB<<tokens>> {{ v.append(ClauseB) }} )* - {{ return cleanup_choice(v) }} - - rule ClauseB<<tokens>>: - {{ v = [] }} - ( ClauseC<<tokens>> {{ v.append(ClauseC) }} )* - {{ return cleanup_sequence(v) }} - - rule ClauseC<<tokens>>: - ClauseD<<tokens>> - ( PLUS {{ return Plus(ClauseD) }} - | STAR {{ return Star(ClauseD) }} - | {{ return ClauseD }} ) - - rule ClauseD<<tokens>>: - STR {{ t = (STR, eval(STR,{},{})) }} - {{ if t not in tokens: tokens.insert( 0, t ) }} - {{ return Terminal(STR) }} - | ID OptParam {{ return resolve_name(tokens, ID, OptParam) }} - | LP ClauseA<<tokens>> RP {{ return ClauseA }} - | LB ClauseA<<tokens>> RB {{ return Option(ClauseA) }} - | STMT {{ return Eval(STMT[2:-2]) }} - - rule OptParam: [ ATTR {{ return ATTR[2:-2] }} ] {{ return '' }} - rule Str: STR {{ return eval(STR,{},{}) }} -%% - -# This replaces the default main routine - -yapps_options = [ - ('context-insensitive-scanner', 'context-insensitive-scanner', - 'Scan all tokens (see docs)') - ] - -def generate(inputfilename, outputfilename='', dump=0, **flags): - """Generate a grammar, given an input filename (X.g) - and an output filename (defaulting to X.py).""" - - if not outputfilename: - if inputfilename[-2:]=='.g': outputfilename = inputfilename[:-2]+'.py' - else: raise "Invalid Filename", outputfilename - - print 'Input Grammar:', inputfilename - print 'Output File:', outputfilename - - DIVIDER = '\n%%\n' # This pattern separates the pre/post parsers - preparser, postparser = None, None # Code before and after the parser desc - - # Read the entire file - s = open(inputfilename,'r').read() - - # See if there's a separation between the pre-parser and parser - f = find(s, DIVIDER) - if f >= 0: preparser, s = s[:f]+'\n\n', s[f+len(DIVIDER):] - - # See if there's a separation between the parser and post-parser - f = find(s, DIVIDER) - if f >= 0: s, postparser = s[:f], '\n\n'+s[f+len(DIVIDER):] - - # Create the parser and scanner - p = ParserDescription(ParserDescriptionScanner(s)) - if not p: return - - # Now parse the file - t = wrap_error_reporter(p, 'Parser') - if not t: return # Error - if preparser is not None: t.preparser = preparser - if postparser is not None: t.postparser = postparser - - # Check the options - for f in t.options.keys(): - for opt,_,_ in yapps_options: - if f == opt: break - else: - print 'Warning: unrecognized option', f - # Add command line options to the set - for f in flags.keys(): t.options[f] = flags[f] - - # Generate the output - if dump: - t.dump_information() - else: - t.output = open(outputfilename, 'w') - t.generate_output() - -if __name__=='__main__': - import sys, getopt - optlist, args = getopt.getopt(sys.argv[1:], 'f:', ['dump']) - if not args or len(args) > 2: - print 'Usage:' - print ' python', sys.argv[0], '[flags] input.g [output.py]' - print 'Flags:' - print (' --dump' + ' '*40)[:35] + 'Dump out grammar information' - for flag, _, doc in yapps_options: - print (' -f' + flag + ' '*40)[:35] + doc - else: - # Read in the options and create a list of flags - flags = {} - for opt in optlist: - for flag, name, _ in yapps_options: - if opt == ('-f', flag): - flags[name] = 1 - break - else: - if opt == ('--dump', ''): - flags['dump'] = 1 - else: - print 'Warning - unrecognized option: ', opt[0], opt[1] - - apply(generate, tuple(args), flags) diff --git a/util/newconfig/test.config b/util/newconfig/test.config deleted file mode 100644 index c492f200fa..0000000000 --- a/util/newconfig/test.config +++ /dev/null @@ -1,6 +0,0 @@ -target x -mainboard amd/solo -# option X=1 -# makerule x y "z" - payload /dev/null -end diff --git a/util/newconfig/yapps2.py b/util/newconfig/yapps2.py deleted file mode 100644 index 71bfa05ca6..0000000000 --- a/util/newconfig/yapps2.py +++ /dev/null @@ -1,779 +0,0 @@ -# Yapps 2.0 - yet another python parser system -# Amit J Patel, January 1999 -# See http://theory.stanford.edu/~amitp/Yapps/ for documentation and updates - -# v2.0.1 changes (October 2001): -# * The exceptions inherit the standard Exception class (thanks Rich Salz) -# * The scanner can use either a different set of regular expressions -# per instance, or allows the subclass to define class fields with -# the patterns. This improves performance when many Scanner objects -# are being created, because the regular expressions don't have to -# be recompiled each time. (thanks Amaury Forgeot d'Arc) -# v2.0.2 changes (April 2002) -# * Fixed a bug in generating the 'else' clause when the comment was too -# long. v2.0.1 was missing a newline. (thanks Steven Engelhardt) -# v2.0.3 changes (August 2002) -# * Fixed a bug with inline tokens using the r"" syntax. - -from string import * -from yappsrt import * -import re - -INDENT = " "*4 - -class Generator: - def __init__(self, name, options, tokens, rules): - self.change_count = 0 - self.name = name - self.options = options - self.preparser = '' - self.postparser = None - - self.tokens = {} # Map from tokens to regexps - self.ignore = [] # List of token names to ignore in parsing - self.terminals = [] # List of token names (to maintain ordering) - for n,t in tokens: - if n == '#ignore': - n = t - self.ignore.append(n) - if n in self.tokens.keys() and self.tokens[n] != t: - print 'Warning: token', n, 'multiply defined.' - self.tokens[n] = t - self.terminals.append(n) - - self.rules = {} # Map from rule names to parser nodes - self.params = {} # Map from rule names to parameters - self.goals = [] # List of rule names (to maintain ordering) - for n,p,r in rules: - self.params[n] = p - self.rules[n] = r - self.goals.append(n) - - import sys - self.output = sys.stdout - - def __getitem__(self, name): - # Get options - return self.options.get(name, 0) - - def non_ignored_tokens(self): - return filter(lambda x, i=self.ignore: x not in i, self.terminals) - - def changed(self): - self.change_count = 1+self.change_count - - def subset(self, a, b): - "See if all elements of a are inside b" - for x in a: - if x not in b: return 0 - return 1 - - def equal_set(self, a, b): - "See if a and b have the same elements" - if len(a) != len(b): return 0 - if a == b: return 1 - return self.subset(a, b) and self.subset(b, a) - - def add_to(self, parent, additions): - "Modify parent to include all elements in additions" - for x in additions: - if x not in parent: - parent.append(x) - self.changed() - - def equate(self, a, b): - self.add_to(a, b) - self.add_to(b, a) - - def write(self, *args): - for a in args: - self.output.write(a) - - def in_test(self, x, full, b): - if not b: return '0' - if len(b)==1: return '%s == %s' % (x, `b[0]`) - if full and len(b) > len(full)/2: - # Reverse the sense of the test. - not_b = filter(lambda x, b=b: x not in b, full) - return self.not_in_test(x, full, not_b) - return '%s in %s' % (x, `b`) - - def not_in_test(self, x, full, b): - if not b: return '1' - if len(b)==1: return '%s != %s' % (x, `b[0]`) - return '%s not in %s' % (x, `b`) - - def peek_call(self, a): - a_set = (`a`[1:-1]) - if self.equal_set(a, self.non_ignored_tokens()): a_set = '' - if self['context-insensitive-scanner']: a_set = '' - return 'self._peek(%s)' % a_set - - def peek_test(self, a, b): - if self.subset(a, b): return '1' - if self['context-insensitive-scanner']: a = self.non_ignored_tokens() - return self.in_test(self.peek_call(a), a, b) - - def not_peek_test(self, a, b): - if self.subset(a, b): return '0' - return self.not_in_test(self.peek_call(a), a, b) - - def calculate(self): - while 1: - for r in self.goals: - self.rules[r].setup(self, r) - if self.change_count == 0: break - self.change_count = 0 - - while 1: - for r in self.goals: - self.rules[r].update(self) - if self.change_count == 0: break - self.change_count = 0 - - def dump_information(self): - self.calculate() - for r in self.goals: - print ' _____' + '_'*len(r) - print ('___/Rule '+r+'\\' + '_'*80)[:79] - queue = [self.rules[r]] - while queue: - top = queue[0] - del queue[0] - - print `top` - top.first.sort() - top.follow.sort() - eps = [] - if top.accepts_epsilon: eps = ['(null)'] - print ' FIRST:', join(top.first+eps, ', ') - print ' FOLLOW:', join(top.follow, ', ') - for x in top.get_children(): queue.append(x) - - def generate_output(self): - self.calculate() - self.write(self.preparser) - self.write("from string import *\n") - self.write("import re\n") - self.write("from yappsrt import *\n") - self.write("\n") - self.write("class ", self.name, "Scanner(Scanner):\n") - self.write(" patterns = [\n") - for p in self.terminals: - self.write(" (%s, re.compile(%s)),\n" % ( - `p`, `self.tokens[p]`)) - self.write(" ]\n") - self.write(" def __init__(self, str):\n") - self.write(" Scanner.__init__(self,None,%s,str)\n" % - `self.ignore`) - self.write("\n") - - self.write("class ", self.name, "(Parser):\n") - for r in self.goals: - self.write(INDENT, "def ", r, "(self") - if self.params[r]: self.write(", ", self.params[r]) - self.write("):\n") - self.rules[r].output(self, INDENT+INDENT) - self.write("\n") - - self.write("\n") - self.write("def parse(rule, text):\n") - self.write(" P = ", self.name, "(", self.name, "Scanner(text))\n") - self.write(" return wrap_error_reporter(P, rule)\n") - self.write("\n") - if self.postparser is not None: - self.write(self.postparser) - else: - self.write("if __name__=='__main__':\n") - self.write(INDENT, "from sys import argv, stdin\n") - self.write(INDENT, "if len(argv) >= 2:\n") - self.write(INDENT*2, "if len(argv) >= 3:\n") - self.write(INDENT*3, "f = open(argv[2],'r')\n") - self.write(INDENT*2, "else:\n") - self.write(INDENT*3, "f = stdin\n") - self.write(INDENT*2, "print parse(argv[1], f.read())\n") - self.write(INDENT, "else: print 'Args: <rule> [<filename>]'\n") - -###################################################################### -class Node: - def __init__(self): - self.first = [] - self.follow = [] - self.accepts_epsilon = 0 - self.rule = '?' - - def setup(self, gen, rule): - # Setup will change accepts_epsilon, - # sometimes from 0 to 1 but never 1 to 0. - # It will take a finite number of steps to set things up - self.rule = rule - - def used(self, vars): - "Return two lists: one of vars used, and the other of vars assigned" - return vars, [] - - def get_children(self): - "Return a list of sub-nodes" - return [] - - def __repr__(self): - return str(self) - - def update(self, gen): - if self.accepts_epsilon: - gen.add_to(self.first, self.follow) - - def output(self, gen, indent): - "Write out code to _gen_ with _indent_:string indentation" - gen.write(indent, "assert 0 # Invalid parser node\n") - -class Terminal(Node): - def __init__(self, token): - Node.__init__(self) - self.token = token - self.accepts_epsilon = 0 - - def __str__(self): - return self.token - - def update(self, gen): - Node.update(self, gen) - if self.first != [self.token]: - self.first = [self.token] - gen.changed() - - def output(self, gen, indent): - gen.write(indent) - if re.match('[a-zA-Z_]+$', self.token): - gen.write(self.token, " = ") - gen.write("self._scan(%s)\n" % `self.token`) - -class Eval(Node): - def __init__(self, expr): - Node.__init__(self) - self.expr = expr - - def setup(self, gen, rule): - Node.setup(self, gen, rule) - if not self.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def __str__(self): - return '{{ %s }}' % strip(self.expr) - - def output(self, gen, indent): - gen.write(indent, strip(self.expr), '\n') - -class NonTerminal(Node): - def __init__(self, name, args): - Node.__init__(self) - self.name = name - self.args = args - - def setup(self, gen, rule): - Node.setup(self, gen, rule) - try: - self.target = gen.rules[self.name] - if self.accepts_epsilon != self.target.accepts_epsilon: - self.accepts_epsilon = self.target.accepts_epsilon - gen.changed() - except KeyError: # Oops, it's nonexistent - print 'Error: no rule <%s>' % self.name - self.target = self - - def __str__(self): - return '<%s>' % self.name - - def update(self, gen): - Node.update(self, gen) - gen.equate(self.first, self.target.first) - gen.equate(self.follow, self.target.follow) - - def output(self, gen, indent): - gen.write(indent) - gen.write(self.name, " = ") - gen.write("self.", self.name, "(", self.args, ")\n") - -class Sequence(Node): - def __init__(self, *children): - Node.__init__(self) - self.children = children - - def setup(self, gen, rule): - Node.setup(self, gen, rule) - for c in self.children: c.setup(gen, rule) - - if not self.accepts_epsilon: - # If it's not already accepting epsilon, it might now do so. - for c in self.children: - # any non-epsilon means all is non-epsilon - if not c.accepts_epsilon: break - else: - self.accepts_epsilon = 1 - gen.changed() - - def get_children(self): - return self.children - - def __str__(self): - return '( %s )' % join(map(lambda x: str(x), self.children)) - - def update(self, gen): - Node.update(self, gen) - for g in self.children: - g.update(gen) - - empty = 1 - for g_i in range(len(self.children)): - g = self.children[g_i] - - if empty: gen.add_to(self.first, g.first) - if not g.accepts_epsilon: empty = 0 - - if g_i == len(self.children)-1: - next = self.follow - else: - next = self.children[1+g_i].first - gen.add_to(g.follow, next) - - if self.children: - gen.add_to(self.follow, self.children[-1].follow) - - def output(self, gen, indent): - if self.children: - for c in self.children: - c.output(gen, indent) - else: - # Placeholder for empty sequences, just in case - gen.write(indent, 'pass\n') - -class Choice(Node): - def __init__(self, *children): - Node.__init__(self) - self.children = children - - def setup(self, gen, rule): - Node.setup(self, gen, rule) - for c in self.children: c.setup(gen, rule) - - if not self.accepts_epsilon: - for c in self.children: - if c.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def get_children(self): - return self.children - - def __str__(self): - return '( %s )' % join(map(lambda x: str(x), self.children), ' | ') - - def update(self, gen): - Node.update(self, gen) - for g in self.children: - g.update(gen) - - for g in self.children: - gen.add_to(self.first, g.first) - gen.add_to(self.follow, g.follow) - for g in self.children: - gen.add_to(g.follow, self.follow) - if self.accepts_epsilon: - gen.add_to(self.first, self.follow) - - def output(self, gen, indent): - test = "if" - gen.write(indent, "_token_ = ", gen.peek_call(self.first), "\n") - tokens_seen = [] - tokens_unseen = self.first[:] - if gen['context-insensitive-scanner']: - # Context insensitive scanners can return ANY token, - # not only the ones in first. - tokens_unseen = gen.non_ignored_tokens() - for c in self.children: - testset = c.first[:] - removed = [] - for x in testset: - if x in tokens_seen: - testset.remove(x) - removed.append(x) - if x in tokens_unseen: tokens_unseen.remove(x) - tokens_seen = tokens_seen + testset - if removed: - if not testset: - print 'Error in rule', self.rule+':', c, 'never matches.' - else: - print 'Warning:', self - print ' * These tokens are being ignored:', join(removed, ', ') - print ' due to previous choices using them.' - - if testset: - if not tokens_unseen: # context sensitive scanners only! - if test=='if': - # if it's the first AND last test, then - # we can simply put the code without an if/else - c.output(gen, indent) - else: - gen.write(indent, "else: ") - t = gen.in_test('', [], testset) - if len(t) < 70-len(indent): - gen.write("#", t) - gen.write("\n") - c.output(gen, indent+INDENT) - else: - gen.write(indent, test, " ", - gen.in_test('_token_', tokens_unseen, testset), - ":\n") - c.output(gen, indent+INDENT) - test = "elif" - - if gen['context-insensitive-scanner'] and tokens_unseen: - gen.write(indent, "else:\n") - gen.write(indent, INDENT, "raise SyntaxError(self._pos, ") - gen.write("'Could not match ", self.rule, "')\n") - -class Wrapper(Node): - def __init__(self, child): - Node.__init__(self) - self.child = child - - def setup(self, gen, rule): - Node.setup(self, gen, rule) - self.child.setup(gen, rule) - - def get_children(self): - return [self.child] - - def update(self, gen): - Node.update(self, gen) - self.child.update(gen) - gen.add_to(self.first, self.child.first) - gen.equate(self.follow, self.child.follow) - -class Option(Wrapper): - def setup(self, gen, rule): - Wrapper.setup(self, gen, rule) - if not self.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def __str__(self): - return '[ %s ]' % str(self.child) - - def output(self, gen, indent): - if self.child.accepts_epsilon: - print 'Warning in rule', self.rule+': contents may be empty.' - gen.write(indent, "if %s:\n" % - gen.peek_test(self.first, self.child.first)) - self.child.output(gen, indent+INDENT) - -class Plus(Wrapper): - def setup(self, gen, rule): - Wrapper.setup(self, gen, rule) - if self.accepts_epsilon != self.child.accepts_epsilon: - self.accepts_epsilon = self.child.accepts_epsilon - gen.changed() - - def __str__(self): - return '%s+' % str(self.child) - - def update(self, gen): - Wrapper.update(self, gen) - gen.add_to(self.follow, self.first) - - def output(self, gen, indent): - if self.child.accepts_epsilon: - print 'Warning in rule', self.rule+':' - print ' * The repeated pattern could be empty. The resulting' - print ' parser may not work properly.' - gen.write(indent, "while 1:\n") - self.child.output(gen, indent+INDENT) - union = self.first[:] - gen.add_to(union, self.follow) - gen.write(indent+INDENT, "if %s: break\n" % - gen.not_peek_test(union, self.child.first)) - -class Star(Plus): - def setup(self, gen, rule): - Wrapper.setup(self, gen, rule) - if not self.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def __str__(self): - return '%s*' % str(self.child) - - def output(self, gen, indent): - if self.child.accepts_epsilon: - print 'Warning in rule', self.rule+':' - print ' * The repeated pattern could be empty. The resulting' - print ' parser probably will not work properly.' - gen.write(indent, "while %s:\n" % - gen.peek_test(self.follow, self.child.first)) - self.child.output(gen, indent+INDENT) - -###################################################################### -# The remainder of this file is from parsedesc.{g,py} - -def append(lst, x): - "Imperative append" - lst.append(x) - return lst - -def add_inline_token(tokens, str): - tokens.insert( 0, (str, eval(str, {}, {})) ) - return Terminal(str) - -def cleanup_choice(lst): - if len(lst) == 0: return Sequence([]) - if len(lst) == 1: return lst[0] - return apply(Choice, tuple(lst)) - -def cleanup_sequence(lst): - if len(lst) == 1: return lst[0] - return apply(Sequence, tuple(lst)) - -def cleanup_rep(node, rep): - if rep == 'star': return Star(node) - elif rep == 'plus': return Plus(node) - else: return node - -def resolve_name(tokens, id, args): - if id in map(lambda x: x[0], tokens): - # It's a token - if args: - print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args) - return Terminal(id) - else: - # It's a name, so assume it's a nonterminal - return NonTerminal(id, args) - - -from string import * -import re -from yappsrt import * - -class ParserDescriptionScanner(Scanner): - def __init__(self, str): - Scanner.__init__(self,[ - ('"rule"', 'rule'), - ('"ignore"', 'ignore'), - ('"token"', 'token'), - ('"option"', 'option'), - ('":"', ':'), - ('"parser"', 'parser'), - ('[ \011\015\012]+', '[ \011\015\012]+'), - ('#.*?\015?\012', '#.*?\015?\012'), - ('END', '$'), - ('ATTR', '<<.+?>>'), - ('STMT', '{{.+?}}'), - ('ID', '[a-zA-Z_][a-zA-Z_0-9]*'), - ('STR', '[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"'), - ('LP', '\\('), - ('RP', '\\)'), - ('LB', '\\['), - ('RB', '\\]'), - ('OR', '[|]'), - ('STAR', '[*]'), - ('PLUS', '[+]'), - ], ['[ \011\015\012]+', '#.*?\015?\012'], str) - -class ParserDescription(Parser): - def Parser(self): - self._scan('"parser"') - ID = self._scan('ID') - self._scan('":"') - Options = self.Options() - Tokens = self.Tokens() - Rules = self.Rules(Tokens) - END = self._scan('END') - return Generator(ID,Options,Tokens,Rules) - - def Options(self): - opt = {} - while self._peek('"option"', '"token"', '"ignore"', 'END', '"rule"') == '"option"': - self._scan('"option"') - self._scan('":"') - Str = self.Str() - opt[Str] = 1 - return opt - - def Tokens(self): - tok = [] - while self._peek('"token"', '"ignore"', 'END', '"rule"') in ['"token"', '"ignore"']: - _token_ = self._peek('"token"', '"ignore"') - if _token_ == '"token"': - self._scan('"token"') - ID = self._scan('ID') - self._scan('":"') - Str = self.Str() - tok.append( (ID,Str) ) - else: # == '"ignore"' - self._scan('"ignore"') - self._scan('":"') - Str = self.Str() - tok.append( ('#ignore',Str) ) - return tok - - def Rules(self, tokens): - rul = [] - while self._peek('"rule"', 'END') == '"rule"': - self._scan('"rule"') - ID = self._scan('ID') - OptParam = self.OptParam() - self._scan('":"') - ClauseA = self.ClauseA(tokens) - rul.append( (ID,OptParam,ClauseA) ) - return rul - - def ClauseA(self, tokens): - ClauseB = self.ClauseB(tokens) - v = [ClauseB] - while self._peek('OR', 'RP', 'RB', '"rule"', 'END') == 'OR': - OR = self._scan('OR') - ClauseB = self.ClauseB(tokens) - v.append(ClauseB) - return cleanup_choice(v) - - def ClauseB(self, tokens): - v = [] - while self._peek('STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'END') in ['STR', 'ID', 'LP', 'LB', 'STMT']: - ClauseC = self.ClauseC(tokens) - v.append(ClauseC) - return cleanup_sequence(v) - - def ClauseC(self, tokens): - ClauseD = self.ClauseD(tokens) - _token_ = self._peek('PLUS', 'STAR', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'END') - if _token_ == 'PLUS': - PLUS = self._scan('PLUS') - return Plus(ClauseD) - elif _token_ == 'STAR': - STAR = self._scan('STAR') - return Star(ClauseD) - else: - return ClauseD - - def ClauseD(self, tokens): - _token_ = self._peek('STR', 'ID', 'LP', 'LB', 'STMT') - if _token_ == 'STR': - STR = self._scan('STR') - t = (STR, eval(STR,{},{})) - if t not in tokens: tokens.insert( 0, t ) - return Terminal(STR) - elif _token_ == 'ID': - ID = self._scan('ID') - OptParam = self.OptParam() - return resolve_name(tokens, ID, OptParam) - elif _token_ == 'LP': - LP = self._scan('LP') - ClauseA = self.ClauseA(tokens) - RP = self._scan('RP') - return ClauseA - elif _token_ == 'LB': - LB = self._scan('LB') - ClauseA = self.ClauseA(tokens) - RB = self._scan('RB') - return Option(ClauseA) - else: # == 'STMT' - STMT = self._scan('STMT') - return Eval(STMT[2:-2]) - - def OptParam(self): - if self._peek('ATTR', '":"', 'PLUS', 'STAR', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'END') == 'ATTR': - ATTR = self._scan('ATTR') - return ATTR[2:-2] - return '' - - def Str(self): - STR = self._scan('STR') - return eval(STR,{},{}) - - - - - -# This replaces the default main routine - -yapps_options = [ - ('context-insensitive-scanner', 'context-insensitive-scanner', - 'Scan all tokens (see docs)') - ] - -def generate(inputfilename, outputfilename='', dump=0, **flags): - """Generate a grammar, given an input filename (X.g) - and an output filename (defaulting to X.py).""" - - if not outputfilename: - if inputfilename[-2:]=='.g': outputfilename = inputfilename[:-2]+'.py' - else: raise "Invalid Filename", outputfilename - - print 'Input Grammar:', inputfilename - print 'Output File:', outputfilename - - DIVIDER = '\n%%\n' # This pattern separates the pre/post parsers - preparser, postparser = None, None # Code before and after the parser desc - - # Read the entire file - s = open(inputfilename,'r').read() - - # See if there's a separation between the pre-parser and parser - f = find(s, DIVIDER) - if f >= 0: preparser, s = s[:f]+'\n\n', s[f+len(DIVIDER):] - - # See if there's a separation between the parser and post-parser - f = find(s, DIVIDER) - if f >= 0: s, postparser = s[:f], '\n\n'+s[f+len(DIVIDER):] - - # Create the parser and scanner - p = ParserDescription(ParserDescriptionScanner(s)) - if not p: return - - # Now parse the file - t = wrap_error_reporter(p, 'Parser') - if not t: return # Error - if preparser is not None: t.preparser = preparser - if postparser is not None: t.postparser = postparser - - # Check the options - for f in t.options.keys(): - for opt,_,_ in yapps_options: - if f == opt: break - else: - print 'Warning: unrecognized option', f - # Add command line options to the set - for f in flags.keys(): t.options[f] = flags[f] - - # Generate the output - if dump: - t.dump_information() - else: - t.output = open(outputfilename, 'w') - t.generate_output() - -if __name__=='__main__': - import sys, getopt - optlist, args = getopt.getopt(sys.argv[1:], 'f:', ['dump']) - if not args or len(args) > 2: - print 'Usage:' - print ' python', sys.argv[0], '[flags] input.g [output.py]' - print 'Flags:' - print (' --dump' + ' '*40)[:35] + 'Dump out grammar information' - for flag, _, doc in yapps_options: - print (' -f' + flag + ' '*40)[:35] + doc - else: - # Read in the options and create a list of flags - flags = {} - for opt in optlist: - for flag, name, _ in yapps_options: - if opt == ('-f', flag): - flags[name] = 1 - break - else: - if opt == ('--dump', ''): - flags['dump'] = 1 - else: - print 'Warning - unrecognized option: ', opt[0], opt[1] - - apply(generate, tuple(args), flags) diff --git a/util/newconfig/yapps2.tex b/util/newconfig/yapps2.tex deleted file mode 100644 index 9d2bddf19c..0000000000 --- a/util/newconfig/yapps2.tex +++ /dev/null @@ -1,1225 +0,0 @@ -\documentclass[10pt]{article} -\usepackage{palatino} -\usepackage{html} -\usepackage{color} - -\setlength{\headsep}{0in} -\setlength{\headheight}{0in} -\setlength{\textheight}{8.5in} -\setlength{\textwidth}{5.9in} -\setlength{\oddsidemargin}{0.25in} - -\definecolor{darkblue}{rgb}{0,0,0.6} -\definecolor{darkerblue}{rgb}{0,0,0.3} - -%% \newcommand{\mysection}[1]{\section{\textcolor{darkblue}{#1}}} -%% \newcommand{\mysubsection}[1]{\subsection{\textcolor{darkerblue}{#1}}} -\newcommand{\mysection}[1]{\section{#1}} -\newcommand{\mysubsection}[1]{\subsection{#1}} - -\bodytext{bgcolor=white text=black link=#004080 vlink=#006020} - -\newcommand{\first}{\textsc{first}} -\newcommand{\follow}{\textsc{follow}} - -\begin{document} - -\begin{center} -\hfill \begin{tabular}{c} -{\Large The \emph{Yapps} Parser Generator System}\\ -\verb|http://theory.stanford.edu/~amitp/Yapps/|\\ - Version 2\\ -\\ -Amit J. Patel\\ -\htmladdnormallink{http://www-cs-students.stanford.edu/~amitp/} -{http://www-cs-students.stanford.edu/~amitp/} - -\end{tabular} \hfill \rule{0in}{0in} -\end{center} - -\mysection{Introduction} - -\emph{Yapps} (\underline{Y}et \underline{A}nother \underline{P}ython -\underline{P}arser \underline{S}ystem) is an easy to use parser -generator that is written in Python and generates Python code. There -are several parser generator systems already available for Python, -including \texttt{PyLR, kjParsing, PyBison,} and \texttt{mcf.pars,} -but I had different goals for my parser. Yapps is simple, is easy to -use, and produces human-readable parsers. It is not the fastest or -most powerful parser. Yapps is designed to be used when regular -expressions are not enough and other parser systems are too much: -situations where you may write your own recursive descent parser. - -Some unusual features of Yapps that may be of interest are: - -\begin{enumerate} - - \item Yapps produces recursive descent parsers that are readable by - humans, as opposed to table-driven parsers that are difficult to - read. A Yapps parser for a simple calculator looks similar to the - one that Mark Lutz wrote by hand for \emph{Programming Python.} - - \item Yapps also allows for rules that accept parameters and pass - arguments to be used while parsing subexpressions. Grammars that - allow for arguments to be passed to subrules and for values to be - passed back are often called \emph{attribute grammars.} In many - cases parameterized rules can be used to perform actions at ``parse - time'' that are usually delayed until later. For example, - information about variable declarations can be passed into the - rules that parse a procedure body, so that undefined variables can - be detected at parse time. The types of defined variables can be - used in parsing as well---for example, if the type of {\tt X} is - known, we can determine whether {\tt X(1)} is an array reference or - a function call. - - \item Yapps grammars are fairly easy to write, although there are - some inconveniences having to do with ELL(1) parsing that have to be - worked around. For example, rules have to be left factored and - rules may not be left recursive. However, neither limitation seems - to be a problem in practice. - - Yapps grammars look similar to the notation used in the Python - reference manual, with operators like \verb:*:, \verb:+:, \verb:|:, - \verb:[]:, and \verb:(): for patterns, names ({\tt tim}) for rules, - regular expressions (\verb:"[a-z]+":) for tokens, and \verb:#: for - comments. - - \item The Yapps parser generator is written as a single Python module - with no C extensions. Yapps produces parsers that are written - entirely in Python, and require only the Yapps run-time module (5k) - for support. - - \item Yapps's scanner is context-sensitive, picking tokens based on - the types of the tokens accepted by the parser. This can be - helpful when implementing certain kinds of parsers, such as for a - preprocessor. - -\end{enumerate} - -There are several disadvantages of using Yapps over another parser system: - -\begin{enumerate} - - \item Yapps parsers are \texttt{ELL(1)} (Extended LL(1)), which is - less powerful than \texttt{LALR} (used by \texttt{PyLR}) or - \texttt{SLR} (used by \texttt{kjParsing}), so Yapps would not be a - good choice for parsing complex languages. For example, allowing - both \texttt{x := 5;} and \texttt{x;} as statements is difficult - because we must distinguish based on only one token of lookahead. - Seeing only \texttt{x}, we cannot decide whether we have an - assignment statement or an expression statement. (Note however - that this kind of grammar can be matched with backtracking; see - section \ref{sec:future}.) - - \item The scanner that Yapps provides can only read from strings, not - files, so an entire file has to be read in before scanning can - begin. It is possible to build a custom scanner, though, so in - cases where stream input is needed (from the console, a network, or - a large file are examples), the Yapps parser can be given a custom - scanner that reads from a stream instead of a string. - - \item Yapps is not designed with efficiency in mind. - -\end{enumerate} - -Yapps provides an easy to use parser generator that produces parsers -similar to what you might write by hand. It is not meant to be a -solution for all parsing problems, but instead an aid for those times -you would write a parser by hand rather than using one of the more -powerful parsing packages available. - -Yapps 2.0 is easier to use than Yapps 1.0. New features include a -less restrictive input syntax, which allows mixing of sequences, -choices, terminals, and nonterminals; optional matching; the ability -to insert single-line statements into the generated parser; and -looping constructs \verb|*| and \verb|+| similar to the repetitive -matching constructs in regular expressions. Unfortunately, the -addition of these constructs has made Yapps 2.0 incompatible with -Yapps 1.0, so grammars will have to be rewritten. See section -\ref{sec:Upgrading} for tips on changing Yapps 1.0 grammars for use -with Yapps 2.0. - -\mysection{Examples} - -In this section are several examples that show the use of Yapps. -First, an introduction shows how to construct grammars and write them -in Yapps form. This example can be skipped by someone familiar with -grammars and parsing. Next is a Lisp expression grammar that produces -a parse tree as output. This example demonstrates the use of tokens -and rules, as well as returning values from rules. The third example -is a expression evaluation grammar that evaluates during parsing -(instead of producing a parse tree). - -\mysubsection{Introduction to Grammars} - -A \emph{grammar} for a natural language specifies how words can be put -together to form large structures, such as phrases and sentences. A -grammar for a computer language is similar in that it specifies how -small components (called \emph{tokens}) can be put together to form -larger structures. In this section we will write a grammar for a tiny -subset of English. - -Simple English sentences can be described as being a noun phrase -followed by a verb followed by a noun phrase. For example, in the -sentence, ``Jack sank the blue ship,'' the word ``Jack'' is the first -noun phrase, ``sank'' is the verb, and ``the blue ship'' is the second -noun phrase. In addition we should say what a noun phrase is; for -this example we shall say that a noun phrase is an optional article -(a, an, the) followed by any number of adjectives followed by a noun. -The tokens in our language are the articles, nouns, verbs, and -adjectives. The \emph{rules} in our language will tell us how to -combine the tokens together to form lists of adjectives, noun phrases, -and sentences: - -\begin{itemize} - \item \texttt{sentence: noun\_phrase verb noun\_phrase} - \item \texttt{noun\_phrase: [article] adjective* noun} -\end{itemize} - -Notice that some things that we said easily in English, such as -``optional article,'' are expressed using special syntax, such as -brackets. When we said, ``any number of adjectives,'' we wrote -\texttt{adjective*}, where the \texttt{*} means ``zero or more of the -preceding pattern''. - -The grammar given above is close to a Yapps grammar. We also have to -specify what the tokens are, and what to do when a pattern is matched. -For this example, we will do nothing when patterns are matched; the -next example will explain how to perform match actions. - -\begin{verbatim} -parser TinyEnglish: - ignore: "\\W+" - token noun: "(Jack|spam|ship)" - token verb: "(sank|threw)" - token article: "(a|an|the)" - token adjective: "(blue|red|green)" - - rule sentence: noun_phrase verb noun_phrase - rule noun_phrase: [article] adjective* noun -\end{verbatim} - -The tokens are specified as Python \emph{regular expressions}. Since -Yapps produces Python code, you can write any regular expression that -would be accepted by Python. (\emph{Note:} These are Python 1.5 -regular expressions from the \texttt{re} module, not Python 1.4 -regular expressions from the \texttt{regex} module.) In addition to -tokens that you want to see (which are given names), you can also -specify tokens to ignore, marked by the \texttt{ignore} keyword. In -this parser we want to ignore whitespace. - -The TinyEnglish grammar shows how you define tokens and rules, but it -does not specify what should happen once we've matched the rules. In -the next example, we will take a grammar and produce a \emph{parse -tree} from it. - -\mysubsection{Lisp Expressions} - -Lisp syntax, although hated by many, has a redeeming quality: it is -simple to parse. In this section we will construct a Yapps grammar to -parse Lisp expressions and produce a parse tree as output. - -\subsubsection*{Defining the Grammar} - -The syntax of Lisp is simple. It has expressions, which are -identifiers, strings, numbers, and lists. A list is a left -parenthesis followed by some number of expressions (separated by -spaces) followed by a right parenthesis. For example, \verb|5|, -\verb|"ni"|, and \verb|(print "1+2 = " (+ 1 2))| are Lisp expressions. -Written as a grammar, - -\begin{verbatim} - expr: ID | STR | NUM | list - list: ( expr* ) -\end{verbatim} - -In addition to having a grammar, we need to specify what to do every -time something is matched. For the tokens, which are strings, we just -want to get the ``value'' of the token, attach its type (identifier, -string, or number) in some way, and return it. For the lists, we want -to construct and return a Python list. - -Once some pattern is matched, we enclose a return statement enclosed -in \verb|{{...}}|. The braces allow us to insert any one-line -statement into the parser. Within this statement, we can refer to the -values returned by matching each part of the rule. After matching a -token such as \texttt{ID}, ``ID'' will be bound to the text of the -matched token. Let's take a look at the rule: - -\begin{verbatim} - rule expr: ID {{ return ('id', ID) }} - ... -\end{verbatim} - -In a rule, tokens return the text that was matched. For identifiers, -we just return the identifier, along with a ``tag'' telling us that -this is an identifier and not a string or some other value. Sometimes -we may need to convert this text to a different form. For example, if -a string is matched, we want to remove quotes and handle special forms -like \verb|\n|. If a number is matched, we want to convert it into a -number. Let's look at the return values for the other tokens: - -\begin{verbatim} - ... - | STR {{ return ('str', eval(STR)) }} - | NUM {{ return ('num', atoi(NUM)) }} - ... -\end{verbatim} - -If we get a string, we want to remove the quotes and process any -special backslash codes, so we run \texttt{eval} on the quoted string. -If we get a number, we convert it to an integer with \texttt{atoi} and -then return the number along with its type tag. - -For matching a list, we need to do something slightly more -complicated. If we match a Lisp list of expressions, we want to -create a Python list with those values. - -\begin{verbatim} - rule list: "\\(" # Match the opening parenthesis - {{ result = [] }} # Create a Python list - ( - expr # When we match an expression, - {{ result.append(expr) }} # add it to the list - )* # * means repeat this if needed - "\\)" # Match the closing parenthesis - {{ return result }} # Return the Python list -\end{verbatim} - -In this rule we first match the opening parenthesis, then go into a -loop. In this loop we match expressions and add them to the list. -When there are no more expressions to match, we match the closing -parenthesis and return the resulting. Note that \verb:#: is used for -comments, just as in Python. - -The complete grammar is specified as follows: -\begin{verbatim} -parser Lisp: - ignore: '\\s+' - token NUM: '[0-9]+' - token ID: '[-+*/!@%^&=.a-zA-Z0-9_]+' - token STR: '"([^\\"]+|\\\\.)*"' - - rule expr: ID {{ return ('id', ID) }} - | STR {{ return ('str', eval(STR)) }} - | NUM {{ return ('num', atoi(NUM)) }} - | list {{ return list }} - rule list: "\\(" {{ result = [] }} - ( expr {{ result.append(expr) }} - )* - "\\)" {{ return result }} -\end{verbatim} - -One thing you may have noticed is that \verb|"\\("| and \verb|"\\)"| -appear in the \texttt{list} rule. These are \emph{inline tokens}: -they appear in the rules without being given a name with the -\texttt{token} keyword. Inline tokens are more convenient to use, but -since they do not have a name, the text that is matched cannot be used -in the return value. They are best used for short simple patterns -(usually punctuation or keywords). - -Another thing to notice is that the number and identifier tokens -overlap. For example, ``487'' matches both NUM and ID. In Yapps, the -scanner only tries to match tokens that are acceptable to the parser. -This rule doesn't help here, since both NUM and ID can appear in the -same place in the grammar. There are two rules used to pick tokens if -more than one matches. One is that the \emph{longest} match is -preferred. For example, ``487x'' will match as an ID (487x) rather -than as a NUM (487) followed by an ID (x). The second rule is that if -the two matches are the same length, the \emph{first} one listed in -the grammar is preferred. For example, ``487'' will match as an NUM -rather than an ID because NUM is listed first in the grammar. Inline -tokens have preference over any tokens you have listed. - -Now that our grammar is defined, we can run Yapps to produce a parser, -and then run the parser to produce a parse tree. - -\subsubsection*{Running Yapps} - -In the Yapps module is a function \texttt{generate} that takes an -input filename and writes a parser to another file. We can use this -function to generate the Lisp parser, which is assumed to be in -\texttt{lisp.g}. - -\begin{verbatim} -% python -Python 1.5.1 (#1, Sep 3 1998, 22:51:17) [GCC 2.7.2.3] on linux-i386 -Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam ->>> import yapps ->>> yapps.generate('lisp.g') -\end{verbatim} - -At this point, Yapps has written a file \texttt{lisp.py} that contains -the parser. In that file are two classes (one scanner and one parser) -and a function (called \texttt{parse}) that puts things together for -you. - -Alternatively, we can run Yapps from the command line to generate the -parser file: - -\begin{verbatim} -% python yapps.py lisp.g -\end{verbatim} - -After running Yapps either from within Python or from the command -line, we can use the Lisp parser by calling the \texttt{parse} -function. The first parameter should be the rule we want to match, -and the second parameter should be the string to parse. - -\begin{verbatim} ->>> import lisp ->>> lisp.parse('expr', '(+ 3 4)') -[('id', '+'), ('num', 3), ('num', 4)] ->>> lisp.parse('expr', '(print "3 = " (+ 1 2))') -[('id', 'print'), ('str', '3 = '), [('id', '+'), ('num', 1), ('num', 2)]] -\end{verbatim} - -The \texttt{parse} function is not the only way to use the parser; -section \ref{sec:Parser-Objects} describes how to access parser objects -directly. - -We've now gone through the steps in creating a grammar, writing a -grammar file for Yapps, producing a parser, and using the parser. In -the next example we'll see how rules can take parameters and also how -to do computations instead of just returning a parse tree. - -\mysubsection{Calculator} - -A common example parser given in many textbooks is that for simple -expressions, with numbers, addition, subtraction, multiplication, -division, and parenthesization of subexpressions. We'll write this -example in Yapps, evaluating the expression as we parse. - -Unlike \texttt{yacc}, Yapps does not have any way to specify -precedence rules, so we have to do it ourselves. We say that an -expression is the sum of terms, and that a term is the product of -factors, and that a factor is a number or a parenthesized expression: - -\begin{verbatim} - expr: factor ( ("+"|"-") factor )* - factor: term ( ("*"|"/") term )* - term: NUM | "(" expr ")" -\end{verbatim} - -In order to evaluate the expression as we go, we should keep along an -accumulator while evaluating the lists of terms or factors. Just as -we kept a ``result'' variable to build a parse tree for Lisp -expressions, we will use a variable to evaluate numerical -expressions. The full grammar is given below: - -\begin{verbatim} -parser Calculator: - token END: "$" # $ means end of string - token NUM: "[0-9]+" - - rule goal: expr END {{ return expr }} - - # An expression is the sum and difference of factors - rule expr: factor {{ v = factor }} - ( "[+]" factor {{ v = v+factor }} - | "-" factor {{ v = v-factor }} - )* {{ return v }} - - # A factor is the product and division of terms - rule factor: term {{ v = term }} - ( "[*]" term {{ v = v*term }} - | "/" term {{ v = v/term }} - )* {{ return v }} - - # A term is either a number or an expression surrounded by parentheses - rule term: NUM {{ return atoi(NUM) }} - | "\\(" expr "\\)" {{ return expr }} -\end{verbatim} - -The top-level rule is \emph{goal}, which says that we are looking for -an expression followed by the end of the string. The \texttt{END} -token is needed because without it, it isn't clear when to stop -parsing. For example, the string ``1+3'' could be parsed either as -the expression ``1'' followed by the string ``+3'' or it could be -parsed as the expression ``1+3''. By requiring expressions to end -with \texttt{END}, the parser is forced to take ``1+3''. - -In the two rules with repetition, the accumulator is named \texttt{v}. -After reading in one expression, we initialize the accumulator. Each -time through the loop, we modify the accumulator by adding, -subtracting, multiplying by, or dividing the previous accumulator by -the expression that has been parsed. At the end of the rule, we -return the accumulator. - -The calculator example shows how to process lists of elements using -loops, as well as how to handle precedence of operators. - -\emph{Note:} It's often important to put the \texttt{END} token in, so -put it in unless you are sure that your grammar has some other -non-ambiguous token marking the end of the program. - -\mysubsection{Calculator with Memory} - -In the previous example we learned how to write a calculator that -evaluates simple numerical expressions. In this section we will -extend the example to support both local and global variables. - -To support global variables, we will add assignment statements to the -``goal'' rule. - -\begin{verbatim} - rule goal: expr END {{ return expr }} - | 'set' ID expr END {{ global_vars[ID] = expr }} - {{ return expr }} -\end{verbatim} - -To use these variables, we need a new kind of terminal: - -\begin{verbatim} - rule term: ... | ID {{ return global_vars[ID] }} -\end{verbatim} - -So far, these changes are straightforward. We simply have a global -dictionary \texttt{global\_vars} that stores the variables and values, -we modify it when there is an assignment statement, and we look up -variables in it when we see a variable name. - -To support local variables, we will add variable declarations to the -set of allowed expressions. - -\begin{verbatim} - rule term: ... | 'let' VAR '=' expr 'in' expr ... -\end{verbatim} - -This is where it becomes tricky. Local variables should be stored in -a local dictionary, not in the global one. One trick would be to save -a copy of the global dictionary, modify it, and then restore it -later. In this example we will instead use \emph{attributes} to -create local information and pass it to subrules. - -A rule can optionally take parameters. When we invoke the rule, we -must pass in arguments. For local variables, let's use a single -parameter, \texttt{local\_vars}: - -\begin{verbatim} - rule expr<<local_vars>>: ... - rule factor<<local_vars>>: ... - rule term<<local_vars>>: ... -\end{verbatim} - -Each time we want to match \texttt{expr}, \texttt{factor}, or -\texttt{term}, we will pass the local variables in the current rule to -the subrule. One interesting case is when we pass as an argument -something \emph{other} than \texttt{local\_vars}: - -\begin{verbatim} - rule term<<local_vars>>: ... - | 'let' VAR '=' expr<<local_vars>> - {{ local_vars = [(VAR, expr)] + local_vars }} - 'in' expr<<local_vars>> - {{ return expr }} -\end{verbatim} - -Note that the assignment to the local variables list does not modify -the original list. This is important to keep local variables from -being seen outside the ``let''. - -The other interesting case is when we find a variable: - -\begin{verbatim} -global_vars = {} - -def lookup(map, name): - for x,v in map: if x==name: return v - return global_vars[name] -%% - ... - rule term<<local_vars>: ... - | VAR {{ return lookup(local_vars, VAR) }} -\end{verbatim} - -The lookup function will search through the local variable list, and -if it cannot find the name there, it will look it up in the global -variable dictionary. - -A complete grammar for this example, including a read-eval-print loop -for interacting with the calculator, can be found in the examples -subdirectory included with Yapps. - -In this section we saw how to insert code before the parser. We also -saw how to use attributes to transmit local information from one rule -to its subrules. - -\mysection{Grammars} - -Each Yapps grammar has a name, a list of tokens, and a set of -production rules. A grammar named \texttt{X} will be used to produce -a parser named \texttt{X} and a scanner anmed \texttt{XScanner}. As -in Python, names are case sensitive, start with a letter, and contain -letters, numbers, and underscores (\_). - -There are three kinds of tokens in Yapps: named, inline, and ignored. -As their name implies, named tokens are given a name, using the token -construct: \texttt{token \emph{name} : \emph{regexp}}. In a rule, the -token can be matched by using the name. Inline tokens are regular -expressions that are used in rules without being declared. Ignored -tokens are declared using the ignore construct: \texttt{ignore: - \emph{regexp}}. These tokens are ignored by the scanner, and are -not seen by the parser. Often whitespace is an ignored token. The -regular expressions used to define tokens should use the syntax -defined in the \texttt{re} module, so some symbols may have to be -backslashed. - -Production rules in Yapps have a name and a pattern to match. If the -rule is parameterized, the name should be followed by a list of -parameter names in \verb|<<...>>|. A pattern can be a simple pattern -or a compound pattern. Simple patterns are the name of a named token, -a regular expression in quotes (inline token), the name of a -production rule (followed by arguments in \verb|<<...>>|, if the rule -has parameters), and single line Python statements (\verb|{{...}}|). -Compound patterns are sequences (\verb|A B C ...|), choices ( -\verb:A | B | C | ...:), options (\verb|[...]|), zero-or-more repetitions -(\verb|...*|), and one-or-more repetitions (\verb|...+|). Like -regular expressions, repetition operators have a higher precedence -than sequences, and sequences have a higher precedence than choices. - -Whenever \verb|{{...}}| is used, a legal one-line Python statement -should be put inside the braces. The token \verb|}}| should not -appear within the \verb|{{...}}| section, even within a string, since -Yapps does not attempt to parse the Python statement. A workaround -for strings is to put two strings together (\verb|"}" "}"|), or to use -backslashes (\verb|"}\}"|). At the end of a rule you should use a -\verb|{{ return X }}| statement to return a value. However, you -should \emph{not} use any control statements (\texttt{return}, -\texttt{continue}, \texttt{break}) in the middle of a rule. Yapps -needs to make assumptions about the control flow to generate a parser, -and any changes to the control flow will confuse Yapps. - -The \verb|<<...>>| form can occur in two places: to define parameters -to a rule and to give arguments when matching a rule. Parameters use -the syntax used for Python functions, so they can include default -arguments and the special forms (\verb|*args| and \verb|**kwargs|). -Arguments use the syntax for Python function call arguments, so they -can include normal arguments and keyword arguments. The token -\verb|>>| should not appear within the \verb|<<...>>| section. - -In both the statements and rule arguments, you can use names defined -by the parser to refer to matched patterns. You can refer to the text -matched by a named token by using the token name. You can use the -value returned by a production rule by using the name of that rule. -If a name \texttt{X} is matched more than once (such as in loops), you -will have to save the earlier value(s) in a temporary variable, and -then use that temporary variable in the return value. The next -section has an example of a name that occurs more than once. - -\mysubsection{Left Factoring} -\label{sec:Left-Factoring} - -Yapps produces ELL(1) parsers, which determine which clause to match -based on the first token available. Sometimes the leftmost tokens of -several clauses may be the same. The classic example is the -\emph{if/then/else} construct in Pascal: - -\begin{verbatim} -rule stmt: "if" expr "then" stmt {{ then_part = stmt }} - "else" stmt {{ return ('If',expr,then_part,stmt) }} - | "if" expr "then" stmt {{ return ('If',expr,stmt,[]) }} -\end{verbatim} - -(Note that we have to save the first \texttt{stmt} into a variable -because there is another \texttt{stmt} that will be matched.) The -left portions of the two clauses are the same, which presents a -problem for the parser. The solution is \emph{left-factoring}: the -common parts are put together, and \emph{then} a choice is made about -the remaining part: - -\begin{verbatim} -rule stmt: "if" expr - "then" stmt {{ then_part = stmt }} - {{ else_part = [] }} - [ "else" stmt {{ else_part = stmt }} ] - {{ return ('If', expr, then_part, else_part) }} -\end{verbatim} - -Unfortunately, the classic \emph{if/then/else} situation is -\emph{still} ambiguous when you left-factor. Yapps can deal with this -situation, but will report a warning; see section -\ref{sec:Ambiguous-Grammars} for details. - -In general, replace rules of the form: - -\begin{verbatim} -rule A: a b1 {{ return E1 }} - | a b2 {{ return E2 }} - | c3 {{ return E3 }} - | c4 {{ return E4 }} -\end{verbatim} - -with rules of the form: - -\begin{verbatim} -rule A: a ( b1 {{ return E1 }} - | b2 {{ return E2 }} - ) - | c3 {{ return E3 }} - | c4 {{ return E4 }} -\end{verbatim} - -\mysubsection{Left Recursion} - -A common construct in grammars is for matching a list of patterns, -sometimes separated with delimiters such as commas or semicolons. In -LR-based parser systems, we can parse a list with something like this: - -\begin{verbatim} -rule sum: NUM {{ return NUM }} - | sum "+" NUM {{ return (sum, NUM) }} -\end{verbatim} - -Parsing \texttt{1+2+3+4} would produce the output -\texttt{(((1,2),3),4)}, which is what we want from a left-associative -addition operator. Unfortunately, this grammar is \emph{left -recursive,} because the \texttt{sum} rule contains a clause that -begins with \texttt{sum}. (The recursion occurs at the left side of -the clause.) - -We must restructure this grammar to be \emph{right recursive} instead: - -\begin{verbatim} -rule sum: NUM {{ return NUM }} - | NUM "+" sum {{ return (NUM, sum) }} -\end{verbatim} - -Unfortunately, using this grammar, \texttt{1+2+3+4} would be parsed as -\texttt{(1,(2,(3,4)))}, which no longer follows left associativity. -The rule also needs to be left-factored. Instead, we write the -pattern as a loop instead: - -\begin{verbatim} -rule sum: NUM {{ v = NUM }} - ( "[+]" NUM {{ v = (v,NUM) }} )* - {{ return v }} -\end{verbatim} - -In general, replace rules of the form: - -\begin{verbatim} -rule A: A a1 -> << E1 >> - | A a2 -> << E2 >> - | b3 -> << E3 >> - | b4 -> << E4 >> -\end{verbatim} - -with rules of the form: - -\begin{verbatim} -rule A: ( b3 {{ A = E3 }} - | b4 {{ A = E4 }} ) - ( a1 {{ A = E1 }} - | a2 {{ A = E2 }} )* - {{ return A }} -\end{verbatim} - -We have taken a rule that proved problematic for with recursion and -turned it into a rule that works well with looping constructs. - -\mysubsection{Ambiguous Grammars} -\label{sec:Ambiguous-Grammars} - -In section \ref{sec:Left-Factoring} we saw the classic if/then/else -ambiguity, which occurs because the ``else \ldots'' portion of an ``if -\ldots then \ldots else \ldots'' construct is optional. Programs with -nested if/then/else constructs can be ambiguous when one of the else -clauses is missing: -\begin{verbatim} -if 1 then if 1 then - if 5 then if 5 then - x := 1; x := 1; - else else - y := 9; y := 9; -\end{verbatim} - -The indentation shows that the program can be parsed in two different -ways. (Of course, if we all would adopt Python's indentation-based -structuring, this would never happen!) Usually we want the parsing on -the left: the ``else'' should be associated with the closest ``if'' -statement. In section \ref{sec:Left-Factoring} we ``solved'' the -problem by using the following grammar: - -\begin{verbatim} -rule stmt: "if" expr - "then" stmt {{ then_part = stmt }} - {{ else_part = [] }} - [ "else" stmt {{ else_part = stmt }} ] - {{ return ('If', expr, then_part, else_part) }} -\end{verbatim} - -Here, we have an optional match of ``else'' followed by a statement. -The ambiguity is that if an ``else'' is present, it is not clear -whether you want it parsed immediately or if you want it to be parsed -by the outer ``if''. - -Yapps will deal with the situation by matching when the else pattern -when it can. The parser will work in this case because it prefers the -\emph{first} matching clause, which tells Yapps to parse the ``else''. -That is exactly what we want! - -For ambiguity cases with choices, Yapps will choose the \emph{first} -matching choice. However, remember that Yapps only looks at the first -token to determine its decision, so {\tt (a b | a c)} will result in -Yapps choosing {\tt a b} even when the input is {\tt a c}. It only -looks at the first token, {\tt a}, to make its decision. - -\mysection{Customization} - -Both the parsers and the scanners can be customized. The parser is -usually extended by subclassing, and the scanner can either be -subclassed or completely replaced. - -\mysubsection{Customizing Parsers} - -If additional fields and methods are needed in order for a parser to -work, Python subclassing can be used. (This is unlike parser classes -written in static languages, in which these fields and methods must be -defined in the generated parser class.) We simply subclass the -generated parser, and add any fields or methods required. Expressions -in the grammar can call methods of the subclass to perform any actions -that cannot be expressed as a simple expression. For example, -consider this simple grammar: - -\begin{verbatim} -parser X: - rule goal: "something" {{ self.printmsg() }} -\end{verbatim} - -The \texttt{printmsg} function need not be implemented in the parser -class \texttt{X}; it can be implemented in a subclass: - -\begin{verbatim} -import Xparser - -class MyX(Xparser.X): - def printmsg(self): - print "Hello!" -\end{verbatim} - -\mysubsection{Customizing Scanners} - -The generated parser class is not dependent on the generated scanner -class. A scanner object is passed to the parser object's constructor -in the \texttt{parse} function. To use a different scanner, write -your own function to construct parser objects, with an instance of a -different scanner. Scanner objects must have a \texttt{token} method -that accepts an integer \texttt{N} as well as a list of allowed token -types, and returns the Nth token, as a tuple. The default scanner -raises \texttt{NoMoreTokens} if no tokens are available, and -\texttt{SyntaxError} if no token could be matched. However, the -parser does not rely on these exceptions; only the \texttt{parse} -convenience function (which calls \texttt{wrap\_error\_reporter}) and -the \texttt{print\_error} error display function use those exceptions. - -The tuples representing tokens have four elements. The first two are -the beginning and ending indices of the matched text in the input -string. The third element is the type tag, matching either the name -of a named token or the quoted regexp of an inline or ignored token. -The fourth element of the token tuple is the matched text. If the -input string is \texttt{s}, and the token tuple is -\texttt{(b,e,type,val)}, then \texttt{val} should be equal to -\texttt{s[b:e]}. - -The generated parsers do not the beginning or ending index. They use -only the token type and value. However, the default error reporter -uses the beginning and ending index to show the user where the error -is. - -\mysection{Parser Mechanics} - -The base parser class (Parser) defines two methods, \texttt{\_scan} -and \texttt{\_peek}, and two fields, \texttt{\_pos} and -\texttt{\_scanner}. The generated parser inherits from the base -parser, and contains one method for each rule in the grammar. To -avoid name clashes, do not use names that begin with an underscore -(\texttt{\_}). - -\mysubsection{Parser Objects} -\label{sec:Parser-Objects} - -Yapps produces as output two exception classes, a scanner class, a -parser class, and a function \texttt{parse} that puts everything -together. The \texttt{parse} function does not have to be used; -instead, one can create a parser and scanner object and use them -together for parsing. - -\begin{verbatim} - def parse(rule, text): - P = X(XScanner(text)) - return wrap_error_reporter(P, rule) -\end{verbatim} - -The \texttt{parse} function takes a name of a rule and an input string -as input. It creates a scanner and parser object, then calls -\texttt{wrap\_error\_reporter} to execute the method in the parser -object named \texttt{rule}. The wrapper function will call the -appropriate parser rule and report any parsing errors to standard -output. - -There are several situations in which the \texttt{parse} function -would not be useful. If a different parser or scanner is being used, -or exceptions are to be handled differently, a new \texttt{parse} -function would be required. The supplied \texttt{parse} function can -be used as a template for writing a function for your own needs. An -example of a custom parse function is the \texttt{generate} function -in \texttt{Yapps.py}. - -\mysubsection{Context Sensitive Scanner} - -Unlike most scanners, the scanner produced by Yapps can take into -account the context in which tokens are needed, and try to match only -good tokens. For example, in the grammar: - -\begin{verbatim} -parser IniFile: - token ID: "[a-zA-Z_0-9]+" - token VAL: ".*" - - rule pair: ID "[ \t]*=[ \t]*" VAL "\n" -\end{verbatim} - -we would like to scan lines of text and pick out a name/value pair. -In a conventional scanner, the input string \texttt{shell=progman.exe} -would be turned into a single token of type \texttt{VAL}. The Yapps -scanner, however, knows that at the beginning of the line, an -\texttt{ID} is expected, so it will return \texttt{"shell"} as a token -of type \texttt{ID}. Later, it will return \texttt{"progman.exe"} as -a token of type \texttt{VAL}. - -Context sensitivity decreases the separation between scanner and -parser, but it is useful in parsers like \texttt{IniFile}, where the -tokens themselves are not unambiguous, but \emph{are} unambiguous -given a particular stage in the parsing process. - -Unfortunately, context sensitivity can make it more difficult to -detect errors in the input. For example, in parsing a Pascal-like -language with ``begin'' and ``end'' as keywords, a context sensitive -scanner would only match ``end'' as the END token if the parser is in -a place that will accept the END token. If not, then the scanner -would match ``end'' as an identifier. To disable the context -sensitive scanner in Yapps, add the -\texttt{context-insensitive-scanner} option to the grammar: - -\begin{verbatim} -Parser X: - option: "context-insensitive-scanner" -\end{verbatim} - -Context-insensitive scanning makes the parser look cleaner as well. - -\mysubsection{Internal Variables} - -There are two internal fields that may be of use. The parser object -has two fields, \texttt{\_pos}, which is the index of the current -token being matched, and \texttt{\_scanner}, which is the scanner -object. The token itself can be retrieved by accessing the scanner -object and calling the \texttt{token} method with the token index. However, if you call \texttt{token} before the token has been requested by the parser, it may mess up a context-sensitive scanner.\footnote{When using a context-sensitive scanner, the parser tells the scanner what the valid token types are at each point. If you call \texttt{token} before the parser can tell the scanner the valid token types, the scanner will attempt to match without considering the context.} A -potentially useful combination of these fields is to extract the -portion of the input matched by the current rule. To do this, just save the scanner state (\texttt{\_scanner.pos}) before the text is matched and then again after the text is matched: - -\begin{verbatim} - rule R: - {{ start = self._scanner.pos }} - a b c - {{ end = self._scanner.pos }} - {{ print 'Text is', self._scanner.input[start:end] }} -\end{verbatim} - -\mysubsection{Pre- and Post-Parser Code} - -Sometimes the parser code needs to rely on helper variables, -functions, and classes. A Yapps grammar can optionally be surrounded -by double percent signs, to separate the grammar from Python code. - -\begin{verbatim} -... Python code ... -%% -... Yapps grammar ... -%% -... Python code ... -\end{verbatim} - -The second \verb|%%| can be omitted if there is no Python code at the -end, and the first \verb|%%| can be omitted if there is no extra -Python code at all. (To have code only at the end, both separators -are required.) - -If the second \verb|%%| is omitted, Yapps will insert testing code -that allows you to use the generated parser to parse a file. - -The extended calculator example in the Yapps examples subdirectory -includes both pre-parser and post-parser code. - -\mysubsection{Representation of Grammars} - -For each kind of pattern there is a class derived from Pattern. Yapps -has classes for Terminal, NonTerminal, Sequence, Choice, Option, Plus, -Star, and Eval. Each of these classes has the following interface: - -\begin{itemize} - \item[setup(\emph{gen})] Set accepts-$\epsilon$, and call - \emph{gen.changed()} if it changed. This function can change the - flag from false to true but \emph{not} from true to false. - \item[update(\emph(gen))] Set \first and \follow, and call - \emph{gen.changed()} if either changed. This function can add to - the sets but \emph{not} remove from them. - \item[output(\emph{gen}, \emph{indent})] Generate code for matching - this rule, using \emph{indent} as the current indentation level. - Writes are performed using \emph{gen.write}. - \item[used(\emph{vars})] Given a list of variables \emph{vars}, - return two lists: one containing the variables that are used, and - one containing the variables that are assigned. This function is - used for optimizing the resulting code. -\end{itemize} - -Both \emph{setup} and \emph{update} monotonically increase the -variables they modify. Since the variables can only increase a finite -number of times, we can repeatedly call the function until the -variable stabilized. The \emph{used} function is not currently -implemented. - -With each pattern in the grammar Yapps associates three pieces of -information: the \first set, the \follow set, and the -accepts-$\epsilon$ flag. - -The \first set contains the tokens that can appear as we start -matching the pattern. The \follow set contains the tokens that can -appear immediately after we match the pattern. The accepts-$\epsilon$ -flag is true if the pattern can match no tokens. In this case, \first -will contain all the elements in \follow. The \follow set is not -needed when accepts-$\epsilon$ is false, and may not be accurate in -those cases. - -Yapps does not compute these sets precisely. Its approximation can -miss certain cases, such as this one: - -\begin{verbatim} - rule C: ( A* | B ) - rule B: C [A] -\end{verbatim} - -Yapps will calculate {\tt C}'s \follow set to include {\tt A}. -However, {\tt C} will always match all the {\tt A}'s, so {\tt A} will -never follow it. Yapps 2.0 does not properly handle this construct, -but if it seems important, I may add support for it in a future -version. - -Yapps also cannot handle constructs that depend on the calling -sequence. For example: - -\begin{verbatim} - rule R: U | 'b' - rule S: | 'c' - rule T: S 'b' - rule U: S 'a' -\end{verbatim} - -The \follow set for {\tt S} includes {\tt a} and {\tt b}. Since {\tt - S} can be empty, the \first set for {\tt S} should include {\tt a}, -{\tt b}, and {\tt c}. However, when parsing {\tt R}, if the lookahead -is {\tt b} we should \emph{not} parse {\tt U}. That's because in {\tt - U}, {\tt S} is followed by {\tt a} and not {\tt b}. Therefore in -{\tt R}, we should choose rule {\tt U} only if there is an {\tt a} or -{\tt c}, but not if there is a {\tt b}. Yapps and many other LL(1) -systems do not distinguish {\tt S b} and {\tt S a}, making {\tt - S}'s \follow set {\tt a, b}, and making {\tt R} always try to match -{\tt U}. In this case we can solve the problem by changing {\tt R} to -\verb:'b' | U: but it may not always be possible to solve all such -problems in this way. - -\appendix - -\mysection{Grammar for Parsers} - -This is the grammar for parsers, without any Python code mixed in. -The complete grammar can be found in \texttt{parsedesc.g} in the Yapps -distribution. - -\begin{verbatim} -parser ParserDescription: - ignore: "\\s+" - ignore: "#.*?\r?\n" - token END: "$" # $ means end of string - token ATTR: "<<.+?>>" - token STMT: "{{.+?}}" - token ID: '[a-zA-Z_][a-zA-Z_0-9]*' - token STR: '[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"' - - rule Parser: "parser" ID ":" - Options - Tokens - Rules - END - - rule Options: ( "option" ":" STR )* - rule Tokens: ( "token" ID ":" STR | "ignore" ":" STR )* - rule Rules: ( "rule" ID OptParam ":" ClauseA )* - - rule ClauseA: ClauseB ( '[|]' ClauseB )* - rule ClauseB: ClauseC* - rule ClauseC: ClauseD [ '[+]' | '[*]' ] - rule ClauseD: STR | ID [ATTR] | STMT - | '\\(' ClauseA '\\) | '\\[' ClauseA '\\]' -\end{verbatim} - -\mysection{Upgrading} - -Yapps 2.0 is not backwards compatible with Yapps 1.0. In this section -are some tips for upgrading: - -\begin{enumerate} - \item Yapps 1.0 was distributed as a single file. Yapps 2.0 is - instead distributed as two Python files: a \emph{parser generator} - (26k) and a \emph{parser runtime} (5k). You need both files to - create parsers, but you need only the runtime (\texttt{yappsrt.py}) - to use the parsers. - - \item Yapps 1.0 supported Python 1.4 regular expressions from the - \texttt{regex} module. Yapps 2.0 uses Python 1.5 regular - expressions from the \texttt{re} module. \emph{The new syntax for - regular expressions is not compatible with the old syntax.} - Andrew Kuchling has a \htmladdnormallink{guide to converting - regular - expressions}{http://www.python.org/doc/howto/regex-to-re/} on his - web page. - - \item Yapps 1.0 wants a pattern and then a return value in \verb|->| - \verb|<<...>>|. Yapps 2.0 allows patterns and Python statements to - be mixed. To convert a rule like this: - -\begin{verbatim} -rule R: A B C -> << E1 >> - | X Y Z -> << E2 >> -\end{verbatim} - - to Yapps 2.0 form, replace the return value specifiers with return - statements: - -\begin{verbatim} -rule R: A B C {{ return E1 }} - | X Y Z {{ return E2 }} -\end{verbatim} - - \item Yapps 2.0 does not perform tail recursion elimination. This - means any recursive rules you write will be turned into recursive - methods in the parser. The parser will work, but may be slower. - It can be made faster by rewriting recursive rules, using instead - the looping operators \verb|*| and \verb|+| provided in Yapps 2.0. - -\end{enumerate} - -\mysection{Troubleshooting} - -\begin{itemize} - \item A common error is to write a grammar that doesn't have an END - token. End tokens are needed when it is not clear when to stop - parsing. For example, when parsing the expression {\tt 3+5}, it is - not clear after reading {\tt 3} whether to treat it as a complete - expression or whether the parser should continue reading. - Therefore the grammar for numeric expressions should include an end - token. Another example is the grammar for Lisp expressions. In - Lisp, it is always clear when you should stop parsing, so you do - \emph{not} need an end token. In fact, it may be more useful not - to have an end token, so that you can read in several Lisp expressions. - \item If there is a chance of ambiguity, make sure to put the choices - in the order you want them checked. Usually the most specific - choice should be first. Empty sequences should usually be last. - \item The context sensitive scanner is not appropriate for all - grammars. You might try using the insensitive scanner with the - {\tt context-insensitive-scanner} option in the grammar. - \item If performance turns out to be a problem, try writing a custom - scanner. The Yapps scanner is rather slow (but flexible and easy - to understand). -\end{itemize} - -\mysection{History} - -Yapps 1 had several limitations that bothered me while writing -parsers: - -\begin{enumerate} - \item It was not possible to insert statements into the generated - parser. A common workaround was to write an auxilliary function - that executed those statements, and to call that function as part - of the return value calculation. For example, several of my - parsers had an ``append(x,y)'' function that existed solely to call - ``x.append(y)''. - \item The way in which grammars were specified was rather - restrictive: a rule was a choice of clauses. Each clause was a - sequence of tokens and rule names, followed by a return value. - \item Optional matching had to be put into a separate rule because - choices were only made at the beginning of a rule. - \item Repetition had to be specified in terms of recursion. Not only - was this awkward (sometimes requiring additional rules), I had to - add a tail recursion optimization to Yapps to transform the - recursion back into a loop. -\end{enumerate} - -Yapps 2 addresses each of these limitations. - -\begin{enumerate} - \item Statements can occur anywhere within a rule. (However, only - one-line statements are allowed; multiline blocks marked by - indentation are not.) - \item Grammars can be specified using any mix of sequences, choices, - tokens, and rule names. To allow for complex structures, - parentheses can be used for grouping. - \item Given choices and parenthesization, optional matching can be - expressed as a choice between some pattern and nothing. In - addition, Yapps 2 has the convenience syntax \verb|[A B ...]| for - matching \verb|A B ...| optionally. - \item Repetition operators \verb|*| for zero or more and \verb|+| for - one or more make it easy to specify repeating patterns. -\end{enumerate} - -It is my hope that Yapps 2 will be flexible enough to meet my needs -for another year, yet simple enough that I do not hesitate to use it. - -\mysection{Future Extensions} -\label{sec:future} - -I am still investigating the possibility of LL(2) and higher -lookahead. However, it looks like the resulting parsers will be -somewhat ugly. - -It would be nice to control choices with user-defined predicates. - -The most likely future extension is backtracking. A grammar pattern -like \verb|(VAR ':=' expr)? {{ return Assign(VAR,expr) }} : expr {{ return expr }}| -would turn into code that attempted to match \verb|VAR ':=' expr|. If -it succeeded, it would run \verb|{{ return ... }}|. If it failed, it -would match \verb|expr {{ return expr }}|. Backtracking may make it -less necessary to write LL(2) grammars. - -\mysection{References} - -\begin{enumerate} - \item The \htmladdnormallink{Python-Parser - SIG}{http://www.python.org/sigs/parser-sig/} is the first place - to look for a list of parser systems for Python. - - \item ANTLR/PCCTS, by Terrence Parr, is available at - \htmladdnormallink{The ANTLR Home Page}{http://www.antlr.org/}. - - \item PyLR, by Scott Cotton, is at \htmladdnormallink{his Starship - page}{http://starship.skyport.net/crew/scott/PyLR.html}. - - \item John Aycock's \htmladdnormallink{Compiling Little Languages - Framework}{http://www.csr.UVic.CA/~aycock/python/}. - - \item PyBison, by Scott Hassan, can be found at - \htmladdnormallink{his Python Projects - page}{http://coho.stanford.edu/\~{}hassan/Python/}. - - \item mcf.pars, by Mike C. Fletcher, is available at - \htmladdnormallink{his web - page}{http://www.golden.net/\~{}mcfletch/programming/}. - - \item kwParsing, by Aaron Watters, is available at - \htmladdnormallink{his Starship - page}{http://starship.skyport.net/crew/aaron_watters/kwParsing/}. -\end{enumerate} - -\end{document} diff --git a/util/newconfig/yappsrt.py b/util/newconfig/yappsrt.py deleted file mode 100644 index 2ce2480f08..0000000000 --- a/util/newconfig/yappsrt.py +++ /dev/null @@ -1,172 +0,0 @@ -# Yapps 2.0 Runtime -# -# This module is needed to run generated parsers. - -from string import * -import exceptions -import re - -class SyntaxError(Exception): - """When we run into an unexpected token, this is the exception to use""" - def __init__(self, pos=-1, msg="Bad Token"): - self.pos = pos - self.msg = msg - def __repr__(self): - if self.pos < 0: return "#<syntax-error>" - else: return "SyntaxError[@ char " + `self.pos` + ": " + self.msg + "]" - -class NoMoreTokens(Exception): - """Another exception object, for when we run out of tokens""" - pass - -class Scanner: - def __init__(self, patterns, ignore, input): - """Patterns is [(terminal,regex)...] - Ignore is [terminal,...]; - Input is a string""" - self.tokens = [] - self.restrictions = [] - self.input = input - self.pos = 0 - self.ignore = ignore - # The stored patterns are a pair (compiled regex,source - # regex). If the patterns variable passed in to the - # constructor is None, we assume that the class already has a - # proper .patterns list constructed - if patterns is not None: - self.patterns = [] - for k,r in patterns: - self.patterns.append( (k, re.compile(r)) ) - - def token(self, i, restrict=0): - """Get the i'th token, and if i is one past the end, then scan - for another token; restrict is a list of tokens that - are allowed, or 0 for any token.""" - if i == len(self.tokens): self.scan(restrict) - if i < len(self.tokens): - # Make sure the restriction is more restricted - if restrict and self.restrictions[i]: - for r in restrict: - if r not in self.restrictions[i]: - raise "Unimplemented: restriction set changed" - return self.tokens[i] - raise NoMoreTokens() - - def __repr__(self): - """Print the last 10 tokens that have been scanned in""" - output = '' - for t in self.tokens[-10:]: - output = '%s\n (@%s) %s = %s' % (output,t[0],t[2],`t[3]`) - return output - - def scan(self, restrict): - """Should scan another token and add it to the list, self.tokens, - and add the restriction to self.restrictions""" - # Keep looking for a token, ignoring any in self.ignore - while 1: - # Search the patterns for the longest match, with earlier - # tokens in the list having preference - best_match = -1 - best_pat = '(error)' - for p, regexp in self.patterns: - # First check to see if we're ignoring this token - if restrict and p not in restrict and p not in self.ignore: - continue - m = regexp.match(self.input, self.pos) - if m and len(m.group(0)) > best_match: - # We got a match that's better than the previous one - best_pat = p - best_match = len(m.group(0)) - - # If we didn't find anything, raise an error - if best_pat == '(error)' and best_match < 0: - msg = "Bad Token" - if restrict: - msg = "Trying to find one of "+join(restrict,", ") - raise SyntaxError(self.pos, msg) - - # If we found something that isn't to be ignored, return it - if best_pat not in self.ignore: - # Create a token with this data - token = (self.pos, self.pos+best_match, best_pat, - self.input[self.pos:self.pos+best_match]) - self.pos = self.pos + best_match - # Only add this token if it's not in the list - # (to prevent looping) - if not self.tokens or token != self.tokens[-1]: - self.tokens.append(token) - self.restrictions.append(restrict) - return - else: - # This token should be ignored .. - self.pos = self.pos + best_match - -class Parser: - def __init__(self, scanner): - self._scanner = scanner - self._pos = 0 - - def _peek(self, *types): - """Returns the token type for lookahead; if there are any args - then the list of args is the set of token types to allow""" - tok = self._scanner.token(self._pos, types) - return tok[2] - - def _scan(self, type): - """Returns the matched text, and moves to the next token""" - tok = self._scanner.token(self._pos, [type]) - if tok[2] != type: - raise SyntaxError(tok[0], 'Trying to find '+type) - self._pos = 1+self._pos - return tok[3] - - - -def print_error(input, err, scanner): - """This is a really dumb long function to print error messages nicely.""" - p = err.pos - # Figure out the line number - line = count(input[:p], '\n') - print err.msg+" on line "+`line+1`+":" - # Now try printing part of the line - text = input[max(p-80,0):p+80] - p = p - max(p-80,0) - - # Strip to the left - i = rfind(text[:p],'\n') - j = rfind(text[:p],'\r') - if i < 0 or (j < i and j >= 0): i = j - if i >= 0 and i < p: - p = p - i - 1 - text = text[i+1:] - - # Strip to the right - i = find(text,'\n',p) - j = find(text,'\r',p) - if i < 0 or (j < i and j >= 0): i = j - if i >= 0: - text = text[:i] - - # Now shorten the text - while len(text) > 70 and p > 60: - # Cut off 10 chars - text = "..." + text[10:] - p = p - 7 - - # Now print the string, along with an indicator - print '> ',text - print '> ',' '*p + '^' - print 'List of nearby tokens:', scanner - -def wrap_error_reporter(parser, rule): - try: return getattr(parser, rule)() - except SyntaxError, s: - input = parser._scanner.input - try: - print_error(input, s, parser._scanner) - except ImportError: - print 'Syntax Error',s.msg,'on line',1+count(input[:s.pos], '\n') - except NoMoreTokens: - print 'Could not complete parsing; stopped around here:' - print parser._scanner - |