Make newlib manpages via DocBook XML (v3)

Add makedocbook, a tool to process makedoc markup and output DocBook XML
refentries.

Process all the source files which are processed with makedoc through
makedocbook as well.

Add chapter-texi2docbook, a tool to automatically generate DocBook XML
chapter files from the chapter .texi files.  For generating man pages, all we
care about is the content of the refentries, so all this needs to do is
convert the @include of the makedoc-generated .def files to an xi:include of
the makedocbook-generated .xml files.
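
For illustration (file name hypothetical), a chapter line such as

@include stdlib/abs.def

is rewritten to

<xi:include href="stdlib/abs.xml"/>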

Add skeleton DocBook XML book files, lib[cm].in.xml, which include these
generated chapters, which in turn include the generated files containing
refentries. These books are processed with xsltproc to generate lib[cm].xml.

Add new make targets to generate and install man pages from lib[cm].xml.
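
Concretely, the man target amounts to the following two commands (as they
appear in the Makefile.am hunks below):

xsltproc --xinclude --path ${builddir} --nonet ${srcdir}/../refcontainers.xslt ${srcdir}/libc.in.xml >libc.xml
xmlto --skip-validation man -m ${srcdir}/../man.xsl libc.xml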
Jon Turney, 2016-06-24 21:50:15 +01:00
parent fdc45c4bfb
commit 041ea41068
16 changed files with 1047 additions and 4 deletions


@@ -362,6 +362,21 @@ dvi-recursive: doc/makedoc
doc/makedoc:
cd doc && $(MAKE) all
# Recursive targets for man and install-man
man:
for d in $(SUBDIRS); do \
if test "$$d" != "."; then \
(cd $$d && $(MAKE) man) || exit 1; \
fi; \
done
install-man:
for d in $(SUBDIRS); do \
if test "$$d" != "."; then \
(cd $$d && $(MAKE) install-man) || exit 1; \
fi; \
done
CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host
# Multilib support.


@@ -9,7 +9,7 @@ objectlist.awk.in: $(noinst_LTLIBRARIES)
# documentation rules
#
SUFFIXES = .def .xml
CHEW = ${top_builddir}/../doc/makedoc -f $(top_srcdir)/../doc/doc.str
@@ -25,4 +25,18 @@ doc: $(CHEWOUT_FILES)
cat $(srcdir)/$$chapter >> $(TARGETDOC) ; \
done
DOCBOOK_CHEW = ${top_srcdir}/../doc/makedocbook.py
DOCBOOK_OUT_FILES = $(CHEWOUT_FILES:.def=.xml)
DOCBOOK_CHAPTERS = $(CHAPTERS:.tex=.xml)
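# chew the source file into DocBook XML; if makedocbook fails, remove the
# partial output and propagate the error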
.c.xml:
$(DOCBOOK_CHEW) < $< > $*.xml || ( rm $*.xml && false )
@touch stmp-xml
docbook: $(DOCBOOK_OUT_FILES)
for chapter in $(DOCBOOK_CHAPTERS) ; \
do \
${top_srcdir}/../doc/chapter-texi2docbook.py <$(srcdir)/$${chapter%.xml}.tex >../$$chapter ; \
done
CLEANFILES = $(CHEWOUT_FILES) $(CHEWOUT_FILES:.def=.ref) $(DOCBOOK_OUT_FILES)

newlib/doc/.gitignore (new file, 3 lines)

@@ -0,0 +1,3 @@
# PLY artefacts
parser.out
parsetab.py


@@ -19,3 +19,5 @@ makedoc.o: makedoc.c
ACLOCAL_AMFLAGS = -I .. -I ../..
CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host
man:


@@ -0,0 +1,45 @@
#!/usr/bin/env python
#
# python script to convert the handwritten chapter .texi files, which include
# the generated files for each function, to DocBook XML
#
# all we care about is the content of the refentries, so all this needs to do is
# convert the @include of the makedoc generated .def files to xi:include of the
# makedocbook generated .xml files.
#
from __future__ import print_function
import sys
import re
def main():
first_node = True
print ('<?xml version="1.0" encoding="UTF-8"?>')
print ('<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">')
for l in sys.stdin.readlines():
l = l.rstrip()
# transform @file{foo} to <filename>foo</filename>
l = re.sub("@file{(.*?)}", "<filename>\\1</filename>", l)
if l.startswith("@node"):
l = l.replace("@node", "", 1)
l = l.strip()
l = l.lower()
if first_node:
print ('<chapter id="%s" xmlns:xi="http://www.w3.org/2001/XInclude">' % l.replace(' ', '_'))
first_node = False
elif l.startswith("@chapter "):
l = l.replace("@chapter ", "", 1)
print ('<title>%s</title>' % l)
elif l.startswith("@include "):
l = l.replace("@include ", "", 1)
l = l.replace(".def", ".xml", 1)
print ('<xi:include href="%s"/>' % l.strip())
print ('</chapter>')
if __name__ == "__main__":
main()

newlib/doc/makedocbook.py (new executable file, 834 lines)

@@ -0,0 +1,834 @@
#!/usr/bin/env python
#
# python script to process makedoc instructions in a source file and produce
# DocBook XML output
#
#
# This performs 3 stages of processing on its input, in a similar fashion
# to makedoc:
#
# 1. Discard everything outside of /* */ comments
# 2. Identify lines which contains commands (a single uppercase word)
# 3. Apply each command to the text of the following lines (up to the next
# command or the end of the comment block), to produce some output
#
# The resulting output contains one or more DocBook XML refentry elements.
#
# To make the output a valid XML document which can be xincluded, those refentry
# elements are contained by a refentrycontainer element. refentrycontainer is
# not part of the DocBook DTD and should be removed by a suitable XSLT.
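#
# For example (input purely illustrative), a comment block such as:
#
# /*
# FUNCTION
# <<frob>>---frobnicate a value
# */
#
# produces a refentry with id "frob", wrapped in a refentrycontainer.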
#
from __future__ import print_function
import sys
import re
from optparse import OptionParser
import lxml.etree
import ply.lex as lex
import ply.yacc as yacc
rootelement = None # root element of the XML tree
refentry = None # the current refentry
verbose = 0
def dump(s, stage, threshold = 1):
if verbose > threshold:
print('*' * 40, file=sys.stderr)
print(stage, file=sys.stderr)
print('*' * 40, file=sys.stderr)
print('%s' % s, file=sys.stderr)
print('*' * 40, file=sys.stderr)
#
# Stage 1
#
def skip_whitespace_and_stars(i, src):
while i < len(src) and (src[i].isspace() or (src[i] == '*' and src[i+1] != '/')):
i += 1
return i
# Discard everything not inside '/* */' style-comments which start at column 0
# Discard any leading blank space or '*'
# Discard a single leading '.'
# Discard blank lines after a blank line
def comment_contents_generator(src):
i = 0
while i < len(src) - 2:
if src[i] == '\n' and src[i+1] == '/' and src[i+2] == '*':
i = i + 3
i = skip_whitespace_and_stars(i, src)
if src[i] == '.':
i += 1
while i < len(src):
if src[i] == '\n':
yield '\n'
i += 1
# allow a single blank line
if i < len(src) and src[i] == '\n':
yield '\n'
i += 1
i = skip_whitespace_and_stars(i, src)
elif src[i] == '*' and src[i+1] == '/':
i = i + 2
# If we have just output \n\n, this adds another blank line.
# This is the only way a double blank line can occur.
yield '\nEND\n'
break
else:
yield src[i]
i += 1
else:
i += 1
def remove_noncomments(src):
src = '\n' + src
dst = ''.join(comment_contents_generator(src))
dump(dst, 'extracted from comments')
return dst
#
# Stage 2
#
# A command is a single word of at least 3 characters, all uppercase, and alone on a line
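# e.g. FUNCTION, SYNOPSIS, RETURNS (the full set is in command_dispatch_dict below)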
def iscommand(l):
if re.match('^[A-Z_]{3,}\s*$', l):
return True
return False
def command_block_generator(content):
command = 'START'
text = ''
for l in content.splitlines():
if iscommand(l):
yield (command, text)
command = l.rstrip()
text = ''
else:
text = text + l + '\n'
yield (command, text)
# Look for commands, which give instructions how to process the following input
def process(content):
content = content.lstrip()
dump(content, 'about to process for commands')
# process into a list of tuples of commands and the associated following text
# it is important to maintain the order of the sections the commands generate
processed = list(command_block_generator(content))
return processed
#
# Stage 3
#
# invoke each command on its text
def perform(processed):
for i in processed:
c = i[0].rstrip()
t = i[1].strip() + '\n'
if verbose:
print("performing command '%s'" % c, file=sys.stderr)
if c in command_dispatch_dict:
command_dispatch_dict[c](c, t)
else:
print("command '%s' is not recognized" % c, file=sys.stderr)
# the text following an unrecognized command is discarded
# FUNCTION (aka TYPEDEF)
#
def function(c, l):
global refentry
global rootelement
l = l.strip()
if verbose:
print('FUNCTION %s' % l, file=sys.stderr)
separator = '---'
if ';' in l:
# fpclassify has an unusual format we also need to handle
spliton = ';'
l = l.splitlines()[0]
elif len(l.splitlines()) > 1:
# a few pages like mktemp have two '---' lines
spliton = ';'
o = ''
for i in l.splitlines():
if separator in i:
o += i + ';'
else:
o += i
l = o[:-1]
else:
spliton = '\n'
namelist = []
descrlist = []
for a in l.split(spliton):
(n, d) = a.split(separator, 1)
namelist = namelist + n.split(',')
descrlist = descrlist + [d]
# only copysign and log1p use <[ ]> markup in descr,
# only gets() uses << >> markup
# but we should handle it correctly
descr = line_markup_convert(', '.join(descrlist))
# fpclassify includes an 'and' we need to discard
namelist = map(lambda v: re.sub('^and ', '', v.strip(), 1), namelist)
# strip off << >> surrounding name
namelist = map(lambda v: v.strip().lstrip('<').rstrip('>'), namelist)
if verbose:
print(namelist, file=sys.stderr)
# additional alternate names may also appear in INDEX commands
# create the root element if needed
if rootelement is None:
rootelement = lxml.etree.Element('refentrycontainer')
# FUNCTION implies starting a new refentry
if refentry is not None:
print("multiple FUNCTIONs without NEWPAGE", file=sys.stderr)
exit(1)
# create the refentry
refentry = lxml.etree.SubElement(rootelement, 'refentry')
refentry.append(lxml.etree.Comment(' Generated by makedocbook.py '))
refentry.set('id', namelist[0].lstrip('_'))
refmeta = lxml.etree.SubElement(refentry, 'refmeta')
# refentrytitle will be same as refdescriptor, the primary name
refentrytitle = lxml.etree.SubElement(refmeta, 'refentrytitle')
refentrytitle.text = namelist[0]
manvolnum = lxml.etree.SubElement(refmeta, 'manvolnum')
manvolnum.text = '3'
refnamediv = lxml.etree.SubElement(refentry, 'refnamediv')
# refdescriptor is the primary name, assume we should use the one which
# appears first in the list
refdescriptor = lxml.etree.SubElement(refnamediv, 'refdescriptor')
refdescriptor.text = namelist[0]
# refname elements exist for all alternate names
for n in namelist:
refname = lxml.etree.SubElement(refnamediv, 'refname')
refname.text = n
refpurpose = lxml.etree.SubElement(refnamediv, 'refpurpose')
refnamediv.replace(refpurpose, lxml.etree.fromstring('<refpurpose>' + descr + '</refpurpose>'))
# Only FUNCTION currently exists, which implies that the SYNOPSIS should be
# a funcsynopsis. If TYPEDEF was to be added, SYNOPSIS should be processed
# in a different way, probably producing a refsynopsis.
# INDEX
# may occur more than once for each FUNCTION giving alternate names this
# function should be indexed under
#
def index(c, l):
l = l.strip()
if verbose:
print('INDEX %s' % l, file=sys.stderr)
# discard anything after the first word
l = l.split()[0]
# add indexterm
# (we could just index under all the refnames, but we control the indexing
# separately as that is what makedoc does)
indexterm = lxml.etree.SubElement(refentry, 'indexterm')
primary = lxml.etree.SubElement(indexterm, 'primary')
primary.text = l
# to validate, it seems we need to maintain refentry elements in a certain order
refentry[:] = sorted(refentry, key = lambda x: x.tag)
# adds another alternate refname
refnamediv = refentry.find('refnamediv')
# as long as it doesn't already exist
if not refnamediv.xpath(('refname[.="%s"]') % l):
refname = lxml.etree.SubElement(refnamediv, 'refname')
refname.text = l
if verbose > 1:
print('added refname %s' % l, file=sys.stderr)
else:
if verbose > 1:
print('duplicate refname %s discarded' % l, file=sys.stderr)
# to validate, it seems we need to maintain refnamediv elements in a certain order
refnamediv[:] = sorted(refnamediv, key = lambda x: x.tag)
# SYNOPSIS aka ANSI_SYNOPSIS
# ANSI-style synopsis
#
# Note that makedoc would also process <<code>> markup here, but there are no
# such uses.
#
def synopsis(c, t):
refsynopsisdiv = lxml.etree.SubElement(refentry, 'refsynopsisdiv')
funcsynopsis = lxml.etree.SubElement(refsynopsisdiv, 'funcsynopsis')
s = ''
for l in t.splitlines():
if re.match('\s*[#[]', l):
# a #include, #define etc.
# fpclassify contains some comments in [ ] brackets
funcsynopsisinfo = lxml.etree.SubElement(funcsynopsis, 'funcsynopsisinfo')
funcsynopsisinfo.text = l.strip() + '\n'
else:
s = s + l
# a prototype without a terminating ';' is an error
if s.endswith(')'):
print("'%s' missing terminating semicolon" % l, file=sys.stderr)
s = s + ';'
exit(1)
if ';' in s:
synopsis_for_prototype(funcsynopsis, s)
s = ''
if s.strip():
print("surplus synopsis '%s'" % s, file=sys.stderr)
raise
def synopsis_for_prototype(funcsynopsis, s):
s = s.strip()
# funcsynopsis has a very detailed content model, so we need to massage the
# bare prototype into it. Fortunately, since the parameter names are marked
# up, we have enough information to do this.
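# e.g. (illustrative) 'int frob(int <[x]>);' is massaged into
# <funcprototype><funcdef>int <function>frob</function></funcdef>
# <paramdef>int <parameter>x</parameter></paramdef></funcprototype>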
for fp in s.split(';'):
fp = fp.strip()
if fp:
if verbose:
print("'%s'" % fp, file=sys.stderr)
match = re.match(r'(.*?)([\w\d]*) ?\((.*)\)', fp)
if verbose:
print(match.groups(), file=sys.stderr)
funcprototype = lxml.etree.SubElement(funcsynopsis, 'funcprototype')
funcdef = lxml.etree.SubElement(funcprototype, 'funcdef')
funcdef.text = match.group(1)
function = lxml.etree.SubElement(funcdef, 'function')
function.text = match.group(2)
if match.group(3).strip() == 'void':
void = lxml.etree.SubElement(funcprototype, 'void')
else:
# Split parameters on ',' except if it is inside ()
for p in re.split(',(?![^()]*\))', match.group(3)):
p = p.strip()
if verbose:
print(p, file=sys.stderr)
if p == '...':
varargs = lxml.etree.SubElement(funcprototype, 'varargs')
else:
paramdef = lxml.etree.SubElement(funcprototype, 'paramdef')
parameter = lxml.etree.SubElement(paramdef, 'parameter')
# <[ ]> enclose the parameter name
match2 = re.match('(.*)<\[(.*)\]>(.*)', p)
if verbose:
print(match2.groups(), file=sys.stderr)
paramdef.text = match2.group(1)
parameter.text = match2.group(2)
parameter.tail = match2.group(3)
# DESCRIPTION
# (RETURNS, ERRORS, PORTABILITY, BUGS, WARNINGS, SEEALSO, NOTES are handled the same)
#
# Create a refsect with a title corresponding to the command
#
# Nearly all the existing DESCRIPTION contents could be transformed into
# DocBook with a few regex substitutions. Unfortunately, pages like sprintf and
# sscanf have very complex layouts using nested tables and itemized lists, which
# it is best to parse in order to transform correctly.
#
def refsect(t, s):
refsect = lxml.etree.SubElement(refentry, 'refsect1')
title = lxml.etree.SubElement(refsect, 'title')
title.text = t.title()
if verbose:
print('%s has %d paragraphs' % (t, len(s.split('\n\n'))) , file=sys.stderr)
if verbose > 1:
dump(s, 'before lexing')
# dump out lexer token sequence
lex.input(s)
for tok in lexer:
print(tok, file=sys.stderr)
# parse the section text for makedoc markup and the few pieces of texinfo
# markup we understand, and output an XML marked-up string
xml = parser.parse(s, tracking=True, debug=(verbose > 2))
dump(xml, 'after parsing')
xml = '<refsect1>' + xml + '</refsect1>'
refsect.extend(lxml.etree.fromstring(xml))
def seealso(c, t):
refsect('SEE ALSO', t)
# NEWPAGE
#
# start a new refentry
def newpage(c, t):
global refentry
refentry = None
# command dispatch table
def discarded(c, t):
return
command_dispatch_dict = {
'FUNCTION' : function,
'TYPEDEF' : function, # TYPEDEF is not currently used, but described in doc.str
'INDEX' : index,
'TRAD_SYNOPSIS' : discarded, # K&R-style synopsis, obsolete and discarded
'ANSI_SYNOPSIS' : synopsis,
'SYNOPSIS' : synopsis,
'DESCRIPTION' : refsect,
'RETURNS' : refsect,
'ERRORS' : refsect,
'PORTABILITY' : refsect,
'BUGS' : refsect,
'WARNINGS' : refsect,
'SEEALSO' : seealso,
'NOTES' : refsect, # NOTES is not described in doc.str, so is currently discarded by makedoc, but that doesn't seem right
'QUICKREF' : discarded, # The intent of QUICKREF and MATHREF is not obvious, but they don't generate any output currently
'MATHREF' : discarded,
'START' : discarded, # a START command is inserted to contain the text before the first command
'END' : discarded, # an END command is inserted merely to terminate the text for the last command in a comment block
'NEWPAGE' : newpage,
}
#
# Utility functions
#
# apply transformations which are easy to do in-place
def line_markup_convert(p):
s = p
# process the texinfo escape for an @
s = s.replace('@@', '@')
# escape characters not allowed in XML
s = s.replace('&','&amp;')
s = s.replace('<','&lt;')
s = s.replace('>','&gt;')
# convert <<somecode>> to <code>somecode</code> and <[var]> to
# <varname>var</varname>
# also handle nested << <[ ]> >> correctly
s = s.replace('&lt;&lt;','<code>')
s = s.replace('&lt;[','<varname>')
s = s.replace(']&gt;','</varname>')
s = s.replace('&gt;&gt;','</code>')
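# e.g. '<<gets>>' becomes '<code>gets</code>' and '<[var]>' becomes '<varname>var</varname>'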
# also convert some simple texinfo markup
# convert @emph{foo} to <emphasis>foo</emphasis>
s = re.sub('@emph{(.*?)}', '<emphasis>\\1</emphasis>', s)
# convert @minus{} to U+2212 MINUS SIGN
s = s.replace('@minus{}', '&#x2212;')
# convert @dots{} to U+2026 HORIZONTAL ELLIPSIS
s = s.replace('@dots{}', '&#x2026;')
# convert xref and pxref
s = re.sub('@xref{(.*?)}', "See <xref linkend='\\1'/>", s)
# very hacky way of dealing with @* to force a newline
s = s.replace('@*', '</para><para>')
if (verbose > 3) and (s != p):
print('%s-> line_markup_convert ->\n%s' % (p, s), file=sys.stderr)
return s
#
# lexer
#
texinfo_commands = {
'ifnottex' : 'IFNOTTEX',
'end ifnottex' : 'ENDIFNOTTEX',
'tex' : 'IFTEX',
'end tex' : 'ENDIFTEX',
'comment' : 'COMMENT',
'c ' : 'COMMENT',
'multitable' : 'MULTICOLUMNTABLE',
'end multitable' : 'ENDMULTICOLUMNTABLE',
'headitem' : 'MCT_HEADITEM',
'tab' : 'MCT_COLUMN_SEPARATOR',
'item' : 'MCT_ITEM',
}
# token names
tokens = [
'BLANKLINE',
'BULLETEND',
'BULLETSTART',
'COURIER',
'EOF',
'ITEM',
'TABLEEND',
'TABLESTART',
'TEXINFO',
'TEXT',
] + list(set(texinfo_commands.values()))
# regular expression rules for tokens, in priority order
# (all these expressions should match a whole line)
def t_TEXINFO(t):
# this matches any @command, but not @command{...} which just happens to be
# at the start of a line
r'@\w+[^{]*?\n'
# if the line starts with a known texinfo command, change t.type to the
# token for that command
for k in texinfo_commands.keys():
if t.value[1:].startswith(k):
t.type = texinfo_commands[k]
break
return t
def t_COURIER(t):
r'[.|].*\n'
t.value = line_markup_convert(t.value[1:])
return t
def t_BULLETSTART(t):
r'O\+\n'
return t
def t_BULLETEND(t):
r'O-\n'
return t
def t_TABLESTART(t):
r'o\+\n'
return t
def t_TABLEEND(t):
r'o-\n'
return t
def t_ITEM(t):
r'o\s.*\n'
t.value = re.sub('o\s', '', lexer.lexmatch.group(0), 1)
t.value = line_markup_convert(t.value)
return t
def t_TEXT(t):
r'.+\n'
t.value = line_markup_convert(t.value)
t.lexer.lineno += 1
return t
def t_BLANKLINE(t):
r'\n'
t.lexer.lineno += 1
return t
def t_eof(t):
if hasattr(t.lexer,'at_eof'):
# remove eof flag ready for lexing next input
delattr(t.lexer,'at_eof')
t.lexer.lineno = 0
return None
t.type = 'EOF'
t.lexer.at_eof = True
return t
# Error handling rule
def t_error(t):
print("tokenization error, remaining text '%s'" % t.value, file=sys.stderr)
exit(1)
lexer = lex.lex()
#
# parser
#
def parser_verbose(p):
if verbose > 2:
print(p[0], file=sys.stderr)
def p_input(p):
'''input : paragraph
| input paragraph'''
if len(p) == 3:
p[0] = p[1] + '\n' + p[2]
else:
p[0] = p[1]
parser_verbose(p)
# Strictly, text at top level should be paragraphs (i.e. terminated by a
# BLANKLINE), while text contained in rows or bullets may not be, but this
# grammar doesn't enforce that for simplicity's sake.
def p_paragraph(p):
'''paragraph : paragraph_content maybe_eof_or_blankline'''
p[0] = '<para>\n' + p[1] + '</para>'
parser_verbose(p)
def p_paragraph_content(p):
'''paragraph_content : paragraph_line
| paragraph_line paragraph_content'''
if len(p) == 3:
p[0] = p[1] + p[2]
else:
p[0] = p[1]
parser_verbose(p)
def p_paragraph_line(p):
'''paragraph_line : TEXT
| texinfocmd
| courierblock
| table
| bulletlist'''
p[0] = p[1]
def p_empty(p):
'empty :'
p[0] = ''
def p_maybe_eof_or_blankline(p):
'''maybe_eof_or_blankline : empty
| EOF
| BLANKLINE
| BLANKLINE EOF'''
p[0] = ''
def p_maybe_lines(p):
'''maybe_lines : empty
| paragraph maybe_lines'''
if len(p) == 3:
p[0] = p[1] + p[2]
else:
p[0] = p[1]
parser_verbose(p)
def p_maybe_blankline(p):
'''maybe_blankline : empty
| BLANKLINE'''
p[0] = ''
def p_courierblock(p):
'''courierblock : courier'''
p[0] = '<literallayout class="monospaced">' + p[1] + '</literallayout>'
parser_verbose(p)
def p_courier(p):
'''courier : COURIER
| COURIER courier'''
if len(p) == 3:
p[0] = p[1] + p[2]
else:
p[0] = p[1]
parser_verbose(p)
def p_bullet(p):
'''bullet : ITEM maybe_lines
| ITEM BLANKLINE maybe_lines'''
if len(p) == 3:
# Glue any text in ITEM into the first para of maybe_lines
# (This is an unfortunate consequence of the line-based tokenization we do)
if p[2].startswith('<para>'):
p[0] = '<listitem><para>' + p[1] + p[2][len('<para>'):] + '</listitem>'
else:
p[0] = '<listitem><para>' + p[1] + '</para>' + p[2] + '</listitem>'
else:
p[0] = '<listitem><para>' + p[1] + '</para>' + p[3] + '</listitem>'
parser_verbose(p)
def p_bullets(p):
'''bullets : bullet
| bullet bullets'''
if len(p) == 3:
p[0] = p[1] + '\n' + p[2]
else:
p[0] = p[1]
parser_verbose(p)
def p_bulletlist(p):
'''bulletlist : BULLETSTART bullets BULLETEND maybe_blankline'''
p[0] = '<itemizedlist>' + p[2] + '</itemizedlist>'
parser_verbose(p)
def p_row(p):
'''row : ITEM maybe_lines
| ITEM BLANKLINE maybe_lines'''
if len(p) == 3:
p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[2] + '</entry></row>'
else:
p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[3] + '</entry></row>'
parser_verbose(p)
def p_rows(p):
'''rows : row
| row rows'''
if len(p) == 3:
p[0] = p[1] + '\n' + p[2]
else:
p[0] = p[1]
parser_verbose(p)
def p_table(p):
'''table : TABLESTART rows TABLEEND maybe_blankline'''
p[0] = '<informaltable><tgroup cols="2"><tbody>' + p[2] + '</tbody></tgroup></informaltable>'
parser_verbose(p)
def p_texinfocmd(p):
'''texinfocmd : unknown_texinfocmd
| comment
| multitable
| nottex
| tex'''
p[0] = p[1]
def p_unknown_texinfocmd(p):
'''unknown_texinfocmd : TEXINFO'''
print("unknown texinfo command '%s'" % p[1].strip(), file=sys.stderr)
p[0] = p[1]
parser_verbose(p)
def p_nottex(p):
'''nottex : IFNOTTEX paragraph_content ENDIFNOTTEX'''
p[0] = p[2]
def p_tex(p):
'''tex : IFTEX paragraph_content ENDIFTEX'''
# text for TeX formatter inside @iftex is discarded
p[0] = ''
def p_comment(p):
'''comment : COMMENT'''
# comment text is discarded
p[0] = ''
def p_mct_columns(p):
'''mct_columns : maybe_lines
| maybe_lines MCT_COLUMN_SEPARATOR mct_columns'''
if len(p) == 4:
p[0] = '<entry>' + p[1] + '</entry>' + p[3]
else:
p[0] = '<entry>' + p[1] + '</entry>'
parser_verbose(p)
def p_mct_row(p):
'''mct_row : MCT_ITEM mct_columns'''
p[0] = '<row>' + p[2] + '</row>'
parser_verbose(p)
def p_mct_rows(p):
'''mct_rows : mct_row
| mct_row mct_rows'''
if len(p) == 3:
p[0] = p[1] + '\n' + p[2]
else:
p[0] = p[1]
parser_verbose(p)
def p_mct_header(p):
'''mct_header : MCT_HEADITEM mct_columns'''
p[0] = '<row>' + p[2] + '</row>'
parser_verbose(p)
def p_multitable(p):
'''multitable : MULTICOLUMNTABLE mct_header mct_rows ENDMULTICOLUMNTABLE'''
# this doesn't handle the prototype row form of @multitable, only the @columnfractions form
colfrac = p[1].replace('@multitable @columnfractions', '').split()
colspec = '\n'.join(['<colspec colwidth="%s*"/>' % (c) for c in colfrac])
header = '<thead>' + p[2] + '</thead>\n'
body = '<tbody>' + p[3] + '</tbody>\n'
p[0] = '<informaltable><tgroup cols="' + str(len(colfrac)) +'">' + colspec + header + body + '</tgroup></informaltable>'
parser_verbose(p)
def p_error(t):
print('parse error at line %d, token %s, next token %s' % (t.lineno, t, parser.token()), file=sys.stderr)
exit(1)
parser = yacc.yacc(start='input')
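# (yacc.yacc() writes parsetab.py and parser.out into the working directory;
# these are the "PLY artefacts" covered by the doc/.gitignore added in this commit)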
#
#
#
def main(file):
content = file.read()
content = remove_noncomments(content)
processed = process(content)
perform(processed)
# output the XML tree
s = lxml.etree.tostring(rootelement, pretty_print=True)
if not s:
print('No output produced (perhaps the input has no makedoc markup?)', file=sys.stderr)
exit(1)
print(s)
# warn about texinfo commands which didn't get processed
match = re.search('@[a-z*]+', s)
if match:
print('texinfo command %s remains in output' % match.group(), file=sys.stderr)
#
#
#
if __name__ == '__main__':
options = OptionParser()
options.add_option('-v', '--verbose', action='count', dest = 'verbose')
(opts, args) = options.parse_args()
verbose = opts.verbose
if len(args) > 0:
main(open(args[0], 'rb'))
else:
main(sys.stdin)


@@ -142,6 +142,9 @@ SUBDEFS = \
$(LIBC_EXTRA_DEF) \
misc/stmp-def
# ditto for stmp-xml files in each subdirectory which builds .xml files
SUBXMLS = $(SUBDEFS:stmp-def=stmp-xml)
libc.info: sigset.texi extra.texi stdio64.texi posix.texi iconvset.texi \
targetdep.tex $(SUBDEFS)
@@ -223,6 +226,23 @@ info_TEXINFOS = libc.texinfo
libc_TEXINFOS = sigset.texi extra.texi posix.texi stdio64.texi iconvset.texi \
targetdep.tex $(SUBDEFS)
docbook-recursive: force
for d in $(SUBDIRS); do \
if test "$$d" != "."; then \
(cd $$d && $(MAKE) docbook) || exit 1; \
fi; \
done
$(SUBXMLS): docbook-recursive
man: $(SUBXMLS) libc.in.xml
xsltproc --xinclude --path ${builddir} --nonet ${srcdir}/../refcontainers.xslt ${srcdir}/libc.in.xml >libc.xml
xmlto --skip-validation man -m ${srcdir}/../man.xsl libc.xml
install-man: man
mkdir -p $(DESTDIR)$(mandir)/man3
$(INSTALL_DATA) *.3 $(DESTDIR)$(mandir)/man3
.PHONY: force
force:
@@ -230,7 +250,8 @@ CLEANFILES = $(CRT0) \
sigset.texi stmp-sigset extra.texi stmp-extra \
stdio64.texi stmp-stdio64 targetdep.tex stmp-targetdep \
tmp-sigset.texi tmp-iconvset.texi tmp-extra.texi \
tmp-stdio64.texi tmp-posix.texi tmp-targetdep.texi \
*.xml *.3
ACLOCAL_AMFLAGS = -I .. -I ../..
CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host


@@ -18,6 +18,9 @@ CHAPTERS = iconv.tex
iconv.def: lib/iconv.def
cp lib/iconv.def iconv.def
iconv.xml: lib/iconv.xml
cp lib/iconv.xml iconv.xml
stmp-def: force
(cd lib && $(MAKE) doc)
touch $@

newlib/libc/libc.in.xml (new file, 42 lines)

@@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
<book id="libc" xmlns:xi="http://www.w3.org/2001/XInclude">
<bookinfo>
<productname>newlib</productname>
</bookinfo>
<xi:include href="stdlib.xml"/>
<xi:include href="ctype.xml"/>
<xi:include href="stdio.xml"/>
<!-- stdio64 is optional -->
<xi:include href="stdio64.xml">
<xi:fallback/>
</xi:include>
<xi:include href="strings.xml"/>
<xi:include href="wcstrings.xml"/>
<!-- signals is optional -->
<xi:include href="signal.xml">
<xi:fallback/>
</xi:include>
<xi:include href="time.xml"/>
<xi:include href="locale.xml"/>
<!-- reent.tex contains fixed content and nothing seems to include the .def made in reent/ -->
<xi:include href="misc.xml"/>
<!-- posix is optional -->
<xi:include href="posix.xml">
<xi:fallback/>
</xi:include>
<!-- XXX: stdarg.h and vararg.h are directly described in libc.texinfo -->
<!-- iconv is optional -->
<xi:include href="iconv.xml">
<xi:fallback/>
</xi:include>
<!-- processing should insert index here -->
<index/>
</book>


@@ -22,5 +22,7 @@ $(machine_dir)/lib.a:
doc:
docbook:
ACLOCAL_AMFLAGS = -I ../.. -I ../../..
CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host


@@ -33,5 +33,7 @@ CLEANFILES = $(CRT0)
doc:
docbook:
ACLOCAL_AMFLAGS = -I ../.. -I ../../..
CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host


@@ -58,10 +58,27 @@ math/stmp-def: stmp-targetdep ; @true
complex/stmp-def: stmp-targetdep ; @true
docbook-recursive: force
for d in $(SUBDIRS); do \
if test "$$d" != "."; then \
(cd $$d && $(MAKE) docbook) || exit 1; \
fi; \
done
math/stmp-xml complex/stmp-xml: docbook-recursive
man: math/stmp-xml complex/stmp-xml libm.in.xml
xsltproc --xinclude --path ${builddir} --nonet ${srcdir}/../refcontainers.xslt ${srcdir}/libm.in.xml >libm.xml
xmlto --skip-validation --searchpath ${builddir} man -m ${srcdir}/../man.xsl libm.xml
install-man: man
mkdir -p $(DESTDIR)$(mandir)/man3
$(INSTALL_DATA) *.3 $(DESTDIR)$(mandir)/man3/
.PHONY: force
force:
CLEANFILES = tmp.texi targetdep.tex stmp-targetdep *.xml *.3
ACLOCAL_AMFLAGS = -I .. -I ../..
CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host

newlib/libm/libm.in.xml (new file, 14 lines)

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
<book id="libm" xmlns:xi="http://www.w3.org/2001/XInclude">
<bookinfo>
<productname>newlib</productname>
</bookinfo>
<xi:include href="complex.xml"/>
<xi:include href="math.xml"/>
<!-- processing should insert index here -->
<index/>
</book>


@@ -22,5 +22,7 @@ $(libm_machine_dir)/lib.a:
doc:
docbook:
ACLOCAL_AMFLAGS = -I ../.. -I ../../..
CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host

newlib/man.xsl (new file, 13 lines)

@@ -0,0 +1,13 @@
<?xml version='1.0'?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version='1.0'>
<!-- don't truncate long manual names -->
<xsl:param name="man.th.extra3.max.length" select="45" />
<!-- don't moan about missing metadata -->
<xsl:param name="refentry.meta.get.quietly" select="1" />
<!-- generate ansi rather than k&r style function synopses -->
<xsl:param name="funcsynopsis.style" select="ansi" />
</xsl:stylesheet>

newlib/refcontainers.xslt (new file, 14 lines)

@@ -0,0 +1,14 @@
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- trivial XSLT which removes the refentrycontainer layer -->
<xsl:output method="xml" encoding="UTF-8" indent="yes" doctype-public="-//OASIS//DTD DocBook V4.5//EN" doctype-system="http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" />
<xsl:strip-space elements="*"/>
<!-- Whenever you match any node but refentrycontainer or any attribute -->
<xsl:template match="node()[not(self::refentrycontainer)]|@*">
<!-- Copy the current node -->
<xsl:copy>
<!-- Including any attributes it has and any child nodes -->
<xsl:apply-templates select="node()|@*"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>