4
0
mirror of git://sourceware.org/git/newlib-cygwin.git synced 2025-01-19 12:59:21 +08:00

doc: Untabify python scripts used for making man pages

These scripts fail with a TabError exception if 'python' is python3, as
mixing tabs and spaces in indentation is forbidden in python3.
This commit is contained in:
Jon Turney 2019-12-16 19:28:59 +00:00
parent 7fc78f07ed
commit 91f1eab9a9
No known key found for this signature in database
GPG Key ID: C7C86F0370285C81
2 changed files with 209 additions and 209 deletions

View File

@ -19,25 +19,25 @@ def main():
print ('<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">')
for l in sys.stdin.readlines():
l = l.rstrip()
l = l.rstrip()
# transform @file{foo} to <filename>foo</filename>
l = re.sub("@file{(.*?)}", "<filename>\\1</filename>", l)
# transform @file{foo} to <filename>foo</filename>
l = re.sub("@file{(.*?)}", "<filename>\\1</filename>", l)
if l.startswith("@node"):
l = l.replace("@node", "", 1)
l = l.strip()
l = l.lower()
if first_node:
print ('<chapter id="%s" xmlns:xi="http://www.w3.org/2001/XInclude">' % l.replace(' ', '_'))
first_node = False
elif l.startswith("@chapter "):
l = l.replace("@chapter ", "", 1)
print ('<title>%s</title>' % l)
elif l.startswith("@include "):
l = l.replace("@include ", "", 1)
l = l.replace(".def", ".xml", 1)
print ('<xi:include href="%s"/>' % l.strip())
if l.startswith("@node"):
l = l.replace("@node", "", 1)
l = l.strip()
l = l.lower()
if first_node:
print ('<chapter id="%s" xmlns:xi="http://www.w3.org/2001/XInclude">' % l.replace(' ', '_'))
first_node = False
elif l.startswith("@chapter "):
l = l.replace("@chapter ", "", 1)
print ('<title>%s</title>' % l)
elif l.startswith("@include "):
l = l.replace("@include ", "", 1)
l = l.replace(".def", ".xml", 1)
print ('<xi:include href="%s"/>' % l.strip())
print ('</chapter>')

View File

@ -35,11 +35,11 @@ verbose = 0
def dump(s, stage, threshold = 1):
if verbose > threshold:
print('*' * 40, file=sys.stderr)
print(stage, file=sys.stderr)
print('*' * 40, file=sys.stderr)
print('%s' % s, file=sys.stderr)
print('*' * 40, file=sys.stderr)
print('*' * 40, file=sys.stderr)
print(stage, file=sys.stderr)
print('*' * 40, file=sys.stderr)
print('%s' % s, file=sys.stderr)
print('*' * 40, file=sys.stderr)
#
# Stage 1
@ -48,7 +48,7 @@ def dump(s, stage, threshold = 1):
def skip_whitespace_and_stars(i, src):
while i < len(src) and (src[i].isspace() or (src[i] == '*' and src[i+1] != '/')):
i += 1
i += 1
return i
@ -60,37 +60,37 @@ def comment_contents_generator(src):
i = 0
while i < len(src) - 2:
if src[i] == '\n' and src[i+1] == '/' and src[i+2] == '*':
i = i + 3
if src[i] == '\n' and src[i+1] == '/' and src[i+2] == '*':
i = i + 3
i = skip_whitespace_and_stars(i, src)
i = skip_whitespace_and_stars(i, src)
if src[i] == '.':
i += 1
if src[i] == '.':
i += 1
while i < len(src):
if src[i] == '\n':
yield '\n'
i += 1
while i < len(src):
if src[i] == '\n':
yield '\n'
i += 1
# allow a single blank line
if i < len(src) and src[i] == '\n':
yield '\n'
i += 1
# allow a single blank line
if i < len(src) and src[i] == '\n':
yield '\n'
i += 1
i = skip_whitespace_and_stars(i, src)
i = skip_whitespace_and_stars(i, src)
elif src[i] == '*' and src[i+1] == '/':
i = i + 2
# If we have just output \n\n, this adds another blank line.
# This is the only way a double blank line can occur.
yield '\nEND\n'
break
else:
yield src[i]
i += 1
else:
i += 1
elif src[i] == '*' and src[i+1] == '/':
i = i + 2
# If we have just output \n\n, this adds another blank line.
# This is the only way a double blank line can occur.
yield '\nEND\n'
break
else:
yield src[i]
i += 1
else:
i += 1
def remove_noncomments(src):
src = '\n' + src
@ -107,7 +107,7 @@ def remove_noncomments(src):
def iscommand(l):
if re.match('^[A-Z_]{3,}\s*$', l):
return True
return True
return False
def command_block_generator(content):
@ -115,12 +115,12 @@ def command_block_generator(content):
text = ''
for l in content.splitlines():
if iscommand(l):
yield (command, text)
command = l.rstrip()
text = ''
else:
text = text + l + '\n'
if iscommand(l):
yield (command, text)
command = l.rstrip()
text = ''
else:
text = text + l + '\n'
yield (command, text)
# Look for commands, which give instructions how to process the following input
@ -142,17 +142,17 @@ def process(content):
# invoke each command on its text
def perform(processed):
for i in processed:
c = i[0].rstrip()
t = i[1].strip() + '\n'
c = i[0].rstrip()
t = i[1].strip() + '\n'
if verbose:
print("performing command '%s'" % c, file=sys.stderr)
if verbose:
print("performing command '%s'" % c, file=sys.stderr)
if c in command_dispatch_dict:
command_dispatch_dict[c](c, t)
else:
print("command '%s' is not recognized" % c, file=sys.stderr)
# the text following an unrecognized command is discarded
if c in command_dispatch_dict:
command_dispatch_dict[c](c, t)
else:
print("command '%s' is not recognized" % c, file=sys.stderr)
# the text following an unrecognized command is discarded
# FUNCTION (aka TYPEDEF)
#
@ -162,33 +162,33 @@ def function(c, l):
l = l.strip()
if verbose:
print('FUNCTION %s' % l, file=sys.stderr)
print('FUNCTION %s' % l, file=sys.stderr)
separator = '---'
if ';' in l:
# fpclassify has an unusual format we also need to handle
spliton = ';'
l = l.splitlines()[0]
# fpclassify has an unusual format we also need to handle
spliton = ';'
l = l.splitlines()[0]
elif len(l.splitlines()) > 1:
# a few pages like mktemp have two '---' lines
spliton = ';'
o = ''
for i in l.splitlines():
if separator in i:
o += i + ';'
else:
o += i
l = o[:-1]
# a few pages like mktemp have two '---' lines
spliton = ';'
o = ''
for i in l.splitlines():
if separator in i:
o += i + ';'
else:
o += i
l = o[:-1]
else:
spliton = '\n'
spliton = '\n'
namelist = []
descrlist = []
for a in l.split(spliton):
(n, d) = a.split(separator, 1)
namelist = namelist + n.split(',')
descrlist = descrlist + [d]
(n, d) = a.split(separator, 1)
namelist = namelist + n.split(',')
descrlist = descrlist + [d]
# only copysign and log1p use <[ ]> markup in descr,
# only gets() uses << >> markup
@ -201,17 +201,17 @@ def function(c, l):
namelist = map(lambda v: v.strip().lstrip('<').rstrip('>'), namelist)
if verbose:
print(namelist, file=sys.stderr)
print(namelist, file=sys.stderr)
# additional alternate names may also appear in INDEX commands
# create the root element if needed
if rootelement is None:
rootelement = lxml.etree.Element('refentrycontainer')
rootelement = lxml.etree.Element('refentrycontainer')
# FUNCTION implies starting a new refentry
if refentry is not None:
print("multiple FUNCTIONs without NEWPAGE", file=sys.stderr)
exit(1)
print("multiple FUNCTIONs without NEWPAGE", file=sys.stderr)
exit(1)
# create the refentry
refentry = lxml.etree.SubElement(rootelement, 'refentry')
@ -232,8 +232,8 @@ def function(c, l):
refdescriptor.text = namelist[0]
# refname elements exist for all alternate names
for n in namelist:
refname = lxml.etree.SubElement(refnamediv, 'refname')
refname.text = n
refname = lxml.etree.SubElement(refnamediv, 'refname')
refname.text = n
refpurpose = lxml.etree.SubElement(refnamediv, 'refpurpose')
refnamediv.replace(refpurpose, lxml.etree.fromstring('<refpurpose>' + descr + '</refpurpose>'))
@ -249,7 +249,7 @@ def index(c, l):
l = l.strip()
if verbose:
print('INDEX %s' % l, file=sys.stderr)
print('INDEX %s' % l, file=sys.stderr)
# discard anything after the first word
l = l.split()[0]
@ -269,13 +269,13 @@ def index(c, l):
# as long as it doesn't already exist
if not refnamediv.xpath(('refname[.="%s"]') % l):
refname = lxml.etree.SubElement(refnamediv, 'refname')
refname.text = l
if verbose > 1:
print('added refname %s' % l, file=sys.stderr)
refname = lxml.etree.SubElement(refnamediv, 'refname')
refname.text = l
if verbose > 1:
print('added refname %s' % l, file=sys.stderr)
else:
if verbose > 1:
print('duplicate refname %s discarded' % l, file=sys.stderr)
if verbose > 1:
print('duplicate refname %s discarded' % l, file=sys.stderr)
# to validate, it seems we need to maintain refnamediv elements in a certain order
refnamediv[:] = sorted(refnamediv, key = lambda x: x.tag)
@ -293,28 +293,28 @@ def synopsis(c, t):
s = ''
for l in t.splitlines():
if re.match('\s*(#|\[|struct)', l):
# preprocessor # directives, structs, comments in square brackets
funcsynopsisinfo = lxml.etree.SubElement(funcsynopsis, 'funcsynopsisinfo')
funcsynopsisinfo.text = l.strip() + '\n'
elif re.match('[Ll]ink with', l):
pass
else:
s = s + l
if re.match('\s*(#|\[|struct)', l):
# preprocessor # directives, structs, comments in square brackets
funcsynopsisinfo = lxml.etree.SubElement(funcsynopsis, 'funcsynopsisinfo')
funcsynopsisinfo.text = l.strip() + '\n'
elif re.match('[Ll]ink with', l):
pass
else:
s = s + l
# a prototype without a terminating ';' is an error
if s.endswith(')'):
print("'%s' missing terminating semicolon" % l, file=sys.stderr)
s = s + ';'
exit(1)
# a prototype without a terminating ';' is an error
if s.endswith(')'):
print("'%s' missing terminating semicolon" % l, file=sys.stderr)
s = s + ';'
exit(1)
if ';' in s:
synopsis_for_prototype(funcsynopsis, s)
s = ''
if ';' in s:
synopsis_for_prototype(funcsynopsis, s)
s = ''
if s.strip():
print("surplus synopsis '%s'" % s, file=sys.stderr)
exit(1)
print("surplus synopsis '%s'" % s, file=sys.stderr)
exit(1)
def synopsis_for_prototype(funcsynopsis, s):
s = s.strip()
@ -323,48 +323,48 @@ def synopsis_for_prototype(funcsynopsis, s):
# bare prototype into it. Fortunately, since the parameter names are marked
# up, we have enough information to do this.
for fp in s.split(';'):
fp = fp.strip()
if fp:
fp = fp.strip()
if fp:
if verbose:
print("'%s'" % fp, file=sys.stderr)
if verbose:
print("'%s'" % fp, file=sys.stderr)
match = re.match(r'(.*?)([\w\d]*) ?\((.*)\)', fp)
match = re.match(r'(.*?)([\w\d]*) ?\((.*)\)', fp)
if verbose:
print(match.groups(), file=sys.stderr)
if verbose:
print(match.groups(), file=sys.stderr)
funcprototype = lxml.etree.SubElement(funcsynopsis, 'funcprototype')
funcdef = lxml.etree.SubElement(funcprototype, 'funcdef')
funcdef.text = match.group(1)
function = lxml.etree.SubElement(funcdef, 'function')
function.text = match.group(2)
funcprototype = lxml.etree.SubElement(funcsynopsis, 'funcprototype')
funcdef = lxml.etree.SubElement(funcprototype, 'funcdef')
funcdef.text = match.group(1)
function = lxml.etree.SubElement(funcdef, 'function')
function.text = match.group(2)
if match.group(3).strip() == 'void':
void = lxml.etree.SubElement(funcprototype, 'void')
else:
# Split parameters on ',' except if it is inside ()
for p in re.split(',(?![^()]*\))', match.group(3)):
p = p.strip()
if match.group(3).strip() == 'void':
void = lxml.etree.SubElement(funcprototype, 'void')
else:
# Split parameters on ',' except if it is inside ()
for p in re.split(',(?![^()]*\))', match.group(3)):
p = p.strip()
if verbose:
print(p, file=sys.stderr)
if verbose:
print(p, file=sys.stderr)
if p == '...':
varargs = lxml.etree.SubElement(funcprototype, 'varargs')
else:
paramdef = lxml.etree.SubElement(funcprototype, 'paramdef')
parameter = lxml.etree.SubElement(paramdef, 'parameter')
if p == '...':
varargs = lxml.etree.SubElement(funcprototype, 'varargs')
else:
paramdef = lxml.etree.SubElement(funcprototype, 'paramdef')
parameter = lxml.etree.SubElement(paramdef, 'parameter')
# <[ ]> enclose the parameter name
match2 = re.match('(.*)<\[(.*)\]>(.*)', p)
# <[ ]> enclose the parameter name
match2 = re.match('(.*)<\[(.*)\]>(.*)', p)
if verbose:
print(match2.groups(), file=sys.stderr)
if verbose:
print(match2.groups(), file=sys.stderr)
paramdef.text = match2.group(1)
parameter.text = match2.group(2)
parameter.tail = match2.group(3)
paramdef.text = match2.group(1)
parameter.text = match2.group(2)
parameter.tail = match2.group(3)
# DESCRIPTION
@ -384,15 +384,15 @@ def refsect(t, s):
title.text = t.title()
if verbose:
print('%s has %d paragraphs' % (t, len(s.split('\n\n'))) , file=sys.stderr)
print('%s has %d paragraphs' % (t, len(s.split('\n\n'))) , file=sys.stderr)
if verbose > 1:
dump(s, 'before lexing')
dump(s, 'before lexing')
# dump out lexer token sequence
lex.input(s)
for tok in lexer:
print(tok, file=sys.stderr)
# dump out lexer token sequence
lex.input(s)
for tok in lexer:
print(tok, file=sys.stderr)
# parse the section text for makedoc markup and the few pieces of texinfo
# markup we understand, and output an XML marked-up string
@ -421,25 +421,25 @@ def discarded(c, t):
return
command_dispatch_dict = {
'FUNCTION' : function,
'TYPEDEF' : function, # TYPEDEF is not currently used, but described in doc.str
'INDEX' : index,
'TRAD_SYNOPSIS' : discarded, # K&R-style synopsis, obsolete and discarded
'ANSI_SYNOPSIS' : synopsis,
'SYNOPSIS' : synopsis,
'DESCRIPTION' : refsect,
'RETURNS' : refsect,
'ERRORS' : refsect,
'PORTABILITY' : refsect,
'BUGS' : refsect,
'WARNINGS' : refsect,
'SEEALSO' : seealso,
'NOTES' : refsect, # NOTES is not described in doc.str, so is currently discarded by makedoc, but that doesn't seem right
'QUICKREF' : discarded, # The intent of QUICKREF and MATHREF is not obvious, but they don't generate any output currently
'MATHREF' : discarded,
'START' : discarded, # a START command is inserted to contain the text before the first command
'END' : discarded, # an END command is inserted merely to terminate the text for the last command in a comment block
'NEWPAGE' : newpage,
'FUNCTION' : function,
'TYPEDEF' : function, # TYPEDEF is not currently used, but described in doc.str
'INDEX' : index,
'TRAD_SYNOPSIS' : discarded, # K&R-style synopsis, obsolete and discarded
'ANSI_SYNOPSIS' : synopsis,
'SYNOPSIS' : synopsis,
'DESCRIPTION' : refsect,
'RETURNS' : refsect,
'ERRORS' : refsect,
'PORTABILITY' : refsect,
'BUGS' : refsect,
'WARNINGS' : refsect,
'SEEALSO' : seealso,
'NOTES' : refsect, # NOTES is not described in doc.str, so is currently discarded by makedoc, but that doesn't seem right
'QUICKREF' : discarded, # The intent of QUICKREF and MATHREF is not obvious, but they don't generate any output currently
'MATHREF' : discarded,
'START' : discarded, # a START command is inserted to contain the text before the first command
'END' : discarded, # an END command is inserted merely to terminate the text for the last command in a comment block
'NEWPAGE' : newpage,
}
#
@ -483,7 +483,7 @@ def line_markup_convert(p):
s = s.replace('@*', '</para><para>')
if (verbose > 3) and (s != p):
print('%s-> line_markup_convert ->\n%s' % (p, s), file=sys.stderr)
print('%s-> line_markup_convert ->\n%s' % (p, s), file=sys.stderr)
return s
@ -529,9 +529,9 @@ def t_TEXINFO(t):
# if the line starts with a known texinfo command, change t.type to the
# token for that command
for k in texinfo_commands.keys():
if t.value[1:].startswith(k):
t.type = texinfo_commands[k]
break
if t.value[1:].startswith(k):
t.type = texinfo_commands[k]
break
return t
@ -575,10 +575,10 @@ def t_BLANKLINE(t):
def t_eof(t):
if hasattr(t.lexer,'at_eof'):
# remove eof flag ready for lexing next input
delattr(t.lexer,'at_eof')
t.lexer.lineno = 0
return None
# remove eof flag ready for lexing next input
delattr(t.lexer,'at_eof')
t.lexer.lineno = 0
return None
t.type = 'EOF'
t.lexer.at_eof = True;
@ -598,15 +598,15 @@ lexer = lex.lex()
def parser_verbose(p):
if verbose > 2:
print(p[0], file=sys.stderr)
print(p[0], file=sys.stderr)
def p_input(p):
'''input : paragraph
| input paragraph'''
if len(p) == 3:
p[0] = p[1] + '\n' + p[2]
p[0] = p[1] + '\n' + p[2]
else:
p[0] = p[1]
p[0] = p[1]
parser_verbose(p)
# Strictly, text at top level should be paragraphs (i.e. terminated by a
@ -621,9 +621,9 @@ def p_paragraph_content(p):
'''paragraph_content : paragraph_line
| paragraph_line paragraph_content'''
if len(p) == 3:
p[0] = p[1] + p[2]
p[0] = p[1] + p[2]
else:
p[0] = p[1]
p[0] = p[1]
parser_verbose(p)
def p_paragraph_line(p):
@ -649,9 +649,9 @@ def p_maybe_lines(p):
'''maybe_lines : empty
| paragraph maybe_lines'''
if len(p) == 3:
p[0] = p[1] + p[2]
p[0] = p[1] + p[2]
else:
p[0] = p[1]
p[0] = p[1]
parser_verbose(p)
def p_maybe_blankline(p):
@ -668,32 +668,32 @@ def p_courier(p):
'''courier : COURIER
| COURIER courier'''
if len(p) == 3:
p[0] = p[1] + p[2]
p[0] = p[1] + p[2]
else:
p[0] = p[1]
p[0] = p[1]
parser_verbose(p)
def p_bullet(p):
'''bullet : ITEM maybe_lines
| ITEM BLANKLINE maybe_lines'''
if len(p) == 3:
# Glue any text in ITEM into the first para of maybe_lines
# (This is an unfortunate consequence of the line-based tokenization we do)
if p[2].startswith('<para>'):
p[0] = '<listitem><para>' + p[1] + p[2][len('<para>'):] + '</listitem>'
else:
p[0] = '<listitem><para>' + p[1] + '</para>' + p[2] + '</listitem>'
# Glue any text in ITEM into the first para of maybe_lines
# (This is an unfortunate consequence of the line-based tokenization we do)
if p[2].startswith('<para>'):
p[0] = '<listitem><para>' + p[1] + p[2][len('<para>'):] + '</listitem>'
else:
p[0] = '<listitem><para>' + p[1] + '</para>' + p[2] + '</listitem>'
else:
p[0] = '<listitem><para>' + p[1] + '</para>' + p[3] + '</listitem>'
p[0] = '<listitem><para>' + p[1] + '</para>' + p[3] + '</listitem>'
parser_verbose(p)
def p_bullets(p):
'''bullets : bullet
| bullet bullets'''
if len(p) == 3:
p[0] = p[1] + '\n' + p[2]
p[0] = p[1] + '\n' + p[2]
else:
p[0] = p[1]
p[0] = p[1]
parser_verbose(p)
def p_bulletlist(p):
@ -705,18 +705,18 @@ def p_row(p):
'''row : ITEM maybe_lines
| ITEM BLANKLINE maybe_lines'''
if len(p) == 3:
p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[2] + '</entry></row>'
p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[2] + '</entry></row>'
else:
p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[3] + '</entry></row>'
p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[3] + '</entry></row>'
parser_verbose(p)
def p_rows(p):
'''rows : row
| row rows'''
if len(p) == 3:
p[0] = p[1] + '\n' + p[2]
p[0] = p[1] + '\n' + p[2]
else:
p[0] = p[1]
p[0] = p[1]
parser_verbose(p)
def p_table(p):
@ -756,9 +756,9 @@ def p_mct_columns(p):
'''mct_columns : maybe_lines
| maybe_lines MCT_COLUMN_SEPARATOR mct_columns'''
if len(p) == 4:
p[0] = '<entry>' + p[1] + '</entry>' + p[3]
p[0] = '<entry>' + p[1] + '</entry>' + p[3]
else:
p[0] = '<entry>' + p[1] + '</entry>'
p[0] = '<entry>' + p[1] + '</entry>'
parser_verbose(p)
def p_mct_row(p):
@ -770,9 +770,9 @@ def p_mct_rows(p):
'''mct_rows : mct_row
| mct_row mct_rows'''
if len(p) == 3:
p[0] = p[1] + '\n' + p[2]
p[0] = p[1] + '\n' + p[2]
else:
p[0] = p[1]
p[0] = p[1]
parser_verbose(p)
def p_mct_header(p):
@ -810,15 +810,15 @@ def main(file):
s = lxml.etree.tostring(rootelement, pretty_print=True)
if not s:
print('No output produced (perhaps the input has no makedoc markup?)', file=sys.stderr)
exit(1)
print('No output produced (perhaps the input has no makedoc markup?)', file=sys.stderr)
exit(1)
print(s)
# warn about texinfo commands which didn't get processed
match = re.search('@[a-z*]+', s)
if match:
print('texinfo command %s remains in output' % match.group(), file=sys.stderr)
print('texinfo command %s remains in output' % match.group(), file=sys.stderr)
#
#
@ -831,11 +831,11 @@ if __name__ == '__main__' :
(opts, args) = options.parse_args()
if opts.cache:
sys.exit()
sys.exit()
verbose = opts.verbose
if len(args) > 0:
main(open(args[0], 'rb'))
main(open(args[0], 'rb'))
else:
main(sys.stdin)
main(sys.stdin)