commit 9c7dc6a663654a00ae89306c9c129172c6387935
parent d0ac640f137cf2b6eee7ece44b5ed068dd67de83
Author: Nils Gillmann <ng0@n0.is>
Date: Sun, 7 Oct 2018 23:14:33 +0000
BibTeX.py: style related changes, change ERROR message format output to be more helpful with large amounts of errors.
Signed-off-by: Nils Gillmann <ng0@n0.is>
Diffstat:
| M | BibTeX.py | | | 125 | +++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------- |
1 file changed, 81 insertions(+), 44 deletions(-)
diff --git a/BibTeX.py b/BibTeX.py
@@ -14,7 +14,6 @@ import os
import copy
import config
-
import rank
__all__ = ['ParseError', 'BibTeX', 'BibTeXEntry', 'htmlize',
@@ -292,7 +291,7 @@ DISPLAYED_FIELDS = ['title', 'author', 'journal', 'booktitle',
class BibTeXEntry:
"""A single BibTeX entry."""
def __init__(self, type, key, entries):
- self.type = type # What kind of entry is it? (@book,@injournal,etc)
+ self.type = type # Kind of entry: @book, @injournal,etc
self.key = key # What key does it have?
self.entries = entries # Map from key to value.
self.entryLine = 0 # Defined on this line number
@@ -381,6 +380,11 @@ class BibTeXEntry:
# Besides the official BibTeX resources, this is a good
# reference point: https://verbosus.com/bibtex-style-examples.html
def _check(self):
+ """
+ The 'ERROR (record %s)' prefix of each message identifies the entire
+ bibtex record (via self.entryLine), giving a means to locate it by searching.
+ FIXME: Really print the line in the '.bib' file.
+ """
errs = []
if self.type == 'inproceedings':
fields = 'booktitle', 'year'
@@ -392,54 +396,63 @@ class BibTeXEntry:
fields = 'journal', 'year'
elif self.type == 'book':
fields = 'title', 'year'
- # elif self.type == 'booklet':
- # fields = (),
+ elif self.type == 'booklet':
+ fields = 'title', 'year'
elif self.type == 'techreport':
fields = 'institution',
elif self.type == 'misc':
fields = 'howpublished',
- # elif self.type == 'conference':
- # fields = 'booktitle', 'year',
+ elif self.type == 'conference':
+ fields = 'booktitle', 'year'
elif self.type in ('mastersthesis', 'phdthesis'):
fields = ()
else:
fields = ()
- errs.append("ERROR: odd type %s"%self.type)
- if self.type != 'proceedings':
+ errs.append("ERROR (record %s):\t odd type %s"
+ % (self.entryLine, self.type))
+ if self.type not in ('proceedings', 'conference'):
fields += 'title', 'author', 'www_section', 'year'
for field in fields:
if self.get(field) is None or \
self.get(field).startswith("<span class='bad'>"):
- errs.append("ERROR: %s has no %s field" % (self.key, field))
+ errs.append("ERROR (record %s):\t %s field"
+ "\tnot found in\t %s"
+ % (self.entryLine, field, self.key))
self.entries[field] = "<span class='bad'>%s:??</span>"%field
if self.type == 'inproceedings':
if self.get("booktitle"):
if not self['booktitle'].startswith("Proceedings of") and \
not self['booktitle'].startswith("{Proceedings of"):
- errs.append("ERROR: %s's booktitle (%r) doesn't start with 'Proceedings of'" % (self.key, self['booktitle']))
+ errs.append("ERROR (record %s):\t %s's booktitle (%r) doesn't start with 'Proceedings of'"
+ % (self.entryLine, self.key, self['booktitle']))
if self.has_key("pages") and not re.search(r'\d+--\d+', self['pages']):
- errs.append("ERROR: Misformed pages in %s"%self.key)
+ errs.append("ERROR (record %s):\t Misformed pages in %s"
+ % (self.entryLine, self.key))
if self.type == 'proceedings':
if self.get('title'):
- errs.append("ERROR: %s is a proceedings: it should have a booktitle, not a title." % self.key)
+ errs.append("ERROR (record %s):\t %s is a proceedings: it should have a booktitle, not a title."
+ % (self.entryLine, self.key))
for field, value in self.entries.items():
if value.translate(ALLCHARS, PRINTINGCHARS):
- errs.append("ERROR: %s.%s has non-ASCII characters"%(
- self.key, field))
+ errs.append("ERROR (record %s):\t %s.%s has non-ASCII characters"
+ % (self.entryLine, self.key, field))
if field.startswith("www_") and field not in WWW_FIELDS:
- errs.append("ERROR: unknown www field %s"% field)
+ errs.append("ERROR (record %s):\t unknown www field %s" % (self.entryLine, field))
if value.strip()[-1:] == '.' and \
field not in ("notes", "www_remarks", "author"):
- errs.append("ERROR: %s.%s has an extraneous period"%(self.key, field))
+ errs.append("ERROR (record %s):\t %s.%s has an extraneous period"
+ % (self.entryLine, self.key, field))
return errs
def biblio_to_html(self):
- """Return the HTML for the citation portion of entry."""
+ """
+ Return the HTML for the citation portion of entry.
+ """
if self.type in ('inproceedings', 'incollection'):
booktitle = self['booktitle']
bookurl = self.get('bookurl')
@@ -550,15 +563,17 @@ class BibTeXEntry:
res = ["<li><p class='entry'>"]
if imp or not draft:
- # Add a picture of the rank
- # Only if year is known or paper important!
+ """
+ Add a picture of the rank
+ Only if year is known or paper important!
+ """
r = rank.get_rank_html(self['title'], self.get('year'),
update=False, base_url=base_url)
if r is not None:
res.append(r)
- res.append("<span class='title'><a name='%s'>%s</a></span>"%(
- url_untranslate(self.key), htmlize(self['title'])))
+ res.append("<span class='title'><a name='%s'>%s</a></span>"
+ %(url_untranslate(self.key), htmlize(self['title'])))
for cached in 0, 1:
availability = []
@@ -568,13 +583,13 @@ class BibTeXEntry:
if self.get(key):
url = self[key]
url = unTeXescapeURL(url)
- availability.append('<a href="%s">%s</a>' %(url, which))
+ availability.append('<a href="%s">%s</a>'
+ % (url, which))
cache_section = self.get('www_cache_section', ".")
if cache_section not in config.CACHE_SECTIONS:
if cache_section != ".":
- print >>sys.stderr, "Unrecognized cache section %s"%(
- cache_section)
+ print >>sys.stderr, "Unrecognized cache section %s"%(cache_section)
cache_section = "."
for key, name, ext in (('www_abstract_url', 'abstract', 'abstract'),
@@ -590,7 +605,7 @@ class BibTeXEntry:
"%s.%s"%(self.key, ext))
fname = smartJoin(config.OUTPUT_DIR, config.CACHE_DIR,
cache_section,
- "%s.%s"%(self.key, ext))
+ "%s.%s" % (self.key, ext))
if not os.path.exists(fname): continue
else:
url = self.get(key)
@@ -628,8 +643,8 @@ class BibTeXEntry:
res.append("</p>")
if self.get('www_remarks'):
- res.append("<p class='remarks'>%s</p>"%htmlize(
- self['www_remarks']))
+ res.append("<p class='remarks'>%s</p>"
+ %htmlize(self['www_remarks']))
if imp or draft:
res.append("</div>")
@@ -638,7 +653,9 @@ class BibTeXEntry:
return "".join(res)
def unTeXescapeURL(s):
- """Turn a URL as formatted in TeX into a real URL."""
+ """
+ Turn a URL as formatted in TeX into a real URL.
+ """
s = s.replace("\\_", "_")
s = s.replace("\\-", "")
s = s.replace("\{}", "")
@@ -646,7 +663,9 @@ def unTeXescapeURL(s):
return s
def TeXescapeURL(s):
- """Escape a URL for use in TeX"""
+ """
+ Escape a URL for use in TeX
+ """
s = s.replace("_", "\\_")
s = s.replace("~", "\{}~")
return s
@@ -674,14 +693,17 @@ HTML_LIGATURE_MAP = {
'ss' : 'ß',}
RE_TEX_CMD = re.compile(r"(?:\\[a-zA-Z@]+|\\.)")
RE_PAGE_SPAN = re.compile(r"(\d)--(\d)")
+
def _unaccent(m):
accent, char = m.groups()
if char[0] == '{':
char = char[1]
accented = "&%s%s;" % (char, ACCENT_MAP[accent])
return UNICODE_MAP.get(accented, accented)
+
def _unlig_html(m):
return "%s%s"%(HTML_LIGATURE_MAP[m.group(1)], m.group(2))
+
def htmlize(s):
"""Turn a TeX string into good-looking HTML."""
s = RE_LONE_AMP.sub(lambda m: "&%s" % m.group(1), s)
@@ -715,9 +737,9 @@ def txtize(s):
PROCEEDINGS_RE = re.compile(r'((?:proceedings|workshop record) of(?: the)? )(.*)', re.I)
class ParsedAuthor:
- """The parsed name of an author.
-
- Eddie deserves credit for this incredibly hairy business.
+ """
+ The parsed name of an author.
+ Eddie deserves credit for this incredibly hairy business.
"""
def __init__(self, first, von, last, jr):
self.first = first
@@ -745,7 +767,9 @@ class ParsedAuthor:
return hash(repr(self))
def collapsesTo(self, o):
- """Return true iff 'o' could be a more canonical version of this author
+ """
+ Return true iff 'o' could be a more canonical version of
+ this author
"""
if not self.collapsable or not o.collapsable:
return self
@@ -824,8 +848,10 @@ class ParsedAuthor:
return None
def getSortingName(self):
- """Return a representation of this author's name in von-last-first-jr
- order, unless overridden by ALPH """
+ """
+ Return a representation of this author's name in von-last-first-jr
+ order, unless overridden by ALPH
+ """
s = self.html
for pat, v in config.ALPHABETIZE_AUTHOR_AS_RE_LIST:
if pat.search(s):
@@ -901,7 +927,9 @@ def parseAuthor(s):
raise
def _parseAuthor(s):
- """Take an author string and return a list of ParsedAuthor."""
+ """
+ Take an author string and return a list of ParsedAuthor.
+ """
items = []
s = s.strip()
@@ -995,7 +1023,9 @@ def split_von(f, v, l, x):
class Parser:
- """Parser class: reads BibTeX from a file and returns a BibTeX object."""
+ """
+ Parser class: reads BibTeX from a file and returns a BibTeX object.
+ """
## Fields
# strings: maps entry string keys to their values.
# newStrings: all string definitions not in config.INITIAL_STRINGS
@@ -1026,7 +1056,8 @@ class Parser:
line = _advance(it, line)
m = KEY_RE.match(line)
if not m:
- raise ParseError("Expected key at line %s"%self.fileiter.lineno)
+ raise ParseError("Expected key at line %s"
+ % self.fileiter.lineno)
key, line = m.groups()
return key, line
@@ -1174,7 +1205,8 @@ class Parser:
if proto and proto[1:] != '*':
proto = proto[1:]
if proto and proto[1:] != '*':
- raise ParseError("Missing arguments to %s on line %s" % (self.curEntType, self.entryLine))
+ raise ParseError("Missing arguments to %s on line %s"
+ % (self.curEntType, self.entryLine))
if self.curEntType == 'string':
self.strings[v[0]] = v[1]
@@ -1198,8 +1230,8 @@ class Parser:
self._parse()
except StopIteration:
if self.litStringLine:
- raise ParseError("Unexpected EOF in string (started on %s)" %
- self.litStringLine)
+ raise ParseError("Unexpected EOF in string (started on %s)"
+ % self.litStringLine)
elif self.entryLine:
raise ParseError("Unexpected EOF at line %s (entry started "
"on %s)" % (self.fileiter.lineno,
@@ -1225,7 +1257,8 @@ class Parser:
line = self._parseEntry(line)
self.entryLine = 0
else:
- raise ParseError("Bad input at line %s (expected a new entry.)"
+ raise ParseError("Bad input at line %s "
+ "(expected a new entry.)"
% it.lineno)
def _advance(it, line):
@@ -1251,7 +1284,9 @@ BRACE_OPEN_RE = re.compile(r'^([^\{\}]*\{)(.*)')
RAW_DATA_RE = re.compile(r'^([^\s\},]+)(.*)')
def parseFile(filename, result=None):
- """Helper function: parse a single BibTeX file"""
+ """
+ Helper function: parse a single BibTeX file
+ """
f = FileIter(fname=filename)
p = Parser(f, {}, result)
r = p.parse()
@@ -1261,7 +1296,9 @@ def parseFile(filename, result=None):
return r
def parseString(string, result=None):
- """Helper function: parse BibTeX from a string"""
+ """
+ Helper function: parse BibTeX from a string
+ """
f = FileIter(string=string)
p = Parser(f, {}, result)
r = p.parse()