BibTeX.py: style related changes, change ERROR message format output to be more helpful with large amounts of errors. - gnunetbib

commit 9c7dc6a663654a00ae89306c9c129172c6387935
parent d0ac640f137cf2b6eee7ece44b5ed068dd67de83
Author: Nils Gillmann <ng0@n0.is>
Date:   Sun,  7 Oct 2018 23:14:33 +0000

BibTeX.py: style related changes, change ERROR message format output to be more helpful with large amounts of errors.

Signed-off-by: Nils Gillmann <ng0@n0.is>

Diffstat:
M BibTeX.py  | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------

1 file changed, 81 insertions(+), 44 deletions(-)
diff --git a/BibTeX.py b/BibTeX.py
@@ -14,7 +14,6 @@ import os
 import copy
 
 import config
-
 import rank
 
 __all__ = ['ParseError', 'BibTeX', 'BibTeXEntry', 'htmlize',
@@ -292,7 +291,7 @@ DISPLAYED_FIELDS = ['title', 'author', 'journal', 'booktitle',
 class BibTeXEntry:
     """A single BibTeX entry."""
     def __init__(self, type, key, entries):
-        self.type = type  # What kind of entry is it?  (@book,@injournal,etc)
+        self.type = type  # Kind of entry: @book, @injournal, etc.
         self.key = key # What key does it have?
         self.entries = entries # Map from key to value.
         self.entryLine = 0 # Defined on this line number
@@ -381,6 +380,11 @@ class BibTeXEntry:
     # Besides the official BibTeX resources, this is a good
     # reference point: https://verbosus.com/bibtex-style-examples.html
     def _check(self):
+        """
+        The 'ERROR (record %s)' prefix relates to the entire BibTeX record,
+        giving a means to locate it by searching.
+        FIXME: Really print the line in the '.bib' file.
+        """
         errs = []
         if self.type == 'inproceedings':
             fields = 'booktitle', 'year'
@@ -392,54 +396,63 @@ class BibTeXEntry:
             fields = 'journal', 'year'
         elif self.type == 'book':
             fields = 'title', 'year'
-        # elif self.type == 'booklet':
-        #     fields = (),
+        elif self.type == 'booklet':
+            fields = 'title', 'year'
         elif self.type == 'techreport':
             fields = 'institution',
         elif self.type == 'misc':
             fields = 'howpublished',
-        # elif self.type == 'conference':
-        #     fields = 'booktitle', 'year',
+        elif self.type == 'conference':
+            fields = 'booktitle', 'year'
         elif self.type in ('mastersthesis', 'phdthesis'):
             fields = ()
         else:
             fields = ()
-            errs.append("ERROR: odd type %s"%self.type)
-        if self.type != 'proceedings':
+            errs.append("ERROR (record %s):\t odd type %s"
+                        % (self.entryLine, self.type))
+        if self.type != ('proceedings' or 'conference'):
             fields += 'title', 'author', 'www_section', 'year'
 
         for field in fields:
             if self.get(field) is None or \
                    self.get(field).startswith("<span class='bad'>"):
-                errs.append("ERROR: %s has no %s field" % (self.key, field))
+                errs.append("ERROR (record %s):\t %s field"
+                            "\tnot found in\t %s"
+                            % (self.entryLine, field, self.key))
                 self.entries[field] = "<span class='bad'>%s:??</span>"%field
 
         if self.type == 'inproceedings':
             if self.get("booktitle"):
                 if not self['booktitle'].startswith("Proceedings of") and \
                    not self['booktitle'].startswith("{Proceedings of"):
-                    errs.append("ERROR: %s's booktitle (%r) doesn't start with 'Proceedings of'" % (self.key, self['booktitle']))
+                    errs.append("ERROR (record %s):\t %s's booktitle (%r) doesn't start with 'Proceedings of'"
+                                % (self.entryLine, self.key, self['booktitle']))
 
         if self.has_key("pages") and not re.search(r'\d+--\d+', self['pages']):
-            errs.append("ERROR: Misformed pages in %s"%self.key)
+            errs.append("ERROR (record %s):\t Misformed pages in %s"
+                        % (self.entryLine, self.key))
 
         if self.type == 'proceedings':
             if self.get('title'):
-                errs.append("ERROR: %s is a proceedings: it should have a booktitle, not a title." % self.key)
+                errs.append("ERROR (record %s):\t %s is a proceedings: it should have a booktitle, not a title."
+                            % (self.entryLine, self.key))
 
         for field, value in self.entries.items():
             if value.translate(ALLCHARS, PRINTINGCHARS):
-                errs.append("ERROR: %s.%s has non-ASCII characters"%(
-                    self.key, field))
+                errs.append("ERROR (record %s):\t %s.%s has non-ASCII characters"
+                            % (self.entryLine, self.key, field))
             if field.startswith("www_") and field not in WWW_FIELDS:
-                errs.append("ERROR: unknown www field %s"% field)
+                errs.append("ERROR (record %s):\t unknown www field %s" % (self.entryLine, field))
             if value.strip()[-1:] == '.' and \
                 field not in ("notes", "www_remarks", "author"):
-                errs.append("ERROR: %s.%s has an extraneous period"%(self.key, field))
+                errs.append("ERROR (record %s):\t %s.%s has an extraneous period"
+                            % (self.entryLine, self.key, field))
         return errs
 
     def biblio_to_html(self):
-        """Return the HTML for the citation portion of entry."""
+        """
+        Return the HTML for the citation portion of entry.
+        """
         if self.type in ('inproceedings', 'incollection'):
             booktitle = self['booktitle']
             bookurl = self.get('bookurl')
@@ -550,15 +563,17 @@ class BibTeXEntry:
             res = ["<li><p class='entry'>"]
 
         if imp or not draft:
-            # Add a picture of the rank
-            # Only if year is known or paper important!
+            """
+            Add a picture of the rank
+            Only if year is known or paper important!
+            """
             r = rank.get_rank_html(self['title'], self.get('year'),
                                    update=False, base_url=base_url)
             if r is not None:
                 res.append(r)
 
-        res.append("<span class='title'><a name='%s'>%s</a></span>"%(
-            url_untranslate(self.key), htmlize(self['title'])))
+        res.append("<span class='title'><a name='%s'>%s</a></span>"
+                   %(url_untranslate(self.key), htmlize(self['title'])))
 
         for cached in 0, 1:
             availability = []
@@ -568,13 +583,13 @@ class BibTeXEntry:
                     if self.get(key):
                         url = self[key]
                         url = unTeXescapeURL(url)
-                        availability.append('<a href="%s">%s</a>' %(url, which))
+                        availability.append('<a href="%s">%s</a>'
+                                            % (url, which))
 
             cache_section = self.get('www_cache_section', ".")
             if cache_section not in config.CACHE_SECTIONS:
                 if cache_section != ".":
-                    print >>sys.stderr, "Unrecognized cache section %s"%(
-                        cache_section)
+                    print >>sys.stderr, "Unrecognized cache section %s"%(cache_section)
                     cache_section = "."
 
             for key, name, ext in (('www_abstract_url', 'abstract', 'abstract'),
@@ -590,7 +605,7 @@ class BibTeXEntry:
                                     "%s.%s"%(self.key, ext))
                     fname = smartJoin(config.OUTPUT_DIR, config.CACHE_DIR,
                                       cache_section,
-                                      "%s.%s"%(self.key, ext))
+                                      "%s.%s" % (self.key, ext))
                     if not os.path.exists(fname): continue
                 else:
                     url = self.get(key)
@@ -628,8 +643,8 @@ class BibTeXEntry:
         res.append("</p>")
 
         if self.get('www_remarks'):
-            res.append("<p class='remarks'>%s</p>"%htmlize(
-                self['www_remarks']))
+            res.append("<p class='remarks'>%s</p>"
+                       %htmlize(self['www_remarks']))
 
         if imp or draft:
             res.append("</div>")
@@ -638,7 +653,9 @@ class BibTeXEntry:
         return "".join(res)
 
 def unTeXescapeURL(s):
-    """Turn a URL as formatted in TeX into a real URL."""
+    """
+    Turn a URL as formatted in TeX into a real URL.
+    """
     s = s.replace("\\_", "_")
     s = s.replace("\\-", "")
     s = s.replace("\{}", "")
@@ -646,7 +663,9 @@ def unTeXescapeURL(s):
     return s
 
 def TeXescapeURL(s):
-    """Escape a URL for use in TeX"""
+    """
+    Escape a URL for use in TeX
+    """
     s = s.replace("_", "\\_")
     s = s.replace("~", "\{}~")
     return s
@@ -674,14 +693,17 @@ HTML_LIGATURE_MAP = {
     'ss' : '&szlig;',}
 RE_TEX_CMD = re.compile(r"(?:\\[a-zA-Z@]+|\\.)")
 RE_PAGE_SPAN = re.compile(r"(\d)--(\d)")
+
 def _unaccent(m):
     accent, char = m.groups()
     if char[0] == '{':
         char = char[1]
     accented = "&%s%s;" % (char, ACCENT_MAP[accent])
     return UNICODE_MAP.get(accented, accented)
+
 def _unlig_html(m):
     return "%s%s"%(HTML_LIGATURE_MAP[m.group(1)], m.group(2))
+
 def htmlize(s):
     """Turn a TeX string into good-looking HTML."""
     s = RE_LONE_AMP.sub(lambda m: "&amp;%s" % m.group(1), s)
@@ -715,9 +737,9 @@ def txtize(s):
 PROCEEDINGS_RE = re.compile(r'((?:proceedings|workshop record) of(?: the)? )(.*)', re.I)
 
 class ParsedAuthor:
-    """The parsed name of an author.
-
-       Eddie deserves credit for this incredibly hairy business.
+    """
+    The parsed name of an author.
+    Eddie deserves credit for this incredibly hairy business.
     """
     def __init__(self, first, von, last, jr):
         self.first = first
@@ -745,7 +767,9 @@ class ParsedAuthor:
         return hash(repr(self))
 
     def collapsesTo(self, o):
-        """Return true iff 'o' could be a more canonical version of this author
+        """
+        Return true iff 'o' could be a more canonical version of
+        this author
         """
         if not self.collapsable or not o.collapsable:
             return self
@@ -824,8 +848,10 @@ class ParsedAuthor:
         return None
 
     def getSortingName(self):
-        """Return a representation of this author's name in von-last-first-jr
-           order, unless overridden by ALPH """
+        """
+        Return a representation of this author's name in von-last-first-jr
+        order, unless overridden by config.ALPHABETIZE_AUTHOR_AS_RE_LIST
+        """
         s = self.html
         for pat, v in config.ALPHABETIZE_AUTHOR_AS_RE_LIST:
             if pat.search(s):
@@ -901,7 +927,9 @@ def parseAuthor(s):
         raise
 
 def _parseAuthor(s):
-    """Take an author string and return a list of ParsedAuthor."""
+    """
+    Take an author string and return a list of ParsedAuthor.
+    """
     items = []
 
     s = s.strip()
@@ -995,7 +1023,9 @@ def split_von(f, v, l, x):
 
 
 class Parser:
-    """Parser class: reads BibTeX from a file and returns a BibTeX object."""
+    """
+    Parser class: reads BibTeX from a file and returns a BibTeX object.
+    """
     ## Fields
     # strings: maps entry string keys to their values.
     # newStrings: all string definitions not in config.INITIAL_STRINGS
@@ -1026,7 +1056,8 @@ class Parser:
         line = _advance(it, line)
         m = KEY_RE.match(line)
         if not m:
-            raise ParseError("Expected key at line %s"%self.fileiter.lineno)
+            raise ParseError("Expected key at line %s"
+                             % self.fileiter.lineno)
         key, line = m.groups()
         return key, line
 
@@ -1174,7 +1205,8 @@ class Parser:
             if proto and proto[1:] != '*':
                 proto = proto[1:]
         if proto and proto[1:] != '*':
-            raise ParseError("Missing arguments to %s on line %s" % (self.curEntType, self.entryLine))
+            raise ParseError("Missing arguments to %s on line %s"
+                             % (self.curEntType, self.entryLine))
 
         if self.curEntType == 'string':
             self.strings[v[0]] = v[1]
@@ -1198,8 +1230,8 @@ class Parser:
             self._parse()
         except StopIteration:
             if self.litStringLine:
-                raise ParseError("Unexpected EOF in string (started on %s)" %
-                                 self.litStringLine)
+                raise ParseError("Unexpected EOF in string (started on %s)"
+                                 % self.litStringLine)
             elif self.entryLine:
                 raise ParseError("Unexpected EOF at line %s (entry started "
                                  "on %s)" % (self.fileiter.lineno,
@@ -1225,7 +1257,8 @@ class Parser:
                 line = self._parseEntry(line)
                 self.entryLine = 0
             else:
-                raise ParseError("Bad input at line %s (expected a new entry.)"
+                raise ParseError("Bad input at line %s "
+                                 "(expected a new entry.)"
                                  % it.lineno)
 
 def _advance(it, line):
@@ -1251,7 +1284,9 @@ BRACE_OPEN_RE = re.compile(r'^([^\{\}]*\{)(.*)')
 RAW_DATA_RE = re.compile(r'^([^\s\},]+)(.*)')
 
 def parseFile(filename, result=None):
-    """Helper function: parse a single BibTeX file"""
+    """
+    Helper function: parse a single BibTeX file
+    """
     f = FileIter(fname=filename)
     p = Parser(f, {}, result)
     r = p.parse()
@@ -1261,7 +1296,9 @@ def parseFile(filename, result=None):
     return r
 
 def parseString(string, result=None):
-    """Helper function: parse BibTeX from a string"""
+    """
+    Helper function: parse BibTeX from a string
+    """
     f = FileIter(string=string)
     p = Parser(f, {}, result)
     r = p.parse()

	gnunetbib Bibliography (BibTeX, based on AnonBib)
	Log \| Files \| Refs \| README \| LICENSE