BibTeX.py: Use future, fix style, comment some types until we have code to handle it. - gnunetbib

commit 2d7b53111dfd71b8016a927e06bc60a81ff37e27
parent 887c2fa6a74d8a008b65658f36d56fba3c420f85
Author: Nils Gillmann <ng0@n0.is>
Date:   Mon,  8 Oct 2018 19:48:46 +0000

BibTeX.py: Use future, fix style, comment some types until we have code to handle it.

Signed-off-by: Nils Gillmann <ng0@n0.is>

Diffstat:
M BibTeX.py  | 252 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------

1 file changed, 174 insertions(+), 78 deletions(-)
diff --git a/BibTeX.py b/BibTeX.py
@@ -2,16 +2,19 @@
 # Copyright 2003-2008, Nick Mathewson.  See LICENSE for licensing info.
 # Copyright 2018, Nils Gillmann.
 
-"""BibTeX.py -- parse and manipulate BibTeX files and entries.
-
-   Based on perl code by Eddie Kohler; heavily modified.
+"""
+BibTeX.py -- parse and manipulate BibTeX files and entries.
+Based on perl code by Eddie Kohler; heavily modified.
 """
 
+from __future__ import print_function
+from future.utils import raise_with_traceback
 import cStringIO
 import re
 import sys
 import os
 import copy
+import future
 
 import config
 import rank
@@ -34,7 +37,9 @@ WWW_FIELDS = ['www_section', 'www_important', 'www_remarks',
               'www_cache_section', 'www_tags']
 
 def url_untranslate(s):
-    """Change a BibTeX key into a string suitable for use in a URL."""
+    """
+    Change a BibTeX key into a string suitable for use in a URL.
+    """
     s = re.sub(r'([%<>`#, &_\';])',
                lambda m: "_%02x"%ord(m.group(1)),
                s)
@@ -42,12 +47,16 @@ def url_untranslate(s):
     return s
 
 class ParseError(Exception):
-    """Raised on invalid BibTeX"""
+    """
+    Raised on invalid BibTeX
+    """
     pass
 
 
 def smartJoin(*lst):
-    """Equivalent to os.path.join, but handle"." and ".." entries a bit better.
+    """
+    Equivalent to os.path.join, but handle "." and ".."
+    entries a bit better.
     """
     lst = [item for item in lst if item != "."]
     idx = 0
@@ -59,7 +68,9 @@ def smartJoin(*lst):
     return os.path.join(*lst)
 
 class BibTeX:
-    """A parsed BibTeX file"""
+    """
+    A parsed BibTeX file
+    """
     def __init__(self):
         self.entries = [] # List of BibTeXEntry
         self.byKey = {} # Map from BibTeX key to BibTeX entry.
@@ -67,12 +78,14 @@ class BibTeX:
         """Add a BibTeX entry to this file."""
         k = ent.key
         if self.byKey.get(ent.key.lower()):
-            print >> sys.stderr, "Already have an entry named %s"%k
+            print("Already have an entry named %s"%k, file=sys.stderr)
             return
         self.entries.append(ent)
         self.byKey[ent.key.lower()] = ent
     def resolve(self):
-        """Validate all entries in this file, and resolve cross-references"""
+        """
+        Validate all entries in this file, and resolve cross-references
+        """
         seen = {}
         for ent in self.entries:
             seen.clear()
@@ -80,20 +93,21 @@ class BibTeX:
                 try:
                     cr = self.byKey[ent['crossref'].lower()]
                 except KeyError:
-                    print "No such crossref: %s"% ent['crossref']
+                    print("No such crossref: %s"% ent['crossref'])
                     break
                 if seen.get(cr.key):
-                    raise ParseError("Circular crossref at %s" % ent.key)
+                    #raise ParseError("Circular crossref at %s" % ent.key)
+                    raise_with_traceback(ParseError("Circular crossref at %s" % ent.key))
                 seen[cr.key] = 1
                 del ent.entries['crossref']
 
                 if cr.entryLine < ent.entryLine:
-                    print "Warning: crossref %s used after declaration"%cr.key
+                    print("Warning: crossref %s used after declaration"%cr.key)
 
                 for k in cr.entries.keys():
                     if ent.entries.has_key(k):
-                        print "ERROR: %s defined both in %s and in %s"%(
-                            k, ent.key, cr.key)
+                        print("ERROR: %s defined both in %s and in %s"
+                              %(k, ent.key, cr.key))
                     else:
                         ent.entries[k] = cr.entries[k]
 
@@ -114,8 +128,9 @@ class BibTeX:
         self.entries = newEntries
 
 def buildAuthorTable(entries):
-    """Given a list of BibTeXEntry, return a map from parsed author name to
-       parsed canonical name.
+    """
+    Given a list of BibTeXEntry, return a map from parsed author name to
+    parsed canonical name.
     """
     authorsByLast = {}
     for e in entries:
@@ -144,20 +159,22 @@ def buildAuthorTable(entries):
     if 0:
         for a, c in result.items():
             if a != c:
-                print "Collapsing authors: %s => %s" % (a, c)
+                print("Collapsing authors: %s => %s" % (a, c))
     if 0:
-        print parseAuthor("Franz Kaashoek")[0].collapsesTo(
-            parseAuthor("M. Franz Kaashoek")[0])
-        print parseAuthor("Paul F. Syverson")[0].collapsesTo(
-            parseAuthor("Paul Syverson")[0])
-        print parseAuthor("Paul Syverson")[0].collapsesTo(
-            parseAuthor("Paul F. Syverson")[0])
+        print(parseAuthor("Franz Kaashoek")[0].collapsesTo(
+            parseAuthor("M. Franz Kaashoek")[0]))
+        print(parseAuthor("Paul F. Syverson")[0].collapsesTo(
+            parseAuthor("Paul Syverson")[0]))
+        print(parseAuthor("Paul Syverson")[0].collapsesTo(
+            parseAuthor("Paul F. Syverson")[0]))
 
     return result
 
 def splitEntriesBy(entries, field):
-    """Take a list of BibTeX entries and the name of a bibtex field; return
-       a map from vield value to list of entry."""
+    """
+    Take a list of BibTeX entries and the name of a bibtex field; return
+    a map from field value to list of entries.
+    """
     result = {}
     for ent in entries:
         key = ent.get(field)
@@ -173,9 +190,11 @@ def splitEntriesBy(entries, field):
     return result
 
 def splitSortedEntriesBy(entries, field):
-    """Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'.
-       Return a list of (field-value, entry-list) tuples, in the order
-       given in 'entries'."""
+    """
+    Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'.
+    Return a list of (field-value, entry-list) tuples, in the order
+    given in 'entries'.
+    """
     result = []
     curVal = "alskjdsakldj"
     curList = []
@@ -190,10 +209,11 @@ def splitSortedEntriesBy(entries, field):
     return result
 
 def sortEntriesBy(entries, field, default):
-    """Take inputs as in splitEntriesBy, and return a list of entries sorted
-       by the value of 'field'. Entries without 'field' are sorted as if their
-       value were 'default'.
-       """
+    """
+    Take inputs as in splitEntriesBy, and return a list of entries sorted
+    by the value of 'field'. Entries without 'field' are sorted as if their
+    value were 'default'.
+    """
     tmp = []
     i = 0
     for ent in entries:
@@ -211,10 +231,11 @@ def sortEntriesBy(entries, field, default):
     return [t[2] for t in tmp]
 
 def splitEntriesByAuthor(entries):
-    """Take a list of entries, sort them by author names, and return:
-         a sorted list of (authorname-in-html, bibtex-entry-list) tuples,
-         a map from authorname-in-html to name-for-url.
-       Entries with multiple authors appear once per author.
+    """
+    Take a list of entries, sort them by author names, and return:
+    a sorted list of (authorname-in-html, bibtex-entry-list) tuples,
+    a map from authorname-in-html to name-for-url.
+    Entries with multiple authors appear once per author.
     """
     collapsedAuthors = buildAuthorTable(entries)
     entries = sortEntriesByDate(entries)
@@ -249,7 +270,9 @@ def splitEntriesByAuthor(entries):
 ##     return [ t[2] for t in tmp ]
 
 def sortEntriesByDate(entries):
-    """Sort a list of entries by their publication date."""
+    """
+    Sort a list of entries by their publication date.
+    """
     tmp = []
     i = 0
     for ent in entries:
@@ -265,13 +288,13 @@ def sortEntriesByDate(entries):
                     monthname = match.group(1)
             mon = MONTHS.index(monthname)
         except ValueError:
-            print "Unknown month %r in %s"%(ent.get("month"), ent.key)
+            print("Unknown month %r in %s"%(ent.get("month"), ent.key))
             mon = 0
 
         try:
             date = int(ent['year'])*13 + mon
         except KeyError:
-            print "ERROR: No year field in %s"%ent.key
+            print("ERROR: No year field in %s"%ent.key)
             date = 10000*13
         except ValueError:
             date = 10000*13
@@ -289,27 +312,36 @@ DISPLAYED_FIELDS = ['title', 'author', 'journal', 'booktitle',
                     'note', 'series']
 
 class BibTeXEntry:
-    """A single BibTeX entry."""
+    """
+    A single BibTeX entry.
+    """
+
     def __init__(self, type, key, entries):
         self.type = type  # Kind of entry: @book, @injournal,etc
         self.key = key # What key does it have?
         self.entries = entries # Map from key to value.
         self.entryLine = 0 # Defined on this line number
+
     def get(self, k, v=None):
         return self.entries.get(k, v)
+
     def has_key(self, k):
         return self.entries.has_key(k)
+
     def __getitem__(self, k):
         return self.entries[k]
+
     def __setitem__(self, k, v):
         self.entries[k] = v
+
     def __str__(self):
         return self.format(70, 1)
+
     def getURL(self):
         """Return the best URL to use for this paper, or None."""
         best = None
         for field in ['www_pdf_url', 'www_ps_gz_url', 'www_ps_url',
-                      'www_html_url', 'www_txt_url', ]:
+                      'www_html_url', 'www_txt_url',]:
             u = self.get(field)
             if u:
                 if not best:
@@ -320,7 +352,9 @@ class BibTeXEntry:
         return best
 
     def format(self, width=70, indent=8, v=0, invStrings={}):
-        """Format this entry as BibTeX."""
+        """
+        Format this entry as BibTeX.
+        """
         d = ["@%s{%s,\n" % (self.type, self.key)]
         if v:
             df = DISPLAYED_FIELDS[:]
@@ -350,28 +384,34 @@ class BibTeXEntry:
         d.append("}\n")
         return "".join(d)
     def resolve(self):
-        """Handle post-processing for this entry"""
+        """
+        Handle post-processing for this entry
+        """
         a = self.get('author')
         if a:
             self.parsedAuthor = parseAuthor(a)
-            #print a
-            #print "   => ",repr(self.parsedAuthor)
+            #print(a)
+            #print("   => ",repr(self.parsedAuthor))
         else:
             self.parsedAuthor = None
 
     def isImportant(self):
-        """Return 1 iff this entry is marked as important"""
+        """
+        Return 1 iff this entry is marked as important
+        """
         imp = self.get("www_important")
         if imp and imp.strip().lower() not in ("no", "false", "0"):
             return 1
         return 0
 
     def check(self):
-        """Print any errors for this entry, and return true if there were
-           none."""
+        """
+        Print any errors for this entry, and return true if there were
+        none.
+        """
         errs = self._check()
         for e in errs:
-            print e
+            print(e)
         return not errs
 
     # FIXME: Here's some fields repeated after you enter the
@@ -395,7 +435,7 @@ class BibTeXEntry:
         elif self.type == 'article':
             fields = 'journal', 'year'
         elif self.type == 'book':
-            fields = 'title', 'year'
+            fields = 'title', 'year', 'publisher'
         elif self.type == 'booklet':
             fields = 'title', 'year'
         elif self.type == 'techreport':
@@ -425,7 +465,8 @@ class BibTeXEntry:
             if self.get("booktitle"):
                 if not self['booktitle'].startswith("Proceedings of") and \
                    not self['booktitle'].startswith("{Proceedings of"):
-                    errs.append("ERROR (record %s):\t %s's booktitle (%r) doesn't start with 'Proceedings of'"
+                    errs.append("ERROR (record %s):\t %s's booktitle "
+                                "(%r) doesn't start with 'Proceedings of'"
                                 % (self.entryLine, selfself.key, self['booktitle']))
 
         if self.has_key("pages") and not re.search(r'\d+--\d+', self['pages']):
@@ -434,18 +475,23 @@ class BibTeXEntry:
 
         if self.type == 'proceedings':
             if self.get('title'):
-                errs.append("ERROR (record %s):\t %s is a proceedings: it should have a booktitle, not a title."
+                errs.append("ERROR (record %s):\t %s is a proceedings: "
+                            "it should have a booktitle, not a title."
                             % (self.entryLine, self.key))
 
         for field, value in self.entries.items():
             if value.translate(ALLCHARS, PRINTINGCHARS):
-                errs.append("ERROR (record %s):\t %s.%s has non-ASCII characters"
+                errs.append("ERROR (record %s):\t %s.%s "
+                            "has non-ASCII characters"
                             % (self.entryLine, self.key, field))
             if field.startswith("www_") and field not in WWW_FIELDS:
-                errs.append("ERROR (record %s):\t unknown www field %s" % (self.entryLine, field))
+                errs.append("ERROR (record %s):\t unknown "
+                            "www field %s"
+                            % (self.entryLine, field))
             if value.strip()[-1:] == '.' and \
                 field not in ("notes", "www_remarks", "author"):
-                errs.append("ERROR (record %s):\t %s.%s has an extraneous period"
+                errs.append("ERROR (record %s):\t %s.%s "
+                            "has an extraneous period"
                             % (self.entryLine, self.key, field))
         return errs
 
@@ -462,7 +508,7 @@ class BibTeXEntry:
                     res = ["In the ", m.group(1),
                            '<a href="%s">'%bookurl, m.group(2), "</a>"]
                 else:
-                    res = ['In the <a href="%s">%s</a>' % (bookurl, booktitle)]
+                    res = ['In the <a href="%s">%s</a>'% (bookurl, booktitle)]
             else:
                 res = ["In the ", booktitle]
 
@@ -519,15 +565,21 @@ class BibTeXEntry:
             if self.get('month') or self.get('year'):
                 res.append(", %s %s" % (self.get('month', ''),
                                         self.get('year', '')))
-        elif self.type == 'book':
-            res = [self['publisher']]
-            if self.get('year'):
-                res.append(" ")
-                res.append(self.get('year'))
-                # res.append(", %s"%(self.get('year')))
-            if self.get('series'):
-                res.append(",")
-                res.append(self['series'])
+        # elif self.type == 'book':
+        #     res = [self['publisher']]
+        #     if self.get('year'):
+        #         res.append(" ")
+        #         res.append(self.get('year'))
+        #         # res.append(", %s"%(self.get('year')))
+        #     if self.get('series'):
+        #         res.append(",")
+        #         res.append(self['series'])
+        # elif self.type == 'booklet':
+        #     # res = self.get('publisher')
+        #     res = [self['publisher']]
+        #     if self.get('year'):
+        #         res.append(" ")
+        #         res.append(self.get('year'))
         elif self.type == 'misc':
             res = [self['howpublished']]
             if self.get('month') or self.get('year'):
@@ -552,7 +604,9 @@ class BibTeXEntry:
         return htmlize("".join(res))
 
     def to_html(self, cache_path="./cache", base_url="."):
-        """Return the HTML for this entry."""
+        """
+        Return the HTML for this entry.
+        """
         imp = self.isImportant()
         draft = self.get('year') == 'forthcoming'
         if imp:
@@ -589,7 +643,8 @@ class BibTeXEntry:
             cache_section = self.get('www_cache_section', ".")
             if cache_section not in config.CACHE_SECTIONS:
                 if cache_section != ".":
-                    print >>sys.stderr, "Unrecognized cache section %s"%(cache_section)
+                    print("Unrecognized cache section %s"%(cache_section),
+                          file=sys.stderr)
                     cache_section = "."
 
             for key, name, ext in (('www_abstract_url', 'abstract', 'abstract'),
@@ -639,13 +694,27 @@ class BibTeXEntry:
             res.append(".")
         res.append("</span><br />\n")
         res.append(self.biblio_to_html())
-        res.append("<a href='#%s'>&middot;</a>"%url_untranslate(self.key))
+        res.append("\n<br>\n(<a href='#%s'>direct link</a>)"
+                   %url_untranslate(self.key))
+        if self.get('url'):
+            res.append(" (<a href='%s'>website</a>)"
+                       %htmlize(self['url']))
         res.append("</p>")
 
         if self.get('www_remarks'):
             res.append("<p class='remarks'>%s</p>"
                        %htmlize(self['www_remarks']))
 
+        if self.get('abstract'):
+            res.append("<p class='abstract'>%s</p>"
+                       %htmlize(self['abstract']))
+
+        res.append("\n<br>[<a href='#'>Go to top</a>]")
+
+        # We might want to make this invisible (and only
+        # visible in text browsers):
+        res.append("\n<hr>")
+
         if imp or draft:
             res.append("</div>")
         res.append("</li>\n\n")
@@ -671,16 +740,22 @@ def TeXescapeURL(s):
     return s
 
 RE_LONE_AMP = re.compile(r'&([^a-z0-9])')
+
 RE_LONE_I = re.compile(r'\\i([^a-z0-9])')
+
 RE_ACCENT = re.compile(r'\\([\'`~^"c])([^{]|{.})')
+
 RE_LIGATURE = re.compile(r'\\(AE|ae|OE|oe|AA|aa|O|o|ss)([^a-z0-9])')
+
 ACCENT_MAP = {"'" : 'acute',
               "`" : 'grave',
               "~" : 'tilde',
               "^" : 'circ',
               '"' : 'uml',
               "c" : 'cedil',}
+
 UNICODE_MAP = {'&nacute;' : '&#x0144;',}
+
 HTML_LIGATURE_MAP = {
     'AE' : '&AElig;',
     'ae' : '&aelig;',
@@ -691,7 +766,9 @@ HTML_LIGATURE_MAP = {
     'O'  : '&Oslash;',
     'o'  : '&oslash;',
     'ss' : '&szlig;',}
+
 RE_TEX_CMD = re.compile(r"(?:\\[a-zA-Z@]+|\\.)")
+
 RE_PAGE_SPAN = re.compile(r"(\d)--(\d)")
 
 def _unaccent(m):
@@ -705,7 +782,9 @@ def _unlig_html(m):
     return "%s%s"%(HTML_LIGATURE_MAP[m.group(1)], m.group(2))
 
 def htmlize(s):
-    """Turn a TeX string into good-looking HTML."""
+    """
+    Turn a TeX string into good-looking HTML.
+    """
     s = RE_LONE_AMP.sub(lambda m: "&amp;%s" % m.group(1), s)
     s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
     s = RE_ACCENT.sub(_unaccent, s)
@@ -719,14 +798,18 @@ def htmlize(s):
     return s
 
 def author_url(author):
-    """Given an author's name, return a URL for his/her homepage."""
+    """
+    Given an author's name, return a URL for his/her homepage.
+    """
     for pat, url in config.AUTHOR_RE_LIST:
         if pat.search(author):
             return url
     return None
 
 def txtize(s):
-    """Turn a TeX string into decnent plaintext."""
+    """
+    Turn a TeX string into decent plaintext.
+    """
     s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
     s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s)
     s = RE_LIGATURE.sub(lambda m: "%s%s"%m.groups(), s)
@@ -923,7 +1006,7 @@ def parseAuthor(s):
     try:
         return _parseAuthor(s)
     except:
-        print >>sys.stderr, "Internal error while parsing author %r"%s
+        print("Internal error while parsing author %r"%s, file=sys.stderr)
         raise
 
 def _parseAuthor(s):
@@ -993,12 +1076,17 @@ def _parseAuthor(s):
     return parsedAuthors
 
 ALLCHARS = "".join(map(chr, range(256)))
+
 PRINTINGCHARS = "\t\n\r"+"".join(map(chr, range(32, 127)))
+
 LC_CHARS = "abcdefghijklmnopqrstuvwxyz"
+
 SV_DELCHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                "abcdefghijklmnopqrstuvwxyz"
                "@")
+
 RE_ESCAPED = re.compile(r'\\.')
+
 def split_von(f, v, l, x):
     in_von = 0
     while x:
@@ -1126,11 +1214,12 @@ class Parser:
                         data.append(" ")
                         line = it.next()
             elif line[0] == '#':
-                print >>sys.stderr, "Weird concat on line %s"%it.lineno
+                print("Weird concat on line %s"%it.lineno,
+                      file=sys.stderr)
             elif line[0] in "},":
                 if not data:
-                    print >>sys.stderr, "No data after field on line %s"%(
-                        it.lineno)
+                    print("No data after field on line %s"%(it.lineno),
+                          file=sys.stderr)
             else:
                 m = RAW_DATA_RE.match(line)
                 if m:
@@ -1247,7 +1336,9 @@ class Parser:
         line = it.next()
         while 1:
             # Skip blank lines.
-            while not line or line.isspace() or OUTER_COMMENT_RE.match(line):
+            while not line \
+                  or line.isspace() \
+                  or OUTER_COMMENT_RE.match(line):
                 line = it.next()
             # Get the first line of an entry.
             m = ENTRY_BEGIN_RE.match(line)
@@ -1262,17 +1353,22 @@ class Parser:
                                  % it.lineno)
 
 def _advance(it, line):
-    while not line or line.isspace() or COMMENT_RE.match(line):
+    while not line \
+          or line.isspace() \
+          or COMMENT_RE.match(line):
         line = it.next()
     return line
 
 # Matches a comment line outside of an entry.
 OUTER_COMMENT_RE = re.compile(r'^\s*[\#\%]')
+
 # Matches a comment line inside of an entry.
 COMMENT_RE = re.compile(r'^\s*\%')
+
 # Matches the start of an entry. group 1 is the type of the entry.
 # group 2 is the rest of the line.
 ENTRY_BEGIN_RE = re.compile(r'''^\s*\@([^\s\"\%\'\(\)\,\=\{\}]+)(.*)''')
+
 # Start of an entry.  group 1 is the keyword naming the entry.
 BRACE_BEGIN_RE = re.compile(r'\s*\{(.*)')
 BRACE_END_RE = re.compile(r'\s*\}(.*)')
@@ -1317,4 +1413,4 @@ if __name__ == '__main__':
 
     for e in r.entries:
         if e.type in ("proceedings", "journal"): continue
-        print e.to_html()
+        print(e.to_html())

	gnunetbib Bibliography (BibTeX, based on AnonBib)
	Log \| Files \| Refs \| README \| LICENSE