gnunetbib

Bibliography (BibTeX, based on AnonBib)
Log | Files | Refs | README | LICENSE

commit 2d7b53111dfd71b8016a927e06bc60a81ff37e27
parent 887c2fa6a74d8a008b65658f36d56fba3c420f85
Author: Nils Gillmann <ng0@n0.is>
Date:   Mon,  8 Oct 2018 19:48:46 +0000

BibTeX.py: Use future, fix style, comment out some types until we have code to handle them.

Signed-off-by: Nils Gillmann <ng0@n0.is>

Diffstat:
M BibTeX.py | 252 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
1 file changed, 174 insertions(+), 78 deletions(-)

diff --git a/BibTeX.py b/BibTeX.py @@ -2,16 +2,19 @@ # Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info. # Copyright 2018, Nils Gillmann. -"""BibTeX.py -- parse and manipulate BibTeX files and entries. - - Based on perl code by Eddie Kohler; heavily modified. +""" +BibTeX.py -- parse and manipulate BibTeX files and entries. +Based on perl code by Eddie Kohler; heavily modified. """ +from __future__ import print_function +from future.utils import raise_with_traceback import cStringIO import re import sys import os import copy +import future import config import rank @@ -34,7 +37,9 @@ WWW_FIELDS = ['www_section', 'www_important', 'www_remarks', 'www_cache_section', 'www_tags'] def url_untranslate(s): - """Change a BibTeX key into a string suitable for use in a URL.""" + """ + Change a BibTeX key into a string suitable for use in a URL. + """ s = re.sub(r'([%<>`#, &_\';])', lambda m: "_%02x"%ord(m.group(1)), s) @@ -42,12 +47,16 @@ def url_untranslate(s): return s class ParseError(Exception): - """Raised on invalid BibTeX""" + """ + Raised on invalid BibTeX + """ pass def smartJoin(*lst): - """Equivalent to os.path.join, but handle"." and ".." entries a bit better. + """ + Equivalent to os.path.join, but handle"." and ".." + entries a bit better. """ lst = [item for item in lst if item != "."] idx = 0 @@ -59,7 +68,9 @@ def smartJoin(*lst): return os.path.join(*lst) class BibTeX: - """A parsed BibTeX file""" + """ + A parsed BibTeX file + """ def __init__(self): self.entries = [] # List of BibTeXEntry self.byKey = {} # Map from BibTeX key to BibTeX entry. 
@@ -67,12 +78,14 @@ class BibTeX: """Add a BibTeX entry to this file.""" k = ent.key if self.byKey.get(ent.key.lower()): - print >> sys.stderr, "Already have an entry named %s"%k + print("Already have an entry named %s"%k, file=sys.stderr) return self.entries.append(ent) self.byKey[ent.key.lower()] = ent def resolve(self): - """Validate all entries in this file, and resolve cross-references""" + """ + Validate all entries in this file, and resolve cross-references + """ seen = {} for ent in self.entries: seen.clear() @@ -80,20 +93,21 @@ class BibTeX: try: cr = self.byKey[ent['crossref'].lower()] except KeyError: - print "No such crossref: %s"% ent['crossref'] + print("No such crossref: %s"% ent['crossref']) break if seen.get(cr.key): - raise ParseError("Circular crossref at %s" % ent.key) + #raise ParseError("Circular crossref at %s" % ent.key) + raise_with_traceback(ParseError("Circular crossref at %s" % ent.key)) seen[cr.key] = 1 del ent.entries['crossref'] if cr.entryLine < ent.entryLine: - print "Warning: crossref %s used after declaration"%cr.key + print("Warning: crossref %s used after declaration"%cr.key) for k in cr.entries.keys(): if ent.entries.has_key(k): - print "ERROR: %s defined both in %s and in %s"%( - k, ent.key, cr.key) + print("ERROR: %s defined both in %s and in %s" + %(k, ent.key, cr.key)) else: ent.entries[k] = cr.entries[k] @@ -114,8 +128,9 @@ class BibTeX: self.entries = newEntries def buildAuthorTable(entries): - """Given a list of BibTeXEntry, return a map from parsed author name to - parsed canonical name. + """ + Given a list of BibTeXEntry, return a map from parsed author name to + parsed canonical name. """ authorsByLast = {} for e in entries: @@ -144,20 +159,22 @@ def buildAuthorTable(entries): if 0: for a, c in result.items(): if a != c: - print "Collapsing authors: %s => %s" % (a, c) + print("Collapsing authors: %s => %s" % (a, c)) if 0: - print parseAuthor("Franz Kaashoek")[0].collapsesTo( - parseAuthor("M. 
Franz Kaashoek")[0]) - print parseAuthor("Paul F. Syverson")[0].collapsesTo( - parseAuthor("Paul Syverson")[0]) - print parseAuthor("Paul Syverson")[0].collapsesTo( - parseAuthor("Paul F. Syverson")[0]) + print(parseAuthor("Franz Kaashoek")[0].collapsesTo( + parseAuthor("M. Franz Kaashoek")[0])) + print(parseAuthor("Paul F. Syverson")[0].collapsesTo( + parseAuthor("Paul Syverson")[0])) + print(parseAuthor("Paul Syverson")[0].collapsesTo( + parseAuthor("Paul F. Syverson")[0])) return result def splitEntriesBy(entries, field): - """Take a list of BibTeX entries and the name of a bibtex field; return - a map from vield value to list of entry.""" + """ + Take a list of BibTeX entries and the name of a bibtex field; return + a map from vield value to list of entry. + """ result = {} for ent in entries: key = ent.get(field) @@ -173,9 +190,11 @@ def splitEntriesBy(entries, field): return result def splitSortedEntriesBy(entries, field): - """Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'. - Return a list of (field-value, entry-list) tuples, in the order - given in 'entries'.""" + """ + Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'. + Return a list of (field-value, entry-list) tuples, in the order + given in 'entries'. + """ result = [] curVal = "alskjdsakldj" curList = [] @@ -190,10 +209,11 @@ def splitSortedEntriesBy(entries, field): return result def sortEntriesBy(entries, field, default): - """Take inputs as in splitEntriesBy, and return a list of entries sorted - by the value of 'field'. Entries without 'field' are sorted as if their - value were 'default'. - """ + """ + Take inputs as in splitEntriesBy, and return a list of entries sorted + by the value of 'field'. Entries without 'field' are sorted as if their + value were 'default'. 
+ """ tmp = [] i = 0 for ent in entries: @@ -211,10 +231,11 @@ def sortEntriesBy(entries, field, default): return [t[2] for t in tmp] def splitEntriesByAuthor(entries): - """Take a list of entries, sort them by author names, and return: - a sorted list of (authorname-in-html, bibtex-entry-list) tuples, - a map from authorname-in-html to name-for-url. - Entries with multiple authors appear once per author. + """ + Take a list of entries, sort them by author names, and return: + a sorted list of (authorname-in-html, bibtex-entry-list) tuples, + a map from authorname-in-html to name-for-url. + Entries with multiple authors appear once per author. """ collapsedAuthors = buildAuthorTable(entries) entries = sortEntriesByDate(entries) @@ -249,7 +270,9 @@ def splitEntriesByAuthor(entries): ## return [ t[2] for t in tmp ] def sortEntriesByDate(entries): - """Sort a list of entries by their publication date.""" + """ + Sort a list of entries by their publication date. + """ tmp = [] i = 0 for ent in entries: @@ -265,13 +288,13 @@ def sortEntriesByDate(entries): monthname = match.group(1) mon = MONTHS.index(monthname) except ValueError: - print "Unknown month %r in %s"%(ent.get("month"), ent.key) + print("Unknown month %r in %s"%(ent.get("month"), ent.key)) mon = 0 try: date = int(ent['year'])*13 + mon except KeyError: - print "ERROR: No year field in %s"%ent.key + print("ERROR: No year field in %s"%ent.key) date = 10000*13 except ValueError: date = 10000*13 @@ -289,27 +312,36 @@ DISPLAYED_FIELDS = ['title', 'author', 'journal', 'booktitle', 'note', 'series'] class BibTeXEntry: - """A single BibTeX entry.""" + """ + A single BibTeX entry. + """ + def __init__(self, type, key, entries): self.type = type # Kind of entry: @book, @injournal,etc self.key = key # What key does it have? self.entries = entries # Map from key to value. 
self.entryLine = 0 # Defined on this line number + def get(self, k, v=None): return self.entries.get(k, v) + def has_key(self, k): return self.entries.has_key(k) + def __getitem__(self, k): return self.entries[k] + def __setitem__(self, k, v): self.entries[k] = v + def __str__(self): return self.format(70, 1) + def getURL(self): """Return the best URL to use for this paper, or None.""" best = None for field in ['www_pdf_url', 'www_ps_gz_url', 'www_ps_url', - 'www_html_url', 'www_txt_url', ]: + 'www_html_url', 'www_txt_url',]: u = self.get(field) if u: if not best: @@ -320,7 +352,9 @@ class BibTeXEntry: return best def format(self, width=70, indent=8, v=0, invStrings={}): - """Format this entry as BibTeX.""" + """ + Format this entry as BibTeX. + """ d = ["@%s{%s,\n" % (self.type, self.key)] if v: df = DISPLAYED_FIELDS[:] @@ -350,28 +384,34 @@ class BibTeXEntry: d.append("}\n") return "".join(d) def resolve(self): - """Handle post-processing for this entry""" + """ + Handle post-processing for this entry + """ a = self.get('author') if a: self.parsedAuthor = parseAuthor(a) - #print a - #print " => ",repr(self.parsedAuthor) + #print(a) + #print(" => ",repr(self.parsedAuthor)) else: self.parsedAuthor = None def isImportant(self): - """Return 1 iff this entry is marked as important""" + """ + Return 1 iff this entry is marked as important + """ imp = self.get("www_important") if imp and imp.strip().lower() not in ("no", "false", "0"): return 1 return 0 def check(self): - """Print any errors for this entry, and return true if there were - none.""" + """ + Print any errors for this entry, and return true if there were + none. 
+ """ errs = self._check() for e in errs: - print e + print(e) return not errs # FIXME: Here's some fields repeated after you enter the @@ -395,7 +435,7 @@ class BibTeXEntry: elif self.type == 'article': fields = 'journal', 'year' elif self.type == 'book': - fields = 'title', 'year' + fields = 'title', 'year', 'publisher' elif self.type == 'booklet': fields = 'title', 'year' elif self.type == 'techreport': @@ -425,7 +465,8 @@ class BibTeXEntry: if self.get("booktitle"): if not self['booktitle'].startswith("Proceedings of") and \ not self['booktitle'].startswith("{Proceedings of"): - errs.append("ERROR (record %s):\t %s's booktitle (%r) doesn't start with 'Proceedings of'" + errs.append("ERROR (record %s):\t %s's booktitle " + "(%r) doesn't start with 'Proceedings of'" % (self.entryLine, selfself.key, self['booktitle'])) if self.has_key("pages") and not re.search(r'\d+--\d+', self['pages']): @@ -434,18 +475,23 @@ class BibTeXEntry: if self.type == 'proceedings': if self.get('title'): - errs.append("ERROR (record %s):\t %s is a proceedings: it should have a booktitle, not a title." + errs.append("ERROR (record %s):\t %s is a proceedings: " + "it should have a booktitle, not a title." % (self.entryLine, self.key)) for field, value in self.entries.items(): if value.translate(ALLCHARS, PRINTINGCHARS): - errs.append("ERROR (record %s):\t %s.%s has non-ASCII characters" + errs.append("ERROR (record %s):\t %s.%s " + "has non-ASCII characters" % (self.entryLine, self.key, field)) if field.startswith("www_") and field not in WWW_FIELDS: - errs.append("ERROR (record %s):\t unknown www field %s" % (self.entryLine, field)) + errs.append("ERROR (record %s):\t unknown " + "www field %s" + % (self.entryLine, field)) if value.strip()[-1:] == '.' 
and \ field not in ("notes", "www_remarks", "author"): - errs.append("ERROR (record %s):\t %s.%s has an extraneous period" + errs.append("ERROR (record %s):\t %s.%s " + "has an extraneous period" % (self.entryLine, self.key, field)) return errs @@ -462,7 +508,7 @@ class BibTeXEntry: res = ["In the ", m.group(1), '<a href="%s">'%bookurl, m.group(2), "</a>"] else: - res = ['In the <a href="%s">%s</a>' % (bookurl, booktitle)] + res = ['In the <a href="%s">%s</a>'% (bookurl, booktitle)] else: res = ["In the ", booktitle] @@ -519,15 +565,21 @@ class BibTeXEntry: if self.get('month') or self.get('year'): res.append(", %s %s" % (self.get('month', ''), self.get('year', ''))) - elif self.type == 'book': - res = [self['publisher']] - if self.get('year'): - res.append(" ") - res.append(self.get('year')) - # res.append(", %s"%(self.get('year'))) - if self.get('series'): - res.append(",") - res.append(self['series']) + # elif self.type == 'book': + # res = [self['publisher']] + # if self.get('year'): + # res.append(" ") + # res.append(self.get('year')) + # # res.append(", %s"%(self.get('year'))) + # if self.get('series'): + # res.append(",") + # res.append(self['series']) + # elif self.type == 'booklet': + # # res = self.get('publisher') + # res = [self['publisher']] + # if self.get('year'): + # res.append(" ") + # res.append(self.get('year')) elif self.type == 'misc': res = [self['howpublished']] if self.get('month') or self.get('year'): @@ -552,7 +604,9 @@ class BibTeXEntry: return htmlize("".join(res)) def to_html(self, cache_path="./cache", base_url="."): - """Return the HTML for this entry.""" + """ + Return the HTML for this entry. 
+ """ imp = self.isImportant() draft = self.get('year') == 'forthcoming' if imp: @@ -589,7 +643,8 @@ class BibTeXEntry: cache_section = self.get('www_cache_section', ".") if cache_section not in config.CACHE_SECTIONS: if cache_section != ".": - print >>sys.stderr, "Unrecognized cache section %s"%(cache_section) + print("Unrecognized cache section %s"%(cache_section), + file=sys.stderr) cache_section = "." for key, name, ext in (('www_abstract_url', 'abstract', 'abstract'), @@ -639,13 +694,27 @@ class BibTeXEntry: res.append(".") res.append("</span><br />\n") res.append(self.biblio_to_html()) - res.append("<a href='#%s'>&middot;</a>"%url_untranslate(self.key)) + res.append("\n<br>\n(<a href='#%s'>direct link</a>)" + %url_untranslate(self.key)) + if self.get('url'): + res.append(" (<a href='%s'>website</a>)" + %htmlize(self['url'])) res.append("</p>") if self.get('www_remarks'): res.append("<p class='remarks'>%s</p>" %htmlize(self['www_remarks'])) + if self.get('abstract'): + res.append("<p class='abstract'>%s</p>" + %htmlize(self['abstract'])) + + res.append("\n<br>[<a href='#'>Go to top</a>]") + + # We might want to make this invisible (and only + # visible in text browsers: + res.append("\n<hr>") + if imp or draft: res.append("</div>") res.append("</li>\n\n") @@ -671,16 +740,22 @@ def TeXescapeURL(s): return s RE_LONE_AMP = re.compile(r'&([^a-z0-9])') + RE_LONE_I = re.compile(r'\\i([^a-z0-9])') + RE_ACCENT = re.compile(r'\\([\'`~^"c])([^{]|{.})') + RE_LIGATURE = re.compile(r'\\(AE|ae|OE|oe|AA|aa|O|o|ss)([^a-z0-9])') + ACCENT_MAP = {"'" : 'acute', "`" : 'grave', "~" : 'tilde', "^" : 'circ', '"' : 'uml', "c" : 'cedil',} + UNICODE_MAP = {'&nacute;' : '&#x0144;',} + HTML_LIGATURE_MAP = { 'AE' : '&AElig;', 'ae' : '&aelig;', @@ -691,7 +766,9 @@ HTML_LIGATURE_MAP = { 'O' : '&Oslash;', 'o' : '&oslash;', 'ss' : '&szlig;',} + RE_TEX_CMD = re.compile(r"(?:\\[a-zA-Z@]+|\\.)") + RE_PAGE_SPAN = re.compile(r"(\d)--(\d)") def _unaccent(m): @@ -705,7 +782,9 @@ def _unlig_html(m): 
return "%s%s"%(HTML_LIGATURE_MAP[m.group(1)], m.group(2)) def htmlize(s): - """Turn a TeX string into good-looking HTML.""" + """ + Turn a TeX string into good-looking HTML. + """ s = RE_LONE_AMP.sub(lambda m: "&amp;%s" % m.group(1), s) s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s) s = RE_ACCENT.sub(_unaccent, s) @@ -719,14 +798,18 @@ def htmlize(s): return s def author_url(author): - """Given an author's name, return a URL for his/her homepage.""" + """ + Given an author's name, return a URL for his/her homepage. + """ for pat, url in config.AUTHOR_RE_LIST: if pat.search(author): return url return None def txtize(s): - """Turn a TeX string into decnent plaintext.""" + """ + Turn a TeX string into decent plaintext. + """ s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s) s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s) s = RE_LIGATURE.sub(lambda m: "%s%s"%m.groups(), s) @@ -923,7 +1006,7 @@ def parseAuthor(s): try: return _parseAuthor(s) except: - print >>sys.stderr, "Internal error while parsing author %r"%s + print("Internal error while parsing author %r"%s, file=sys.stderr) raise def _parseAuthor(s): @@ -993,12 +1076,17 @@ def _parseAuthor(s): return parsedAuthors ALLCHARS = "".join(map(chr, range(256))) + PRINTINGCHARS = "\t\n\r"+"".join(map(chr, range(32, 127))) + LC_CHARS = "abcdefghijklmnopqrstuvwxyz" + SV_DELCHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "@") + RE_ESCAPED = re.compile(r'\\.') + def split_von(f, v, l, x): in_von = 0 while x: @@ -1126,11 +1214,12 @@ class Parser: data.append(" ") line = it.next() elif line[0] == '#': - print >>sys.stderr, "Weird concat on line %s"%it.lineno + print("Weird concat on line %s"%it.lineno, + file=sys.stderr) elif line[0] in "},": if not data: - print >>sys.stderr, "No data after field on line %s"%( - it.lineno) + print("No data after field on line %s"%(it.lineno), + file=sys.stderr) else: m = RAW_DATA_RE.match(line) if m: @@ -1247,7 +1336,9 @@ class Parser: line = it.next() while 1: # 
Skip blank lines. - while not line or line.isspace() or OUTER_COMMENT_RE.match(line): + while not line \ + or line.isspace() \ + or OUTER_COMMENT_RE.match(line): line = it.next() # Get the first line of an entry. m = ENTRY_BEGIN_RE.match(line) @@ -1262,17 +1353,22 @@ class Parser: % it.lineno) def _advance(it, line): - while not line or line.isspace() or COMMENT_RE.match(line): + while not line \ + or line.isspace() \ + or COMMENT_RE.match(line): line = it.next() return line # Matches a comment line outside of an entry. OUTER_COMMENT_RE = re.compile(r'^\s*[\#\%]') + # Matches a comment line inside of an entry. COMMENT_RE = re.compile(r'^\s*\%') + # Matches the start of an entry. group 1 is the type of the entry. # group 2 is the rest of the line. ENTRY_BEGIN_RE = re.compile(r'''^\s*\@([^\s\"\%\'\(\)\,\=\{\}]+)(.*)''') + # Start of an entry. group 1 is the keyword naming the entry. BRACE_BEGIN_RE = re.compile(r'\s*\{(.*)') BRACE_END_RE = re.compile(r'\s*\}(.*)') @@ -1317,4 +1413,4 @@ if __name__ == '__main__': for e in r.entries: if e.type in ("proceedings", "journal"): continue - print e.to_html() + print(e.to_html())