commit 2d7b53111dfd71b8016a927e06bc60a81ff37e27
parent 887c2fa6a74d8a008b65658f36d56fba3c420f85
Author: Nils Gillmann <ng0@n0.is>
Date: Mon, 8 Oct 2018 19:48:46 +0000
BibTeX.py: Use future, fix style, comment out some types until we have code to handle them.
Signed-off-by: Nils Gillmann <ng0@n0.is>
Diffstat:
| M | BibTeX.py | | | 252 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------- |
1 file changed, 174 insertions(+), 78 deletions(-)
diff --git a/BibTeX.py b/BibTeX.py
@@ -2,16 +2,19 @@
# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info.
# Copyright 2018, Nils Gillmann.
-"""BibTeX.py -- parse and manipulate BibTeX files and entries.
-
- Based on perl code by Eddie Kohler; heavily modified.
+"""
+BibTeX.py -- parse and manipulate BibTeX files and entries.
+Based on perl code by Eddie Kohler; heavily modified.
"""
+from __future__ import print_function
+from future.utils import raise_with_traceback
import cStringIO
import re
import sys
import os
import copy
+import future
import config
import rank
@@ -34,7 +37,9 @@ WWW_FIELDS = ['www_section', 'www_important', 'www_remarks',
'www_cache_section', 'www_tags']
def url_untranslate(s):
- """Change a BibTeX key into a string suitable for use in a URL."""
+ """
+ Change a BibTeX key into a string suitable for use in a URL.
+ """
s = re.sub(r'([%<>`#, &_\';])',
lambda m: "_%02x"%ord(m.group(1)),
s)
@@ -42,12 +47,16 @@ def url_untranslate(s):
return s
class ParseError(Exception):
- """Raised on invalid BibTeX"""
+ """
+ Raised on invalid BibTeX
+ """
pass
def smartJoin(*lst):
- """Equivalent to os.path.join, but handle"." and ".." entries a bit better.
+ """
+ Equivalent to os.path.join, but handle "." and ".."
+ entries a bit better.
"""
lst = [item for item in lst if item != "."]
idx = 0
@@ -59,7 +68,9 @@ def smartJoin(*lst):
return os.path.join(*lst)
class BibTeX:
- """A parsed BibTeX file"""
+ """
+ A parsed BibTeX file
+ """
def __init__(self):
self.entries = [] # List of BibTeXEntry
self.byKey = {} # Map from BibTeX key to BibTeX entry.
@@ -67,12 +78,14 @@ class BibTeX:
"""Add a BibTeX entry to this file."""
k = ent.key
if self.byKey.get(ent.key.lower()):
- print >> sys.stderr, "Already have an entry named %s"%k
+ print("Already have an entry named %s"%k, file=sys.stderr)
return
self.entries.append(ent)
self.byKey[ent.key.lower()] = ent
def resolve(self):
- """Validate all entries in this file, and resolve cross-references"""
+ """
+ Validate all entries in this file, and resolve cross-references
+ """
seen = {}
for ent in self.entries:
seen.clear()
@@ -80,20 +93,21 @@ class BibTeX:
try:
cr = self.byKey[ent['crossref'].lower()]
except KeyError:
- print "No such crossref: %s"% ent['crossref']
+ print("No such crossref: %s"% ent['crossref'])
break
if seen.get(cr.key):
- raise ParseError("Circular crossref at %s" % ent.key)
+ #raise ParseError("Circular crossref at %s" % ent.key)
+ raise_with_traceback(ParseError("Circular crossref at %s" % ent.key))
seen[cr.key] = 1
del ent.entries['crossref']
if cr.entryLine < ent.entryLine:
- print "Warning: crossref %s used after declaration"%cr.key
+ print("Warning: crossref %s used after declaration"%cr.key)
for k in cr.entries.keys():
if ent.entries.has_key(k):
- print "ERROR: %s defined both in %s and in %s"%(
- k, ent.key, cr.key)
+ print("ERROR: %s defined both in %s and in %s"
+ %(k, ent.key, cr.key))
else:
ent.entries[k] = cr.entries[k]
@@ -114,8 +128,9 @@ class BibTeX:
self.entries = newEntries
def buildAuthorTable(entries):
- """Given a list of BibTeXEntry, return a map from parsed author name to
- parsed canonical name.
+ """
+ Given a list of BibTeXEntry, return a map from parsed author name to
+ parsed canonical name.
"""
authorsByLast = {}
for e in entries:
@@ -144,20 +159,22 @@ def buildAuthorTable(entries):
if 0:
for a, c in result.items():
if a != c:
- print "Collapsing authors: %s => %s" % (a, c)
+ print("Collapsing authors: %s => %s" % (a, c))
if 0:
- print parseAuthor("Franz Kaashoek")[0].collapsesTo(
- parseAuthor("M. Franz Kaashoek")[0])
- print parseAuthor("Paul F. Syverson")[0].collapsesTo(
- parseAuthor("Paul Syverson")[0])
- print parseAuthor("Paul Syverson")[0].collapsesTo(
- parseAuthor("Paul F. Syverson")[0])
+ print(parseAuthor("Franz Kaashoek")[0].collapsesTo(
+ parseAuthor("M. Franz Kaashoek")[0]))
+ print(parseAuthor("Paul F. Syverson")[0].collapsesTo(
+ parseAuthor("Paul Syverson")[0]))
+ print(parseAuthor("Paul Syverson")[0].collapsesTo(
+ parseAuthor("Paul F. Syverson")[0]))
return result
def splitEntriesBy(entries, field):
- """Take a list of BibTeX entries and the name of a bibtex field; return
- a map from vield value to list of entry."""
+ """
+ Take a list of BibTeX entries and the name of a bibtex field; return
+ a map from field value to a list of entries.
+ """
result = {}
for ent in entries:
key = ent.get(field)
@@ -173,9 +190,11 @@ def splitEntriesBy(entries, field):
return result
def splitSortedEntriesBy(entries, field):
- """Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'.
- Return a list of (field-value, entry-list) tuples, in the order
- given in 'entries'."""
+ """
+ Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'.
+ Return a list of (field-value, entry-list) tuples, in the order
+ given in 'entries'.
+ """
result = []
curVal = "alskjdsakldj"
curList = []
@@ -190,10 +209,11 @@ def splitSortedEntriesBy(entries, field):
return result
def sortEntriesBy(entries, field, default):
- """Take inputs as in splitEntriesBy, and return a list of entries sorted
- by the value of 'field'. Entries without 'field' are sorted as if their
- value were 'default'.
- """
+ """
+ Take inputs as in splitEntriesBy, and return a list of entries sorted
+ by the value of 'field'. Entries without 'field' are sorted as if their
+ value were 'default'.
+ """
tmp = []
i = 0
for ent in entries:
@@ -211,10 +231,11 @@ def sortEntriesBy(entries, field, default):
return [t[2] for t in tmp]
def splitEntriesByAuthor(entries):
- """Take a list of entries, sort them by author names, and return:
- a sorted list of (authorname-in-html, bibtex-entry-list) tuples,
- a map from authorname-in-html to name-for-url.
- Entries with multiple authors appear once per author.
+ """
+ Take a list of entries, sort them by author names, and return:
+ a sorted list of (authorname-in-html, bibtex-entry-list) tuples,
+ a map from authorname-in-html to name-for-url.
+ Entries with multiple authors appear once per author.
"""
collapsedAuthors = buildAuthorTable(entries)
entries = sortEntriesByDate(entries)
@@ -249,7 +270,9 @@ def splitEntriesByAuthor(entries):
## return [ t[2] for t in tmp ]
def sortEntriesByDate(entries):
- """Sort a list of entries by their publication date."""
+ """
+ Sort a list of entries by their publication date.
+ """
tmp = []
i = 0
for ent in entries:
@@ -265,13 +288,13 @@ def sortEntriesByDate(entries):
monthname = match.group(1)
mon = MONTHS.index(monthname)
except ValueError:
- print "Unknown month %r in %s"%(ent.get("month"), ent.key)
+ print("Unknown month %r in %s"%(ent.get("month"), ent.key))
mon = 0
try:
date = int(ent['year'])*13 + mon
except KeyError:
- print "ERROR: No year field in %s"%ent.key
+ print("ERROR: No year field in %s"%ent.key)
date = 10000*13
except ValueError:
date = 10000*13
@@ -289,27 +312,36 @@ DISPLAYED_FIELDS = ['title', 'author', 'journal', 'booktitle',
'note', 'series']
class BibTeXEntry:
- """A single BibTeX entry."""
+ """
+ A single BibTeX entry.
+ """
+
def __init__(self, type, key, entries):
self.type = type # Kind of entry: @book, @injournal,etc
self.key = key # What key does it have?
self.entries = entries # Map from key to value.
self.entryLine = 0 # Defined on this line number
+
def get(self, k, v=None):
return self.entries.get(k, v)
+
def has_key(self, k):
return self.entries.has_key(k)
+
def __getitem__(self, k):
return self.entries[k]
+
def __setitem__(self, k, v):
self.entries[k] = v
+
def __str__(self):
return self.format(70, 1)
+
def getURL(self):
"""Return the best URL to use for this paper, or None."""
best = None
for field in ['www_pdf_url', 'www_ps_gz_url', 'www_ps_url',
- 'www_html_url', 'www_txt_url', ]:
+ 'www_html_url', 'www_txt_url',]:
u = self.get(field)
if u:
if not best:
@@ -320,7 +352,9 @@ class BibTeXEntry:
return best
def format(self, width=70, indent=8, v=0, invStrings={}):
- """Format this entry as BibTeX."""
+ """
+ Format this entry as BibTeX.
+ """
d = ["@%s{%s,\n" % (self.type, self.key)]
if v:
df = DISPLAYED_FIELDS[:]
@@ -350,28 +384,34 @@ class BibTeXEntry:
d.append("}\n")
return "".join(d)
def resolve(self):
- """Handle post-processing for this entry"""
+ """
+ Handle post-processing for this entry
+ """
a = self.get('author')
if a:
self.parsedAuthor = parseAuthor(a)
- #print a
- #print " => ",repr(self.parsedAuthor)
+ #print(a)
+ #print(" => ",repr(self.parsedAuthor))
else:
self.parsedAuthor = None
def isImportant(self):
- """Return 1 iff this entry is marked as important"""
+ """
+ Return 1 iff this entry is marked as important
+ """
imp = self.get("www_important")
if imp and imp.strip().lower() not in ("no", "false", "0"):
return 1
return 0
def check(self):
- """Print any errors for this entry, and return true if there were
- none."""
+ """
+ Print any errors for this entry, and return true if there were
+ none.
+ """
errs = self._check()
for e in errs:
- print e
+ print(e)
return not errs
# FIXME: Here's some fields repeated after you enter the
@@ -395,7 +435,7 @@ class BibTeXEntry:
elif self.type == 'article':
fields = 'journal', 'year'
elif self.type == 'book':
- fields = 'title', 'year'
+ fields = 'title', 'year', 'publisher'
elif self.type == 'booklet':
fields = 'title', 'year'
elif self.type == 'techreport':
@@ -425,7 +465,8 @@ class BibTeXEntry:
if self.get("booktitle"):
if not self['booktitle'].startswith("Proceedings of") and \
not self['booktitle'].startswith("{Proceedings of"):
- errs.append("ERROR (record %s):\t %s's booktitle (%r) doesn't start with 'Proceedings of'"
+ errs.append("ERROR (record %s):\t %s's booktitle "
+ "(%r) doesn't start with 'Proceedings of'"
% (self.entryLine, selfself.key, self['booktitle']))
if self.has_key("pages") and not re.search(r'\d+--\d+', self['pages']):
@@ -434,18 +475,23 @@ class BibTeXEntry:
if self.type == 'proceedings':
if self.get('title'):
- errs.append("ERROR (record %s):\t %s is a proceedings: it should have a booktitle, not a title."
+ errs.append("ERROR (record %s):\t %s is a proceedings: "
+ "it should have a booktitle, not a title."
% (self.entryLine, self.key))
for field, value in self.entries.items():
if value.translate(ALLCHARS, PRINTINGCHARS):
- errs.append("ERROR (record %s):\t %s.%s has non-ASCII characters"
+ errs.append("ERROR (record %s):\t %s.%s "
+ "has non-ASCII characters"
% (self.entryLine, self.key, field))
if field.startswith("www_") and field not in WWW_FIELDS:
- errs.append("ERROR (record %s):\t unknown www field %s" % (self.entryLine, field))
+ errs.append("ERROR (record %s):\t unknown "
+ "www field %s"
+ % (self.entryLine, field))
if value.strip()[-1:] == '.' and \
field not in ("notes", "www_remarks", "author"):
- errs.append("ERROR (record %s):\t %s.%s has an extraneous period"
+ errs.append("ERROR (record %s):\t %s.%s "
+ "has an extraneous period"
% (self.entryLine, self.key, field))
return errs
@@ -462,7 +508,7 @@ class BibTeXEntry:
res = ["In the ", m.group(1),
'<a href="%s">'%bookurl, m.group(2), "</a>"]
else:
- res = ['In the <a href="%s">%s</a>' % (bookurl, booktitle)]
+ res = ['In the <a href="%s">%s</a>'% (bookurl, booktitle)]
else:
res = ["In the ", booktitle]
@@ -519,15 +565,21 @@ class BibTeXEntry:
if self.get('month') or self.get('year'):
res.append(", %s %s" % (self.get('month', ''),
self.get('year', '')))
- elif self.type == 'book':
- res = [self['publisher']]
- if self.get('year'):
- res.append(" ")
- res.append(self.get('year'))
- # res.append(", %s"%(self.get('year')))
- if self.get('series'):
- res.append(",")
- res.append(self['series'])
+ # elif self.type == 'book':
+ # res = [self['publisher']]
+ # if self.get('year'):
+ # res.append(" ")
+ # res.append(self.get('year'))
+ # # res.append(", %s"%(self.get('year')))
+ # if self.get('series'):
+ # res.append(",")
+ # res.append(self['series'])
+ # elif self.type == 'booklet':
+ # # res = self.get('publisher')
+ # res = [self['publisher']]
+ # if self.get('year'):
+ # res.append(" ")
+ # res.append(self.get('year'))
elif self.type == 'misc':
res = [self['howpublished']]
if self.get('month') or self.get('year'):
@@ -552,7 +604,9 @@ class BibTeXEntry:
return htmlize("".join(res))
def to_html(self, cache_path="./cache", base_url="."):
- """Return the HTML for this entry."""
+ """
+ Return the HTML for this entry.
+ """
imp = self.isImportant()
draft = self.get('year') == 'forthcoming'
if imp:
@@ -589,7 +643,8 @@ class BibTeXEntry:
cache_section = self.get('www_cache_section', ".")
if cache_section not in config.CACHE_SECTIONS:
if cache_section != ".":
- print >>sys.stderr, "Unrecognized cache section %s"%(cache_section)
+ print("Unrecognized cache section %s"%(cache_section),
+ file=sys.stderr)
cache_section = "."
for key, name, ext in (('www_abstract_url', 'abstract', 'abstract'),
@@ -639,13 +694,27 @@ class BibTeXEntry:
res.append(".")
res.append("</span><br />\n")
res.append(self.biblio_to_html())
- res.append("<a href='#%s'>·</a>"%url_untranslate(self.key))
+ res.append("\n<br>\n(<a href='#%s'>direct link</a>)"
+ %url_untranslate(self.key))
+ if self.get('url'):
+ res.append(" (<a href='%s'>website</a>)"
+ %htmlize(self['url']))
res.append("</p>")
if self.get('www_remarks'):
res.append("<p class='remarks'>%s</p>"
%htmlize(self['www_remarks']))
+ if self.get('abstract'):
+ res.append("<p class='abstract'>%s</p>"
+ %htmlize(self['abstract']))
+
+ res.append("\n<br>[<a href='#'>Go to top</a>]")
+
+ # We might want to make this invisible (and only
+ # visible in text browsers):
+ res.append("\n<hr>")
+
if imp or draft:
res.append("</div>")
res.append("</li>\n\n")
@@ -671,16 +740,22 @@ def TeXescapeURL(s):
return s
RE_LONE_AMP = re.compile(r'&([^a-z0-9])')
+
RE_LONE_I = re.compile(r'\\i([^a-z0-9])')
+
RE_ACCENT = re.compile(r'\\([\'`~^"c])([^{]|{.})')
+
RE_LIGATURE = re.compile(r'\\(AE|ae|OE|oe|AA|aa|O|o|ss)([^a-z0-9])')
+
ACCENT_MAP = {"'" : 'acute',
"`" : 'grave',
"~" : 'tilde',
"^" : 'circ',
'"' : 'uml',
"c" : 'cedil',}
+
UNICODE_MAP = {'ń' : 'ń',}
+
HTML_LIGATURE_MAP = {
'AE' : 'Æ',
'ae' : 'æ',
@@ -691,7 +766,9 @@ HTML_LIGATURE_MAP = {
'O' : 'Ø',
'o' : 'ø',
'ss' : 'ß',}
+
RE_TEX_CMD = re.compile(r"(?:\\[a-zA-Z@]+|\\.)")
+
RE_PAGE_SPAN = re.compile(r"(\d)--(\d)")
def _unaccent(m):
@@ -705,7 +782,9 @@ def _unlig_html(m):
return "%s%s"%(HTML_LIGATURE_MAP[m.group(1)], m.group(2))
def htmlize(s):
- """Turn a TeX string into good-looking HTML."""
+ """
+ Turn a TeX string into good-looking HTML.
+ """
s = RE_LONE_AMP.sub(lambda m: "&%s" % m.group(1), s)
s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
s = RE_ACCENT.sub(_unaccent, s)
@@ -719,14 +798,18 @@ def htmlize(s):
return s
def author_url(author):
- """Given an author's name, return a URL for his/her homepage."""
+ """
+ Given an author's name, return a URL for his/her homepage.
+ """
for pat, url in config.AUTHOR_RE_LIST:
if pat.search(author):
return url
return None
def txtize(s):
- """Turn a TeX string into decnent plaintext."""
+ """
+ Turn a TeX string into decent plaintext.
+ """
s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s)
s = RE_LIGATURE.sub(lambda m: "%s%s"%m.groups(), s)
@@ -923,7 +1006,7 @@ def parseAuthor(s):
try:
return _parseAuthor(s)
except:
- print >>sys.stderr, "Internal error while parsing author %r"%s
+ print("Internal error while parsing author %r"%s, file=sys.stderr)
raise
def _parseAuthor(s):
@@ -993,12 +1076,17 @@ def _parseAuthor(s):
return parsedAuthors
ALLCHARS = "".join(map(chr, range(256)))
+
PRINTINGCHARS = "\t\n\r"+"".join(map(chr, range(32, 127)))
+
LC_CHARS = "abcdefghijklmnopqrstuvwxyz"
+
SV_DELCHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"@")
+
RE_ESCAPED = re.compile(r'\\.')
+
def split_von(f, v, l, x):
in_von = 0
while x:
@@ -1126,11 +1214,12 @@ class Parser:
data.append(" ")
line = it.next()
elif line[0] == '#':
- print >>sys.stderr, "Weird concat on line %s"%it.lineno
+ print("Weird concat on line %s"%it.lineno,
+ file=sys.stderr)
elif line[0] in "},":
if not data:
- print >>sys.stderr, "No data after field on line %s"%(
- it.lineno)
+ print("No data after field on line %s"%(it.lineno),
+ file=sys.stderr)
else:
m = RAW_DATA_RE.match(line)
if m:
@@ -1247,7 +1336,9 @@ class Parser:
line = it.next()
while 1:
# Skip blank lines.
- while not line or line.isspace() or OUTER_COMMENT_RE.match(line):
+ while not line \
+ or line.isspace() \
+ or OUTER_COMMENT_RE.match(line):
line = it.next()
# Get the first line of an entry.
m = ENTRY_BEGIN_RE.match(line)
@@ -1262,17 +1353,22 @@ class Parser:
% it.lineno)
def _advance(it, line):
- while not line or line.isspace() or COMMENT_RE.match(line):
+ while not line \
+ or line.isspace() \
+ or COMMENT_RE.match(line):
line = it.next()
return line
# Matches a comment line outside of an entry.
OUTER_COMMENT_RE = re.compile(r'^\s*[\#\%]')
+
# Matches a comment line inside of an entry.
COMMENT_RE = re.compile(r'^\s*\%')
+
# Matches the start of an entry. group 1 is the type of the entry.
# group 2 is the rest of the line.
ENTRY_BEGIN_RE = re.compile(r'''^\s*\@([^\s\"\%\'\(\)\,\=\{\}]+)(.*)''')
+
# Start of an entry. group 1 is the keyword naming the entry.
BRACE_BEGIN_RE = re.compile(r'\s*\{(.*)')
BRACE_END_RE = re.compile(r'\s*\}(.*)')
@@ -1317,4 +1413,4 @@ if __name__ == '__main__':
for e in r.entries:
if e.type in ("proceedings", "journal"): continue
- print e.to_html()
+ print(e.to_html())