gnunetbib

Bibliography (BibTeX, based on AnonBib)
Log | Files | Refs | README | LICENSE

commit 347448c5827b68f1ff40c0cfdd34c7b62f863a0a
parent 867822dfb3f57e83c93f5e48bacd5a591829f148
Author: Martin Schanzenbach <mschanzenbach@posteo.de>
Date:   Thu,  2 Sep 2021 22:54:40 +0200

port to python3; add buildbot scripts

Diffstat:
A .buildbot/build.sh | 3 +++
A .buildbot/firefly-x86_64-amdepyc_deploy.sh | 7 +++++++
M BibTeX.py | 78 +++++++++++++++++++++++++++++++++++++++---------------------------------------
M Makefile | 2 +-
M anonbib.cfg | 2 +-
M config.py | 6 +++---
M rank.py | 32 ++++++++++++++++----------------
M writeHTML.py | 17 +++++++++--------
8 files changed, 79 insertions(+), 68 deletions(-)

diff --git a/.buildbot/build.sh b/.buildbot/build.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +make diff --git a/.buildbot/firefly-x86_64-amdepyc_deploy.sh b/.buildbot/firefly-x86_64-amdepyc_deploy.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# Deploy bib from buildbot + +chmod -R ag+rX . +DEPLOY_USER="www" +rsync -a --delete . $DEPLOY_USER@firefly.gnunet.org:~/bib/ diff --git a/BibTeX.py b/BibTeX.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info. # Copyright 2018, 2019 ng0 <ng0@n0.is>. @@ -7,9 +7,9 @@ BibTeX.py -- parse and manipulate BibTeX files and entries. Based on perl code by Eddie Kohler; heavily modified. """ -from __future__ import print_function + from future.utils import raise_with_traceback -import cStringIO +from io import StringIO ## for Python 3 import re import sys import os @@ -104,8 +104,8 @@ class BibTeX: if cr.entryLine < ent.entryLine: print("Warning: crossref %s used after declaration"%cr.key) - for k in cr.entries.keys(): - if ent.entries.has_key(k): + for k in list(cr.entries.keys()): + if k in ent.entries: print("ERROR: %s defined both in %s and in %s" %(k, ent.key, cr.key)) else: @@ -118,9 +118,9 @@ class BibTeX: # hack: if no key is required, require "title", since every # entry will have a title. rk = "title" - + print("rk is " + rk) for ent in self.entries: - if ent.type in config.OMIT_ENTRIES or not ent.has_key(rk): + if ent.type in config.OMIT_ENTRIES or rk not in ent.entries.keys(): ent.check() del self.byKey[ent.key.lower()] else: @@ -138,7 +138,7 @@ def buildAuthorTable(entries): authorsByLast.setdefault(tuple(a.last), []).append(a) # map from author to collapsed author. 
result = {} - for k, v in config.COLLAPSE_AUTHORS.items(): + for k, v in list(config.COLLAPSE_AUTHORS.items()): a = parseAuthor(k)[0] c = parseAuthor(v)[0] result[c] = c @@ -146,7 +146,7 @@ def buildAuthorTable(entries): for e in entries: for author in e.parsedAuthor: - if result.has_key(author): + if author in result: continue c = author @@ -157,7 +157,7 @@ def buildAuthorTable(entries): result[author] = c if 0: - for a, c in result.items(): + for a, c in list(result.items()): if a != c: print("Collapsing authors: %s => %s" % (a, c)) if 0: @@ -253,7 +253,7 @@ def splitEntriesByAuthor(entries): htmlResult[sortkey] = secname result.setdefault(sortkey, []).append(ent) - sortnames = result.keys() + sortnames = list(result.keys()) sortnames.sort() sections = [(htmlResult[n], result[n]) for n in sortnames] return sections, url_map @@ -326,7 +326,7 @@ class BibTeXEntry: return self.entries.get(k, v) def has_key(self, k): - return self.entries.has_key(k) + return k in self.entries def __getitem__(self, k): return self.entries[k] @@ -358,25 +358,25 @@ class BibTeXEntry: d = ["@%s{%s,\n" % (self.type, self.key)] if v: df = DISPLAYED_FIELDS[:] - for k in self.entries.keys(): + for k in list(self.entries.keys()): if k not in df: df.append(k) else: df = DISPLAYED_FIELDS for f in df: - if not self.entries.has_key(f): + if f not in self.entries: continue v = self.entries[f] if v.startswith("<span class='bad'>"): d.append("%%%%% ERROR: Missing field\n") d.append("%% %s = {?????},\n"%f) continue - np = v.translate(ALLCHARS, PRINTINGCHARS) + np = v.translate(str.maketrans(ALLCHARS, ALLCHARS, PRINTINGCHARS)) if np: d.append("%%%%% "+("ERROR: Non-ASCII characters: '%r'\n"%np)) d.append(" ") v = v.replace("&", "&amp;") - if invStrings.has_key(v): + if v in invStrings: s = "%s = %s,\n" %(f, invStrings[v]) else: s = "%s = {%s},\n" % (f, v) @@ -469,7 +469,7 @@ class BibTeXEntry: "(%r) doesn't start with 'Proceedings of'" % (self.entryLine, selfself.key, self['booktitle'])) - if 
self.has_key("pages") and not re.search(r'\d+--\d+', self['pages']): + if "pages" in self.entries.keys() and not re.search(r'\d+--\d+', self.entries['pages']): errs.append("ERROR (record %s):\t Misformed pages in %s" % (self.entryLine, self.key)) @@ -479,8 +479,8 @@ class BibTeXEntry: "it should have a booktitle, not a title." % (self.entryLine, self.key)) - for field, value in self.entries.items(): - if value.translate(ALLCHARS, PRINTINGCHARS): + for field, value in list(self.entries.items()): + if value.translate(str.maketrans(ALLCHARS, ALLCHARS, PRINTINGCHARS)): errs.append("ERROR (record %s):\t %s.%s " "has non-ASCII characters" % (self.entryLine, self.key, field)) @@ -806,7 +806,7 @@ def htmlize(s): s = unTeXescapeURL(s) s = RE_LIGATURE.sub(_unlig_html, s) s = RE_TEX_CMD.sub("", s) - s = s.translate(ALLCHARS, "{}") + s = s.translate(str.maketrans(ALLCHARS, ALLCHARS, "{}")) s = RE_PAGE_SPAN.sub(lambda m: "%s-%s"%(m.groups()), s) s = s.replace("---", "&mdash;") s = s.replace("--", "&ndash;") @@ -829,7 +829,7 @@ def txtize(s): s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s) s = RE_LIGATURE.sub(lambda m: "%s%s"%m.groups(), s) s = RE_TEX_CMD.sub("", s) - s = s.translate(ALLCHARS, "{}") + s = s.translate(str.maketrans(ALLCHARS, ALLCHARS, "{}")) return s PROCEEDINGS_RE = re.compile(r'((?:proceedings|workshop record) of(?: the)? 
)(.*)', re.I) @@ -983,7 +983,7 @@ def _split(s, w=79, indent=8): first = 1 indentation = "" while len(s) > w: - for i in xrange(w-1, 20, -1): + for i in range(w-1, 20, -1): if s[i] == ' ': r.append(indentation+s[:i]) s = s[i+1:] @@ -1005,14 +1005,14 @@ class FileIter: if fname: file = open(fname, 'r') if string: - file = cStringIO.StringIO(string) + file = StringIO(string) if file: - it = iter(file.xreadlines()) + it = iter(file) self.iter = it assert self.iter self.lineno = 0 - self._next = it.next - def next(self): + self._next = it.__next__ + def __next__(self): self.lineno += 1 return self._next() @@ -1034,7 +1034,7 @@ def _parseAuthor(s): while s: s = s.strip() bracelevel = 0 - for i in xrange(len(s)): + for i in range(len(s)): if s[i] == '{': bracelevel += 1 elif s[i] == '}': @@ -1090,9 +1090,9 @@ def _parseAuthor(s): return parsedAuthors -ALLCHARS = "".join(map(chr, range(256))) +ALLCHARS = "".join(map(chr, list(range(256)))) -PRINTINGCHARS = "\t\n\r"+"".join(map(chr, range(32, 127))) +PRINTINGCHARS = "\t\n\r"+"".join(map(chr, list(range(32, 127)))) LC_CHARS = "abcdefghijklmnopqrstuvwxyz" @@ -1108,10 +1108,10 @@ def split_von(f, v, l, x): tt = t = x[0] del x[0] if tt[:2] == '{\\': - tt = tt.translate(ALLCHARS, SV_DELCHARS) + tt = tt.translate(str.maketrans(ALLCHARS, ALLCHARS, SV_DELCHARS)) tt = RE_ESCAPED.sub("", tt) - tt = tt.translate(ALLCHARS, "{}") - if tt.translate(ALLCHARS, LC_CHARS) == "": + tt = tt.translate(str.maketrans(ALLCHARS, ALLCHARS, "{}")) + if tt.translate(str.maketrans(ALLCHARS, ALLCHARS, LC_CHARS)) == "": v.append(t) in_von = 1 elif in_von and f is not None: @@ -1145,7 +1145,7 @@ class Parser: self.strings.update(initial_strings) self.newStrings = {} self.invStrings = {} - for k, v in config.INITIAL_STRINGS.items(): + for k, v in list(config.INITIAL_STRINGS.items()): self.invStrings[v] = k self.fileiter = fileiter if result is None: @@ -1200,7 +1200,7 @@ class Parser: continue data.append(line) data.append(" ") - line = it.next() + line = 
next(it) self.litStringLine = 0 elif line[0] == '{': bracelevel += 1 @@ -1227,7 +1227,7 @@ class Parser: #print bracelevel, "C", repr(line) data.append(line) data.append(" ") - line = it.next() + line = next(it) elif line[0] == '#': print("Weird concat on line %s"%it.lineno, file=sys.stderr) @@ -1321,7 +1321,7 @@ class Parser: else: key = v[0] d = {} - for i in xrange(1, len(v), 2): + for i in range(1, len(v), 2): d[v[i].lower()] = v[i+1] ent = BibTeXEntry(self.curEntType, key, d) ent.entryLine = self.entryLine @@ -1348,13 +1348,13 @@ class Parser: def _parse(self): it = self.fileiter - line = it.next() + line = next(it) while 1: # Skip blank lines. while not line \ or line.isspace() \ or OUTER_COMMENT_RE.match(line): - line = it.next() + line = next(it) # Get the first line of an entry. m = ENTRY_BEGIN_RE.match(line) if m: @@ -1371,7 +1371,7 @@ def _advance(it, line): while not line \ or line.isspace() \ or COMMENT_RE.match(line): - line = it.next() + line = next(it) return line # Matches a comment line outside of an entry. diff --git a/Makefile b/Makefile @@ -1,4 +1,4 @@ -PYTHON=python2 +PYTHON=python3 VERSION=0.3-dev all: diff --git a/anonbib.cfg b/anonbib.cfg @@ -234,4 +234,4 @@ TAG_DIRECTORIES = {'': "full", "selected": ""} MULTI_VAL_FIELDS = ["www_section"] # Make cached stuff group-writable. Make sure that your cache directories # are sticky! 
-CACHE_UMASK = 002 +CACHE_UMASK = 0o002 diff --git a/config.py b/config.py @@ -22,7 +22,7 @@ def load(cfgFile): Load config FILE """ mod = {} - execfile(cfgFile, mod) + exec(compile(open(cfgFile, "rb").read(), cfgFile, 'exec'), mod) for _k in _KEYS: try: globals()[_k] = mod[_k] @@ -31,7 +31,7 @@ def load(cfgFile): INITIAL_STRINGS.update(_EXTRA_INITIAL_STRINGS) AUTHOR_RE_LIST[:] = [ - (re.compile(k, re.I), v,) for k, v in AUTHOR_URLS.items() + (re.compile(k, re.I), v,) for k, v in list(AUTHOR_URLS.items()) ] NO_COLLAPSE_AUTHORS_RE_LIST[:] = [ @@ -39,7 +39,7 @@ def load(cfgFile): ] ALPHABETIZE_AUTHOR_AS_RE_LIST[:] = [ - (re.compile(k, re.I), v,) for k, v in ALPHABETIZE_AUTHOR_AS.items() + (re.compile(k, re.I), v,) for k, v in list(ALPHABETIZE_AUTHOR_AS.items()) ] _EXTRA_INITIAL_STRINGS = { diff --git a/rank.py b/rank.py @@ -32,15 +32,15 @@ def cache_folder(): return r import re -from urllib2 import urlopen, build_opener -from urllib import quote +from urllib.request import urlopen, build_opener +from urllib.parse import quote from datetime import date import hashlib # A more handy hash def md5h(s): m = hashlib.md5() - m.update(s) + m.update(s.encode('utf-8')) return m.hexdigest() format_tested = 0 @@ -66,17 +66,17 @@ def getPageForTitle(title, cache=True, update=True, save=True): if exists(join(cache_folder(), md5h(url))) and cache: return url, file(join(cache_folder(), md5h(url)),'r').read() elif update: - print "Downloading rank for %r."%title + print(("Downloading rank for %r."%title)) # Make a custom user agent (so that we are not filtered by Google)! opener = build_opener() opener.addheaders = [('User-agent', 'Anon.Bib.0.1')] - print "connecting..." 
+ print("connecting...") connection = opener.open(url) - print "reading" + print("reading") page = connection.read() - print "done" + print("done") if save: file(join(cache_folder(), md5h(url)),'w').write(page) return url, page @@ -140,20 +140,20 @@ def get_rank_html(title, years=None, base_url=".", update=True, def TestScholarFormat(): # We need to ensure that Google Scholar does not change its page format under our feet # Use some cases to check if all is good - print "Checking google scholar formats..." + print("Checking google scholar formats...") stopAndGoCites = getCite("Stop-and-Go MIXes: Providing Probabilistic Anonymity in an Open System", False)[0] dragonCites = getCite("Mixes protected by Dragons and Pixies: an empirical study", False, save=False)[0] if stopAndGoCites in (0, None): - print """OOPS.\n + print("""OOPS.\n It looks like Google Scholar changed their URL format or their output format. -I went to count the cites for the Stop-and-Go MIXes paper, and got nothing.""" +I went to count the cites for the Stop-and-Go MIXes paper, and got nothing.""") sys.exit(1) if dragonCites != None: - print """OOPS.\n + print("""OOPS.\n It looks like Google Scholar changed their URL format or their output format. -I went to count the cites for a fictitious paper, and found some.""" +I went to count the cites for a fictitious paper, and found some.""") sys.exit(1) def urlIsUseless(u): @@ -182,7 +182,7 @@ if __name__ == '__main__': bib = BibTeX.parseFile(config.MASTER_BIB) remove_old() - print "Downloading missing ranks." + print("Downloading missing ranks.") for ent in bib.entries: getCite(ent['title'], cache=True, update=True) @@ -190,13 +190,13 @@ if __name__ == '__main__': for ent in bib.entries: haveOne = False for utype in URLTYPES: - if ent.has_key("www_%s_url"%utype): + if "www_%s_url"%utype in ent: haveOne = True break if haveOne: continue - print ent.key, "has no URLs given." 
+ print((ent.key, "has no URLs given.")) urls = [ u for u in getPaperURLs(ent['title']) if not urlIsUseless(u) ] for u in urls: - print "\t", u + print(("\t", u)) diff --git a/writeHTML.py b/writeHTML.py @@ -1,12 +1,13 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info. # Copyright 2018, 2019 ng0 <ng0@n0.is> """ Generate indices by author, topic, date, and BibTeX key. """ -from __future__ import print_function + from future.utils import raise_with_traceback +from io import BytesIO ## for Python 3 import sys import re import os @@ -16,7 +17,7 @@ import config assert sys.version_info[:3] >= (2, 2, 0) -os.umask(022) +os.umask(0o22) def getTemplate(name): template_file = open(name) @@ -81,7 +82,7 @@ def writeHTML(f, sections, sectionType, fieldName, choices, # tagListStr = [] - st = config.TAG_SHORT_TITLES.keys() + st = list(config.TAG_SHORT_TITLES.keys()) st.sort() root = "../"*pathLength(config.TAG_DIRECTORIES[tag]) if root == "": root = "." @@ -144,7 +145,7 @@ def writePageSet(config, bib, tag): cache_url_path = BibTeX.smartJoin("../"*pathLength(tagdir), config.CACHE_DIR) if not os.path.exists(outdir): - os.makedirs(outdir, 0755) + os.makedirs(outdir, 0o755) ##### Sorted views: ## By topic. 
@@ -186,7 +187,7 @@ def writePageSet(config, bib, tag): except ValueError: last_year = int(entries[-2][1][0].get('year')) - years = map(str, range(first_year, last_year+1)) + years = list(map(str, list(range(first_year, last_year+1)))) if entries[-1][0] == 'Unknown': years.append("Unknown") @@ -252,7 +253,7 @@ def writePageSet(config, bib, tag): for ent in entries: bib_file_dir = os.path.join(biboutdir, ent.key) if not os.path.exists(bib_file_dir): - os.makedirs(bib_file_dir, 0755) + os.makedirs(bib_file_dir, 0o755) single_bib_file = open(os.path.join(bib_file_dir, "record.bib"), 'w') @@ -270,5 +271,5 @@ if __name__ == '__main__': bib = BibTeX.parseFile(config.MASTER_BIB) - for tag in config.TAG_DIRECTORIES.keys(): + for tag in list(config.TAG_DIRECTORIES.keys()): writePageSet(config, bib, tag)