port to python3; add buildbot scripts - gnunetbib - Bibliography (BibTeX, based on AnonBib)

commit 347448c5827b68f1ff40c0cfdd34c7b62f863a0a
parent 867822dfb3f57e83c93f5e48bacd5a591829f148
Author: Martin Schanzenbach <mschanzenbach@posteo.de>
Date:   Thu,  2 Sep 2021 22:54:40 +0200

port to python3; add buildbot scripts

Diffstat:
A .buildbot/build.sh  | 3 +++
A .buildbot/firefly-x86_64-amdepyc_deploy.sh  | 7 +++++++
M BibTeX.py  | 78 +++++++++++++++++++++++++++++++++++++++---------------------------------------
M Makefile  | 2 +-
M anonbib.cfg  | 2 +-
M config.py  | 6 +++---
M rank.py  | 32 ++++++++++++++++----------------
M writeHTML.py  | 17 +++++++++--------

8 files changed, 79 insertions(+), 68 deletions(-)
diff --git a/.buildbot/build.sh b/.buildbot/build.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+make
diff --git a/.buildbot/firefly-x86_64-amdepyc_deploy.sh b/.buildbot/firefly-x86_64-amdepyc_deploy.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+# Deploy bib from buildbot
+
+chmod -R ag+rX .
+DEPLOY_USER="www"
+rsync -a --delete . $DEPLOY_USER@firefly.gnunet.org:~/bib/
diff --git a/BibTeX.py b/BibTeX.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2
+#!/usr/bin/python3
 # Copyright 2003-2008, Nick Mathewson.  See LICENSE for licensing info.
 # Copyright 2018, 2019 ng0 <ng0@n0.is>.
 
@@ -7,9 +7,9 @@ BibTeX.py -- parse and manipulate BibTeX files and entries.
 Based on perl code by Eddie Kohler; heavily modified.
 """
 
-from __future__ import print_function
+
 from future.utils import raise_with_traceback
-import cStringIO
+from io import StringIO ## for Python 3
 import re
 import sys
 import os
@@ -104,8 +104,8 @@ class BibTeX:
                 if cr.entryLine < ent.entryLine:
                     print("Warning: crossref %s used after declaration"%cr.key)
 
-                for k in cr.entries.keys():
-                    if ent.entries.has_key(k):
+                for k in list(cr.entries.keys()):
+                    if k in ent.entries:
                         print("ERROR: %s defined both in %s and in %s"
                               %(k, ent.key, cr.key))
                     else:
@@ -118,9 +118,9 @@ class BibTeX:
             # hack: if no key is required, require "title", since every
             # entry will have a title.
             rk = "title"
-
+        print("rk is " + rk)
         for ent in self.entries:
-            if ent.type in config.OMIT_ENTRIES or not ent.has_key(rk):
+            if ent.type in config.OMIT_ENTRIES or rk not in ent.entries.keys():
                 ent.check()
                 del self.byKey[ent.key.lower()]
             else:
@@ -138,7 +138,7 @@ def buildAuthorTable(entries):
             authorsByLast.setdefault(tuple(a.last), []).append(a)
     # map from author to collapsed author.
     result = {}
-    for k, v in config.COLLAPSE_AUTHORS.items():
+    for k, v in list(config.COLLAPSE_AUTHORS.items()):
         a = parseAuthor(k)[0]
         c = parseAuthor(v)[0]
         result[c] = c
@@ -146,7 +146,7 @@ def buildAuthorTable(entries):
 
     for e in entries:
         for author in e.parsedAuthor:
-            if result.has_key(author):
+            if author in result:
                 continue
 
             c = author
@@ -157,7 +157,7 @@ def buildAuthorTable(entries):
             result[author] = c
 
     if 0:
-        for a, c in result.items():
+        for a, c in list(result.items()):
             if a != c:
                 print("Collapsing authors: %s => %s" % (a, c))
     if 0:
@@ -253,7 +253,7 @@ def splitEntriesByAuthor(entries):
 
             htmlResult[sortkey] = secname
             result.setdefault(sortkey, []).append(ent)
-    sortnames = result.keys()
+    sortnames = list(result.keys())
     sortnames.sort()
     sections = [(htmlResult[n], result[n]) for n in sortnames]
     return sections, url_map
@@ -326,7 +326,7 @@ class BibTeXEntry:
         return self.entries.get(k, v)
 
     def has_key(self, k):
-        return self.entries.has_key(k)
+        return k in self.entries
 
     def __getitem__(self, k):
         return self.entries[k]
@@ -358,25 +358,25 @@ class BibTeXEntry:
         d = ["@%s{%s,\n" % (self.type, self.key)]
         if v:
             df = DISPLAYED_FIELDS[:]
-            for k in self.entries.keys():
+            for k in list(self.entries.keys()):
                 if k not in df:
                     df.append(k)
         else:
             df = DISPLAYED_FIELDS
         for f in df:
-            if not self.entries.has_key(f):
+            if f not in self.entries:
                 continue
             v = self.entries[f]
             if v.startswith("<span class='bad'>"):
                 d.append("%%%%% ERROR: Missing field\n")
                 d.append("%% %s = {?????},\n"%f)
                 continue
-            np = v.translate(ALLCHARS, PRINTINGCHARS)
+            np = v.translate(str.maketrans(ALLCHARS, ALLCHARS, PRINTINGCHARS))
             if np:
                 d.append("%%%%% "+("ERROR: Non-ASCII characters: '%r'\n"%np))
             d.append("  ")
             v = v.replace("&", "&amp;")
-            if invStrings.has_key(v):
+            if v in invStrings:
                 s = "%s = %s,\n" %(f, invStrings[v])
             else:
                 s = "%s = {%s},\n" % (f, v)
@@ -469,7 +469,7 @@ class BibTeXEntry:
                                 "(%r) doesn't start with 'Proceedings of'"
                                 % (self.entryLine, selfself.key, self['booktitle']))
 
-        if self.has_key("pages") and not re.search(r'\d+--\d+', self['pages']):
+        if "pages" in self.entries.keys() and not re.search(r'\d+--\d+', self.entries['pages']):
             errs.append("ERROR (record %s):\t Misformed pages in %s"
                         % (self.entryLine, self.key))
 
@@ -479,8 +479,8 @@ class BibTeXEntry:
                             "it should have a booktitle, not a title."
                             % (self.entryLine, self.key))
 
-        for field, value in self.entries.items():
-            if value.translate(ALLCHARS, PRINTINGCHARS):
+        for field, value in list(self.entries.items()):
+            if value.translate(str.maketrans(ALLCHARS, ALLCHARS, PRINTINGCHARS)):
                 errs.append("ERROR (record %s):\t %s.%s "
                             "has non-ASCII characters"
                             % (self.entryLine, self.key, field))
@@ -806,7 +806,7 @@ def htmlize(s):
     s = unTeXescapeURL(s)
     s = RE_LIGATURE.sub(_unlig_html, s)
     s = RE_TEX_CMD.sub("", s)
-    s = s.translate(ALLCHARS, "{}")
+    s = s.translate(str.maketrans(ALLCHARS, ALLCHARS, "{}"))
     s = RE_PAGE_SPAN.sub(lambda m: "%s-%s"%(m.groups()), s)
     s = s.replace("---", "&mdash;")
     s = s.replace("--", "&ndash;")
@@ -829,7 +829,7 @@ def txtize(s):
     s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s)
     s = RE_LIGATURE.sub(lambda m: "%s%s"%m.groups(), s)
     s = RE_TEX_CMD.sub("", s)
-    s = s.translate(ALLCHARS, "{}")
+    s = s.translate(str.maketrans(ALLCHARS, ALLCHARS, "{}"))
     return s
 
 PROCEEDINGS_RE = re.compile(r'((?:proceedings|workshop record) of(?: the)? )(.*)', re.I)
@@ -983,7 +983,7 @@ def _split(s, w=79, indent=8):
     first = 1
     indentation = ""
     while len(s) > w:
-        for i in xrange(w-1, 20, -1):
+        for i in range(w-1, 20, -1):
             if s[i] == ' ':
                 r.append(indentation+s[:i])
                 s = s[i+1:]
@@ -1005,14 +1005,14 @@ class FileIter:
         if fname:
             file = open(fname, 'r')
         if string:
-            file = cStringIO.StringIO(string)
+            file = StringIO(string)
         if file:
-            it = iter(file.xreadlines())
+            it = iter(file)
         self.iter = it
         assert self.iter
         self.lineno = 0
-        self._next = it.next
-    def next(self):
+        self._next = it.__next__
+    def __next__(self):
         self.lineno += 1
         return self._next()
 
@@ -1034,7 +1034,7 @@ def _parseAuthor(s):
     while s:
         s = s.strip()
         bracelevel = 0
-        for i in xrange(len(s)):
+        for i in range(len(s)):
             if s[i] == '{':
                 bracelevel += 1
             elif s[i] == '}':
@@ -1090,9 +1090,9 @@ def _parseAuthor(s):
 
     return parsedAuthors
 
-ALLCHARS = "".join(map(chr, range(256)))
+ALLCHARS = "".join(map(chr, list(range(256))))
 
-PRINTINGCHARS = "\t\n\r"+"".join(map(chr, range(32, 127)))
+PRINTINGCHARS = "\t\n\r"+"".join(map(chr, list(range(32, 127))))
 
 LC_CHARS = "abcdefghijklmnopqrstuvwxyz"
 
@@ -1108,10 +1108,10 @@ def split_von(f, v, l, x):
         tt = t = x[0]
         del x[0]
         if tt[:2] == '{\\':
-            tt = tt.translate(ALLCHARS, SV_DELCHARS)
+            tt = tt.translate(str.maketrans(ALLCHARS, ALLCHARS, SV_DELCHARS))
             tt = RE_ESCAPED.sub("", tt)
-            tt = tt.translate(ALLCHARS, "{}")
-        if tt.translate(ALLCHARS, LC_CHARS) == "":
+            tt = tt.translate(str.maketrans(ALLCHARS, ALLCHARS, "{}"))
+        if tt.translate(str.maketrans(ALLCHARS, ALLCHARS, LC_CHARS)) == "":
             v.append(t)
             in_von = 1
         elif in_von and f is not None:
@@ -1145,7 +1145,7 @@ class Parser:
         self.strings.update(initial_strings)
         self.newStrings = {}
         self.invStrings = {}
-        for k, v in config.INITIAL_STRINGS.items():
+        for k, v in list(config.INITIAL_STRINGS.items()):
             self.invStrings[v] = k
         self.fileiter = fileiter
         if result is None:
@@ -1200,7 +1200,7 @@ class Parser:
                         continue
                     data.append(line)
                     data.append(" ")
-                    line = it.next()
+                    line = next(it)
                 self.litStringLine = 0
             elif line[0] == '{':
                 bracelevel += 1
@@ -1227,7 +1227,7 @@ class Parser:
                         #print bracelevel, "C", repr(line)
                         data.append(line)
                         data.append(" ")
-                        line = it.next()
+                        line = next(it)
             elif line[0] == '#':
                 print("Weird concat on line %s"%it.lineno,
                       file=sys.stderr)
@@ -1321,7 +1321,7 @@ class Parser:
         else:
             key = v[0]
             d = {}
-            for i in xrange(1, len(v), 2):
+            for i in range(1, len(v), 2):
                 d[v[i].lower()] = v[i+1]
             ent = BibTeXEntry(self.curEntType, key, d)
             ent.entryLine = self.entryLine
@@ -1348,13 +1348,13 @@ class Parser:
 
     def _parse(self):
         it = self.fileiter
-        line = it.next()
+        line = next(it)
         while 1:
             # Skip blank lines.
             while not line \
                   or line.isspace() \
                   or OUTER_COMMENT_RE.match(line):
-                line = it.next()
+                line = next(it)
             # Get the first line of an entry.
             m = ENTRY_BEGIN_RE.match(line)
             if m:
@@ -1371,7 +1371,7 @@ def _advance(it, line):
     while not line \
           or line.isspace() \
           or COMMENT_RE.match(line):
-        line = it.next()
+        line = next(it)
     return line
 
 # Matches a comment line outside of an entry.
diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-PYTHON=python2
+PYTHON=python3
 VERSION=0.3-dev
 
 all:
diff --git a/anonbib.cfg b/anonbib.cfg
@@ -234,4 +234,4 @@ TAG_DIRECTORIES = {'': "full", "selected": ""}
 MULTI_VAL_FIELDS = ["www_section"]
 # Make cached stuff group-writable.  Make sure that your cache directories
 # are sticky!
-CACHE_UMASK = 002
+CACHE_UMASK = 0o002
diff --git a/config.py b/config.py
@@ -22,7 +22,7 @@ def load(cfgFile):
     Load config FILE
     """
     mod = {}
-    execfile(cfgFile, mod)
+    exec(compile(open(cfgFile, "rb").read(), cfgFile, 'exec'), mod)
     for _k in _KEYS:
         try:
             globals()[_k] = mod[_k]
@@ -31,7 +31,7 @@ def load(cfgFile):
 
     INITIAL_STRINGS.update(_EXTRA_INITIAL_STRINGS)
     AUTHOR_RE_LIST[:] = [
-        (re.compile(k, re.I), v,) for k, v in AUTHOR_URLS.items()
+        (re.compile(k, re.I), v,) for k, v in list(AUTHOR_URLS.items())
         ]
 
     NO_COLLAPSE_AUTHORS_RE_LIST[:] = [
@@ -39,7 +39,7 @@ def load(cfgFile):
         ]
 
     ALPHABETIZE_AUTHOR_AS_RE_LIST[:] = [
-        (re.compile(k, re.I), v,) for k, v in ALPHABETIZE_AUTHOR_AS.items()
+        (re.compile(k, re.I), v,) for k, v in list(ALPHABETIZE_AUTHOR_AS.items())
         ]
 
 _EXTRA_INITIAL_STRINGS = {
diff --git a/rank.py b/rank.py
@@ -32,15 +32,15 @@ def cache_folder():
    return r
 
 import re
-from urllib2 import urlopen, build_opener
-from urllib import quote
+from urllib.request import urlopen, build_opener
+from urllib.parse import quote
 from datetime import date
 import hashlib
 
 # A more handy hash
 def md5h(s):
    m = hashlib.md5()
-   m.update(s)
+   m.update(s.encode('utf-8'))
    return m.hexdigest()
 
 format_tested = 0
@@ -66,17 +66,17 @@ def getPageForTitle(title, cache=True, update=True, save=True):
    if exists(join(cache_folder(), md5h(url))) and cache:
       return url, file(join(cache_folder(), md5h(url)),'r').read()
    elif update:
-      print "Downloading rank for %r."%title
+      print(("Downloading rank for %r."%title))
 
       # Make a custom user agent (so that we are not filtered by Google)!
       opener = build_opener()
       opener.addheaders = [('User-agent', 'Anon.Bib.0.1')]
 
-      print "connecting..."
+      print("connecting...")
       connection = opener.open(url)
-      print "reading"
+      print("reading")
       page = connection.read()
-      print "done"
+      print("done")
       if save:
          file(join(cache_folder(), md5h(url)),'w').write(page)
       return url, page
@@ -140,20 +140,20 @@ def get_rank_html(title, years=None, base_url=".", update=True,
 def TestScholarFormat():
    # We need to ensure that Google Scholar does not change its page format under our feet
    # Use some cases to check if all is good
-   print "Checking google scholar formats..."
+   print("Checking google scholar formats...")
    stopAndGoCites = getCite("Stop-and-Go MIXes: Providing Probabilistic Anonymity in an Open System", False)[0]
    dragonCites = getCite("Mixes protected by Dragons and Pixies: an empirical study", False, save=False)[0]
 
    if stopAndGoCites in (0, None):
-      print """OOPS.\n
+      print("""OOPS.\n
 It looks like Google Scholar changed their URL format or their output format.
-I went to count the cites for the Stop-and-Go MIXes paper, and got nothing."""
+I went to count the cites for the Stop-and-Go MIXes paper, and got nothing.""")
       sys.exit(1)
 
    if dragonCites != None:
-      print """OOPS.\n
+      print("""OOPS.\n
 It looks like Google Scholar changed their URL format or their output format.
-I went to count the cites for a fictitious paper, and found some."""
+I went to count the cites for a fictitious paper, and found some.""")
       sys.exit(1)
 
 def urlIsUseless(u):
@@ -182,7 +182,7 @@ if __name__ == '__main__':
    bib = BibTeX.parseFile(config.MASTER_BIB)
    remove_old()
 
-   print "Downloading missing ranks."
+   print("Downloading missing ranks.")
    for ent in bib.entries:
       getCite(ent['title'], cache=True, update=True)
 
@@ -190,13 +190,13 @@ if __name__ == '__main__':
       for ent in bib.entries:
          haveOne = False
          for utype in URLTYPES:
-            if ent.has_key("www_%s_url"%utype):
+            if "www_%s_url"%utype in ent:
                haveOne = True
                break
          if haveOne:
             continue
-         print ent.key, "has no URLs given."
+         print((ent.key, "has no URLs given."))
          urls = [ u for u in getPaperURLs(ent['title']) if not urlIsUseless(u) ]
          for u in urls:
-            print "\t", u
+            print(("\t", u))
 
diff --git a/writeHTML.py b/writeHTML.py
@@ -1,12 +1,13 @@
-#!/usr/bin/python2
+#!/usr/bin/python3
 # Copyright 2003-2008, Nick Mathewson.  See LICENSE for licensing info.
 # Copyright 2018, 2019 ng0 <ng0@n0.is>
 
 """
 Generate indices by author, topic, date, and BibTeX key.
 """
-from __future__ import print_function
+
 from future.utils import raise_with_traceback
+from io import BytesIO ## for Python 3
 import sys
 import re
 import os
@@ -16,7 +17,7 @@ import config
 
 
 assert sys.version_info[:3] >= (2, 2, 0)
-os.umask(022)
+os.umask(0o22)
 
 def getTemplate(name):
     template_file = open(name)
@@ -81,7 +82,7 @@ def writeHTML(f, sections, sectionType, fieldName, choices,
 
     #
     tagListStr = []
-    st = config.TAG_SHORT_TITLES.keys()
+    st = list(config.TAG_SHORT_TITLES.keys())
     st.sort()
     root = "../"*pathLength(config.TAG_DIRECTORIES[tag])
     if root == "": root = "."
@@ -144,7 +145,7 @@ def writePageSet(config, bib, tag):
     cache_url_path = BibTeX.smartJoin("../"*pathLength(tagdir),
                                       config.CACHE_DIR)
     if not os.path.exists(outdir):
-        os.makedirs(outdir, 0755)
+        os.makedirs(outdir, 0o755)
     ##### Sorted views:
 
     ## By topic.
@@ -186,7 +187,7 @@ def writePageSet(config, bib, tag):
     except ValueError:
         last_year = int(entries[-2][1][0].get('year'))
 
-    years = map(str, range(first_year, last_year+1))
+    years = list(map(str, list(range(first_year, last_year+1))))
     if entries[-1][0] == 'Unknown':
         years.append("Unknown")
 
@@ -252,7 +253,7 @@ def writePageSet(config, bib, tag):
     for ent in entries:
         bib_file_dir = os.path.join(biboutdir, ent.key)
         if not os.path.exists(bib_file_dir):
-            os.makedirs(bib_file_dir, 0755)
+            os.makedirs(bib_file_dir, 0o755)
         single_bib_file = open(os.path.join(bib_file_dir,
                                             "record.bib"),
                                'w')
@@ -270,5 +271,5 @@ if __name__ == '__main__':
 
     bib = BibTeX.parseFile(config.MASTER_BIB)
 
-    for tag in config.TAG_DIRECTORIES.keys():
+    for tag in list(config.TAG_DIRECTORIES.keys()):
         writePageSet(config, bib, tag)

	gnunetbib Bibliography (BibTeX, based on AnonBib)
	Log | Files | Refs | README | LICENSE

A	.buildbot/build.sh	|	3	+++
A	.buildbot/firefly-x86_64-amdepyc_deploy.sh	|	7	+++++++
M	BibTeX.py	|	78	+++++++++++++++++++++++++++++++++++++++---------------------------------------
M	Makefile	|	2	+-
M	anonbib.cfg	|	2	+-
M	config.py	|	6	+++---
M	rank.py	|	32	++++++++++++++++----------------
M	writeHTML.py	|	17	+++++++++--------