"""Static-site generation helpers, recovered from a whitespace-flattened patch.

Origin: commit 98690996582e0e511a8e8611b3db5080be6e75b0 by ng0,
Tue, 12 Nov 2019 17:28:30 +0000, subject "split up template.py, make site
generation a class."  The patch created five new files (302 insertions):

    inc/fileproc.py   inc/site.py   inc/sitemap.py   inc/sum.py
    inc/textproc.py

The contents of those files are kept below in that order, each under a banner
comment, so they can be split back into separate modules.

Third-party and project-local dependencies (``ruamel.yaml``, ``bs4``,
``inc.i18nfix``) are imported inside the functions that use them, so the
stdlib-only helpers can be imported without them.  ``gen_site.run`` receives
a ready-made template environment and never refers to ``jinja2`` by name.
"""

import codecs
import gettext
import glob
import hashlib
import html.parser
import os
import os.path
import re
import sys
import traceback
from pathlib import Path, PurePosixPath


def _strip_suffix(text, suffix=".j2"):
    """Return *text* without a trailing *suffix* (no-op if it is absent).

    ``str.rstrip(".j2")`` removes any trailing run of the characters
    ``.``, ``j`` and ``2`` and therefore mangles names such as ``foo2.j2``.
    """
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text


# ---------------------------------------------------------------------------
# inc/fileproc.py
# ---------------------------------------------------------------------------

def copy_files(kind, conf, locale, inlist, ptarget):
    """Copy the files listed in ``conf[inlist]`` into the output tree.

    Each entry of ``conf[inlist]`` is a mapping with a ``"file"`` name
    (looked up below the directory *kind*) and a list of ``"targets"``.
    Every target is written twice: to ``ptarget/locale/target`` and to
    ``ptarget/target``.  Entries whose source is not a regular file are
    skipped.

    The data is copied as bytes so that non-text assets survive, and missing
    target directories are created.
    """
    target_root = Path(ptarget)
    for item in conf[inlist]:
        source = Path(kind + "/" + item["file"])
        if not source.is_file():
            continue
        # Read once; the same payload goes to every target.
        payload = source.read_bytes()
        for target in item["targets"]:
            for destination in (target_root / locale / target,
                                target_root / target):
                destination.parent.mkdir(parents=True, exist_ok=True)
                destination.write_bytes(payload)
                print("copied " + str(source) + " to " + str(destination) + "...")


def rm_rf(directory):
    """Delete every file below *directory*, keeping the directories.

    Directories (including *directory* itself) are left in place, as in the
    original implementation.  Symbolic links are unlinked and never
    followed, so data outside the tree is not touched.  A non-existent
    *directory* is a no-op.
    """
    directory = Path(directory)
    for child in directory.glob('*'):
        if child.is_symlink() or child.is_file():
            child.unlink()
        else:
            rm_rf(child)


def fileop(infile, outfile, action):
    """Copy or symlink *infile* to *outfile*.

    infile: input path (str or Path)
    outfile: output path (str or Path)
    action: ``"copy"`` writes the content of a regular file *infile* to
        *outfile*; ``"link"`` makes *outfile* a symlink pointing at *infile*
        (file or directory).  Any other action, or a missing source, does
        nothing.
    """
    source = Path(infile)
    destination = Path(outfile)
    if action == "copy":
        if source.is_file():
            destination.write_bytes(source.read_bytes())
    elif action == "link":
        # Directories must be linkable too: generation_dir links a whole
        # generation directory.
        if source.exists():
            destination.symlink_to(source, target_is_directory=source.is_dir())


def write_name(filename, infile, locale, replacer):
    """Return the output path below ``./rendered/<locale>/`` for a template.

    *replacer* is removed from *infile* and a trailing ``.j2`` is dropped.
    *filename* is accepted for interface compatibility and is not used.
    """
    return "./rendered/" + locale + "/" + _strip_suffix(
        infile.replace(replacer, ''))


def localized(filename, locale, *args, **kwargs):
    """Return a relative URL for *filename* in *locale*.

    Without an extension the result is ``../<locale>/<filename>``.  With an
    extension (keyword ``ext=...`` or a single extra positional argument)
    the result is ``../<filename>.<locale>.<ext>`` if that file exists
    relative to the current directory and *locale* is not ``"en"``,
    otherwise ``../<filename>.<ext>``.
    """
    ext = kwargs.get('ext', None)
    if ext is None and args:
        ext = args[0]
    if ext is None:
        return "../" + locale + "/" + filename
    localized_file = filename + "." + locale + "." + ext
    if locale == "en" or not Path(localized_file).is_file():
        return "../" + filename + "." + ext
    return "../" + localized_file


# This generates and switches sites generations, preventing
# in-place modification of the website.
# * save old generation directory name
# * jinja2 creates content in "rendered" (happened before calling this function)
# * calculate sum of "rendered"
# * move "rendered" to out/$sum
# * remove symlink "html_dir"
# * symlink out/$sum to "html_dir"
# * delete old generation directory
def generation_dir(htmldir):
    """Publish ``rendered`` as a new generation and point *htmldir* at it.

    Raises RuntimeError if ``rendered`` cannot be checksummed and
    FileExistsError if *htmldir* exists but is not a symlink.  If the live
    generation already has the same checksum nothing is changed.
    """
    html = Path(htmldir)
    had_link = html.is_symlink()
    # Only a symlink designates an old generation; resolving a missing path
    # would yield htmldir itself, which must never be wiped.
    oldgen = html.resolve() if had_link else None

    # precondition: jinja2 has created the files in "rendered".
    newgen = Path("rendered")
    newgen_sum = walksum(newgen)
    if not isinstance(newgen_sum, str):
        # walksum reports failures as negative integers.
        raise RuntimeError(
            "cannot checksum %s (walksum returned %r)" % (newgen, newgen_sum))

    outdir = Path("out")
    outdir.mkdir(parents=True, exist_ok=True)
    # Absolute, so the symlink is valid wherever htmldir lives.
    newgen_target = (outdir / newgen_sum).resolve()
    if oldgen is not None and oldgen == newgen_target:
        return

    newgen.rename(newgen_target)
    if had_link:
        html.unlink()
    elif html.exists():
        raise FileExistsError(
            "%s exists and is not a symlink; refusing to replace it" % html)
    # Link the renamed directory; "rendered" no longer exists at this point.
    fileop(newgen_target, html, "link")
    if oldgen is not None:
        rm_rf(oldgen)


# ---------------------------------------------------------------------------
# inc/site.py
# ---------------------------------------------------------------------------

class gen_site:
    """Render the ``*.j2`` templates of one directory for every locale."""

    def load_config(self, name="www.yml"):
        """Load and return the YAML site configuration file *name*."""
        from ruamel.yaml import YAML  # third-party, imported on demand

        yaml = YAML(typ='safe')
        return yaml.load(Path(name))

    def gen_abstract(self, conf, name, member, pages, length):
        """Store a shortened news text in every item of ``conf[name]``.

        For each item, ``item[member]`` is set to the text of the news page
        named by ``item[pages]``, cut to *length* characters.
        """
        for item in conf[name]:
            item[member] = cut_news_text(item[pages], length)

    def run(self, root, conf, env):
        """Render all templates in ``../<root>`` into ``./rendered``.

        root: template directory name; ``"news"`` pages are written one
            level deeper (``rendered/<locale>/news/``) and get ``../../``
            prefixes for their URLs.
        conf: site configuration; the keys ``langs_full``, ``meetingnotes``,
            ``newsposts`` and ``videoslist`` are read.
        env: template environment providing ``get_template`` and
            ``install_gettext_translations``.

        Exits with status 1 if the ``rendered`` directory does not exist.
        """
        from inc import i18nfix  # project-local, imported on demand

        # os.chdir("..")
        print(os.getcwd())
        # Remember the bare section name before the path prefix is added;
        # comparing the prefixed path with "news" could never succeed.
        section = root
        is_news = section == "news"
        up = "../../" if is_news else "../"
        # NOTE(review): templates are looked up one directory above the
        # working directory -- confirm the loader of `env` accepts "../".
        root = "../" + root
        outdir = Path("rendered")

        # The helpers below read `locale`, which is assigned in the locale
        # loop further down before any template is rendered.
        def url_localized(filename):
            return up + locale + "/" + filename

        def url_static(filename):
            return up + "static/" + filename

        def url_dist(filename):
            return up + "dist/" + filename

        def svg_localized(filename):
            lf = filename + "." + locale + ".svg"
            if locale == "en" or not Path(lf).is_file():
                return "../" + filename + ".svg"
            return "../" + lf

        def url(x):
            # TODO: look at the app root environment variable
            # TODO: check if file exists
            return "../" + x

        for in_file in sorted(glob.glob(root + "/*.j2")):
            tmpl = env.get_template(in_file)
            # Template path without ".j2", and the same relative to root.
            filename = _strip_suffix(in_file)
            page = _strip_suffix(in_file.replace(root + '/', ''))

            def self_localized(other_locale):
                """
                Return URL for the current page in another locale.
                """
                return "../" + other_locale + "/" + page

            for locale_dir in sorted(glob.glob("locale/*/")):
                locale = os.path.basename(locale_dir[:-1])

                tr = gettext.translation("messages",
                                         localedir="locale",
                                         languages=[locale])
                tr.gettext = i18nfix.wrap_gettext(tr.gettext)
                env.install_gettext_translations(tr, newstyle=True)

                content = tmpl.render(lang=locale,
                                      lang_full=conf["langs_full"][locale],
                                      url=url,
                                      meetingnotesdata=conf["meetingnotes"],
                                      newsdata=conf["newsposts"],
                                      videosdata=conf["videoslist"],
                                      self_localized=self_localized,
                                      url_localized=url_localized,
                                      url_static=url_static,
                                      url_dist=url_dist,
                                      svg_localized=svg_localized,
                                      filename=filename)

                if not outdir.exists():
                    sys.exit(1)

                if is_news:
                    out_name = ("./rendered/" + locale + "/" + section + "/"
                                + page)
                    langdir = outdir / locale / section
                else:
                    out_name = "./rendered/" + locale + "/" + page
                    langdir = outdir / locale

                try:
                    langdir.mkdir(parents=True, exist_ok=True)
                except FileNotFoundError as err:
                    print(err)

                with codecs.open(out_name, "w", encoding='utf-8') as f:
                    try:
                        print(Path.cwd())
                        f.write(content)
                    except OSError as err:
                        print(err)


# ---------------------------------------------------------------------------
# inc/sitemap.py
# ---------------------------------------------------------------------------

def sitemap_tree(path):
    """Return a nested dict describing the directory tree below *path*.

    Directories are ``{"name": ..., "children": [...]}``, other entries are
    ``{"name": ...}``.  Children are listed in sorted order so the result
    is reproducible.  An unreadable or missing *path* yields a node with no
    children.
    """
    tree = dict(name=PurePosixPath(path).name, children=[])
    try:
        entries = sorted(os.listdir(path))
    except OSError:
        return tree
    for name in entries:
        full_name = os.path.join(path, name)
        if os.path.isdir(full_name):
            tree['children'].append(sitemap_tree(full_name))
        else:
            tree['children'].append(dict(name=name))
    return tree


# ---------------------------------------------------------------------------
# inc/sum.py
# ---------------------------------------------------------------------------

def sha256sum(_):
    """Return the hexadecimal SHA-256 digest of the file at path *_*."""
    sha256 = hashlib.sha256()
    with open(_, mode="rb") as fd:
        # Stream the file instead of loading it into memory at once.
        for chunk in iter(lambda: fd.read(65536), b""):
            sha256.update(chunk)
    return sha256.hexdigest()


def walksum(_):
    """Return a SHA-256 hex digest over the contents of all files below *_*.

    Files are visited in sorted order, directory by directory.  Each file is
    read in 4096-byte chunks and the hex digest of every chunk is fed into
    the overall hash.  File names are not part of the sum.  Files that
    cannot be opened are skipped.

    Returns -1 if *_* does not exist and -2 (after printing a traceback) if
    walking or reading fails.
    """
    sha256 = hashlib.sha256()
    if not Path(_).exists():
        return -1
    try:
        for root, directories, files in os.walk(_):
            # Sorting in place makes os.walk descend in a stable order.
            directories.sort()
            for name in sorted(files):
                filepath = os.path.join(root, name)
                try:
                    fl = open(filepath, 'rb')
                except OSError:
                    continue
                with fl:
                    while True:
                        buf = fl.read(4096)
                        if not buf:
                            break
                        # hashlib only accepts bytes; hexdigest() is a str.
                        sha256.update(
                            hashlib.sha256(buf).hexdigest().encode("ascii"))
    except OSError:
        traceback.print_exc()
        return -2
    return sha256.hexdigest()


# ---------------------------------------------------------------------------
# inc/textproc.py
# ---------------------------------------------------------------------------

class extractText(html.parser.HTMLParser):
    """HTML parser that collects the text nodes it is fed."""

    def __init__(self):
        super().__init__()
        self.result = []

    def handle_data(self, data):
        self.result.append(data)

    def text_in(self):
        """Return all collected text joined into one string."""
        return ''.join(self.result)


def html2text(html):
    """Return the text content of the markup string *html* without tags."""
    parser = extractText()
    parser.feed(html)
    return parser.text_in()


def _ellipsize(text, count):
    """Cut *text* to *count* characters plus '...', or append '..'."""
    if len(text) > count:
        return text[:count] + '...'
    return text + '..'


def cut_text(filename, count):
    """Return the text of the second ``<p>`` of *filename*, shortened.

    ``<script>`` and ``<style>`` elements are removed first.  The text is
    cut to *count* characters followed by ``...``; shorter text is returned
    with ``..`` appended.  A document with fewer than two paragraphs is
    treated as having empty text instead of raising IndexError.
    """
    from bs4 import BeautifulSoup  # third-party, imported on demand

    with open(filename, encoding="utf-8") as handle:
        soup = BeautifulSoup(handle, features="lxml")
    for tag in soup(["script", "style"]):
        tag.extract()
    paragraphs = soup.find_all('p')
    # Iterating a tag yields its child nodes.
    children = paragraphs[1] if len(paragraphs) > 1 else []
    markup = ''.join(str(node) for node in children)
    return _ellipsize(html2text(markup.replace("\n", "")), count)


def cut_news_text(filename, count):
    """Return the shortened text of the news template ``news/<filename>.j2``."""
    return cut_text("news/" + filename + ".j2", count)

# --
# cgit v1.2.3