# Copyright (C) 2019 GNUnet e.V.
#
# This code is derived from code contributed to GNUnet e.V.
# by nikita and based on code by Florian Dold.
#
# Permission to use, copy, modify, and/or distribute this software for
# any purpose with or without fee is hereby granted.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
# SPDX-License-Identifier: 0BSD
import codecs
import gettext
import glob
import html.parser
import os
import os.path
import re
import sys
from datetime import datetime
from pathlib import Path, PurePath, PurePosixPath

import jinja2
from bs4 import BeautifulSoup
from ruamel.yaml import YAML

import sitegen.i18nfix as i18nfix
import sitegen.myshutil as shutil
from sitegen.timeutil import conv_date_rfc822, time_now, time_rfc822

PROPERTIES_YML = "properties.yml"


def html2text(html_str):
    """Strip all markup from *html_str* and return the concatenated text.

    Uses html.parser and collects only the data (text) nodes.
    """
    class extractText(html.parser.HTMLParser):
        def __init__(self):
            super().__init__()
            self.result = []

        def handle_data(self, data):
            self.result.append(data)

        def text_in(self):
            return "".join(self.result)

    extractor = extractText()
    extractor.feed(html_str)
    return extractor.text_in()


def cut_text(filename, count):
    """Return an abstract of the HTML file *filename*.

    The second <p> element of the document is extracted, stripped of
    markup, and truncated to at most *count* characters (with " [...]"
    appended when truncation happened).

    NOTE(review): raises IndexError when the document has fewer than two
    <p> elements — callers must guarantee the template structure.
    """
    # Do not name the handle "html": that would shadow the imported
    # html module used by html2text().
    with open(filename) as html_file:
        soup = BeautifulSoup(html_file, features="lxml")
        # Scripts and styles would otherwise leak into the text nodes.
        for script in soup(["script", "style"]):
            script.extract()
        # Second paragraph of the page is the abstract by convention here.
        fragments = [str(child) for child in soup.findAll("p")[1]]
        text = html2text("".join(fragments).replace("\n", " "))
        return (text[:count] + " [...]") if len(text) > count else text


def extract_body(text, content_id="newspost-content"):
    """Extract the element with id *content_id* from HTML *text*.

    Returns the prettified HTML of that element.
    Raises ValueError when no such element exists.
    """
    soup = BeautifulSoup(text, features="lxml")
    content = soup.find(id=content_id)
    if content is None:
        # BUG FIX: the original raised the undefined name "Error",
        # which itself produced a NameError.
        raise ValueError("can't extract content")
    return content.prettify()


def make_helpers(root, in_file, locale):
    """Return a dictionary of helpers that should be available in the
    template.

    root     -- project root directory (Path)
    in_file  -- the template file currently being rendered
    locale   -- locale code of the page being rendered (e.g. "en")
    """
    def self_localized(other_locale, relative=False):
        """
        Return URL for the current page in another locale.
        """
        abs_file = Path(in_file).resolve()
        baseurl = os.environ.get("BASEURL")
        # BUG FIX: removesuffix instead of rstrip — rstrip(".j2") strips
        # the character set {., j, 2} and corrupts names like "page2".
        if relative or not baseurl:
            rel = str(Path(in_file).relative_to(root / "template"))
            return "../" + other_locale + "/" + rel.removesuffix(".j2")
        rel = str(abs_file.relative_to(root / "template"))
        return baseurl + other_locale + "/" + rel.removesuffix(".j2")

    def url(x, virtual=False):
        """Return a relative URL to *x*, checking that the target exists.

        With virtual=True the existence check is skipped.
        Raises FileNotFoundError when the target cannot be located.
        """
        abs_file = Path(in_file).resolve()

        def check_exists():
            if virtual:
                return True
            if Path(root / "static" / x).exists():
                return True
            if Path(root / "template" / (x + ".j2")).exists():
                return True
            slashpos = x.find("/")
            if slashpos < 0:
                return False
            # Heuristic search for translated file: drop the leading
            # locale directory and look for the bare template.
            rest = x[slashpos + 1:]
            return Path(root / "template" / (rest + ".j2")).exists()

        if not check_exists():
            raise FileNotFoundError("can't find " + x)
        # One "../" per path component between this page and the root.
        current_location = abs_file.relative_to(root / "template")
        prefix = "../" * len(current_location.parts)
        return prefix + x

    def url_static(filename):
        return url(filename)

    def url_dist(filename):
        return url("dist/" + filename)

    def url_localized(filename):
        return url(locale + "/" + filename)

    def svg_localized(filename):
        """URL of the locale-specific SVG, falling back to the plain one."""
        # BUG FIX: the original built static/<filename>/./<locale>/.svg
        # (separate path components), so the localized file was never
        # found; the intended name is "<filename>.<locale>.svg".
        localized_file = root / "static" / (filename + "." + locale + ".svg")
        if locale == "en" or not localized_file.is_file():
            return url(filename + ".svg")
        return url(filename + "." + locale + ".svg")

    def get_abstract(name, length):
        return cut_text(root / "template" / (name + ".j2"), length)

    return dict(
        self_localized=self_localized,
        url_localized=url_localized,
        url_static=url_static,
        url_dist=url_dist,
        url=url,
        svg_localized=svg_localized,
        now=time_rfc822(time_now()),
        conv_date_rfc822=conv_date_rfc822,
        get_abstract=get_abstract,
    )


def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy the contents of *src* into *dst*.

    Unlike shutil.copytree on the top level, this merges into an
    existing destination directory.
    """
    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            shutil.copytree(s, d, symlinks, ignore, dirs_exist_ok=True)
        else:
            shutil.copy2(s, d)


class SiteGenerator:
    """Render the jinja2 templates of the site once per locale and emit
    static assets plus a sitemap."""

    def __init__(self, debug=0, root="."):
        self.root = Path(root).resolve()
        self.debug = debug
        loader = jinja2.ChoiceLoader(
            [
                jinja2.FileSystemLoader(str(self.root / "template")),
                jinja2.PrefixLoader(
                    {"common": jinja2.FileSystemLoader(str(self.root / "common"))}
                ),
            ]
        )
        env = jinja2.Environment(
            loader=loader,
            extensions=["jinja2.ext.i18n"],
            lstrip_blocks=True,
            trim_blocks=True,
            undefined=jinja2.StrictUndefined,
            autoescape=False,
        )
        env.filters["extract_body"] = extract_body
        env.newstyle_gettext = True
        self.env = env
        # NOTE: read-in properties.yml and afterwards all *.yml in
        # properties.d
        properties = {}
        yaml = YAML(typ="safe")
        yaml.preserve_quotes = True
        properties.update(yaml.load(self.root / PROPERTIES_YML))
        properties_dir = PROPERTIES_YML.removesuffix(".yml") + ".d"
        for filepath in Path(self.root / properties_dir).glob("*.yml"):
            properties.update(yaml.load(filepath))
        self.config = properties
        # Environment variable wins over the configured base URL.
        self.baseurl = os.environ.get("BASEURL")
        if not self.baseurl:
            self.baseurl = self.config["siteconf"].get("baseurl")

    def run_localized(self, locale, tr):
        """Render every template for *locale* into rendered/<locale>/."""
        conf = self.config
        root = self.root
        env = self.env
        template_dir = root / "template"
        for in_file in template_dir.glob("**/*.j2"):
            tmpl_filename = str(in_file.resolve().relative_to(template_dir))
            tmpl = env.get_template(tmpl_filename)
            # BUG FIX: removesuffix instead of rstrip(".j2"), which
            # strips the character set {., j, 2} from the end.
            filename = tmpl_filename.removesuffix(".j2")
            content = tmpl.render(
                lang=locale,
                lang_full=conf["langs_full"][locale],
                conf=conf,
                siteconf=conf["siteconf"],
                meetingnotesdata=conf["meetingnotes"],
                newsdata=conf["newsposts"],
                newsposts=conf["newsposts"],
                videosdata=conf["videoslist"],
                filename=filename,
                **make_helpers(root, in_file, locale),
            )
            out_name = root / "rendered" / locale / filename
            Path(out_name).parent.mkdir(parents=True, exist_ok=True)
            with codecs.open(out_name, "w", encoding="utf-8") as f:
                try:
                    f.write(content)
                except OSError as e:
                    # BUG FIX: the original used a bare "except" and
                    # printed the unbound name "e" (NameError).
                    print(e, file=sys.stderr)

    def emit_sitemap(self):
        """Write rendered/sitemap.xml listing every rendered HTML page.

        NOTE(review): the string literals of this method were corrupted
        in the source (XML tags missing, unbalanced quotes); this is a
        reconstruction following the sitemaps.org 0.9 protocol — confirm
        against the upstream repository.
        """
        p = self.root / "rendered"
        links = sorted(p.rglob("*.html"))
        timestamp = datetime.now().strftime("%Y-%m-%d")
        o = p / "sitemap.xml"
        with o.open("w") as f:
            f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
            f.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')
            for link in links:
                href = self.baseurl + str(link.relative_to(p))
                f.write(
                    "<url><loc>" + href + "</loc>"
                    "<lastmod>" + timestamp + "</lastmod>"
                    "<priority>1.0</priority></url>\n"
                )
            f.write("</urlset>\n")

    def run(self):
        """Render the site for every locale found under locale/."""
        root = self.root
        for loc_dir in root.glob("locale/*/"):
            if not loc_dir.is_dir():
                # https://bugs.python.org/issue22276
                continue
            locale = str(PurePath(loc_dir).name)
            try:
                tr = gettext.translation(
                    "messages", localedir="locale", languages=[locale]
                )
            except FileNotFoundError:
                print(
                    f"WARNING: unable to find translations for locale '{locale}'",
                    file=sys.stderr,
                )
                continue
            tr.gettext = i18nfix.wrap_gettext(tr.gettext)
            env = self.env
            env.install_gettext_translations(tr, newstyle=True)
            if locale not in self.config["langs_full"]:
                print(
                    f"WARNING: skipping '{locale}, as 'langs_full' is not configured'",
                    file=sys.stderr,
                )
                continue
            self.run_localized(locale, tr)
        Path(root / "rendered").mkdir(exist_ok=True)
        copytree(root / "static", root / "rendered")
        self.emit_sitemap()