author     Florian Dold <florian@dold.me>  2021-05-07 12:33:45 +0200
committer  Florian Dold <florian@dold.me>  2021-05-07 12:33:45 +0200
commit     d8ec4d322b5ecb23e992dfa6061dc015c6b4abbf (patch)
tree       0ab1b8731d1b49975e539ef3782f89763983bf30
parent     5c39698dd951084ccba09a7f09b66511a238d2a9 (diff)
download   www_shared-d8ec4d322b5ecb23e992dfa6061dc015c6b4abbf.tar.gz
           www_shared-d8ec4d322b5ecb23e992dfa6061dc015c6b4abbf.zip
remove unneeded stuff, structure as package
-rw-r--r--   README (renamed from README.text)              0
-rwxr-xr-x   make_site.py                                  28
-rw-r--r--   mybabel.py                                    25
-rw-r--r--   sitegen/__init__.py                            0
-rw-r--r--   sitegen/i18nfix.py (renamed from i18nfix.py)   0
-rw-r--r--   sitegen/site.py (renamed from site.py)        53
-rw-r--r--   sitegen/timeutil.py (renamed from time.py)     0
-rw-r--r--   textproc.py                                   94
8 files changed, 65 insertions(+), 135 deletions(-)
diff --git a/README.text b/README
index d9650ce..d9650ce 100644
--- a/README.text
+++ b/README
diff --git a/make_site.py b/make_site.py
new file mode 100755
index 0000000..35cc333
--- /dev/null
+++ b/make_site.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+# coding: utf-8
+#
+# This file is in the public domain.
+#
+# This script runs the jinja2 templating engine on an input template file
+# using the specified locale for gettext translations, and outputs
+# the resulting (HTML) output file.
+#
+# Note that the gettext files need to be prepared first. This script
+# is thus to be invoked via the Makefile.
+import jinja2
+import sys
+from pathlib import Path, PurePath
+
+# Make sure the current directory is in the search path when trying
+# to import i18nfix.
+sys.path.insert(0, ".")
+
+from sitegen.site import SiteGenerator
+
+
+def main():
+    x = SiteGenerator()
+    x.run()
+
+if __name__ == "__main__":
+    main()
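For orientation, a minimal sketch of the jinja2/gettext wiring the header comment above describes; the template directory, locale directory, gettext domain, and locale name here are assumptions for illustration, not values taken from this repository:

    import gettext
    import jinja2

    # Hypothetical layout: templates in "template/", .mo files under "locale/".
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader("template"),
        extensions=["jinja2.ext.i18n"],
    )
    tr = gettext.translation("messages", localedir="locale",
                             languages=["de"], fallback=True)
    env.install_gettext_translations(tr)  # provided by jinja2.ext.i18n
    print(env.get_template("index.html.j2").render())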
diff --git a/mybabel.py b/mybabel.py
deleted file mode 100644
index daeb262..0000000
--- a/mybabel.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python3
-
-# This code is in the public domain.
-#
-# This is a wrapper around 'pybabel' that sets our include path
-# to find the 'i18nfix' module. It takes the name of the
-# pybabel program as the first argument (must be a Python script!)
-# and passes the other arguments to pybabel after setting our
-# sys.path.
-
-import shutil
-import sys
-
-# First, extend the search path as needed (without setting PYTHONPATH!)
-sys.path.insert(0, ".")
-
-# Now, find the actual pybabel program in the $PATH
-pb = shutil.which(sys.argv[1])
-
-# Remove 'pybabel' from argv[] so that pybabel doesn't confuse
-# itself for the first command-line argument ;-)
-sys.argv.remove(sys.argv[1])
-
-# Now we can run pybabel. Yeah!
-exec(compile(source=open(pb).read(), filename=pb, mode="exec"))
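The deleted wrapper worked by exec()ing the located pybabel script inside the current interpreter, so the adjusted sys.path was in effect when pybabel imported extraction plugins. A roughly equivalent sketch using the standard-library runpy module (an alternative technique, not what this file used):

    import runpy
    import shutil
    import sys

    sys.path.insert(0, ".")           # make the i18nfix module importable
    pb = shutil.which(sys.argv[1])    # locate the real pybabel script in $PATH
    sys.argv.remove(sys.argv[1])      # hide the wrapper argument from pybabel
    runpy.run_path(pb, run_name="__main__")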
diff --git a/sitegen/__init__.py b/sitegen/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/sitegen/__init__.py
diff --git a/i18nfix.py b/sitegen/i18nfix.py
index b00eef3..b00eef3 100644
--- a/i18nfix.py
+++ b/sitegen/i18nfix.py
diff --git a/site.py b/sitegen/site.py
index c090130..5e009dc 100644
--- a/site.py
+++ b/sitegen/site.py
@@ -28,14 +28,41 @@ import jinja2
 from pathlib import Path, PurePosixPath, PurePath
 from ruamel.yaml import YAML
 from datetime import datetime
+import html.parser
+from bs4 import BeautifulSoup
+import sitegen.i18nfix as i18nfix
+from sitegen.timeutil import time_rfc822, time_now, conv_date_rfc822
 
-# Make sure the current directory is in the search path when trying
-# to import i18nfix.
-sys.path.insert(0, ".")
 
-import inc.i18nfix as i18nfix
-from inc.textproc import cut_news_text, cut_article
-from inc.time import time_rfc822, time_now, conv_date_rfc822
+def html2text(html_str):
+    class extractText(html.parser.HTMLParser):
+        def __init__(self):
+            super(extractText, self).__init__()
+            self.result = []
+
+        def handle_data(self, data):
+            self.result.append(data)
+
+        def text_in(self):
+            return "".join(self.result)
+
+    k = extractText()
+    k.feed(html_str)
+    return k.text_in()
+
+
+def cut_text(filename, count):
+    with open(filename) as html:
+        soup = BeautifulSoup(html, features="lxml")
+        for script in soup(["script", "style"]):
+            script.extract()
+        k = []
+        for i in soup.findAll("p")[1]:
+            k.append(i)
+        b = "".join(str(e) for e in k)
+        text = html2text(b.replace("\n", ""))
+        textreduced = (text[:count] + " [...]") if len(text) > count else (text)
+        return textreduced
 
 
 def make_helpers(root, in_file, locale):
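As a quick illustration of the helper added above, html2text() keeps only the text nodes and drops the markup; the input string here is hypothetical:

    assert html2text("<p>Hello <b>world</b>!</p>") == "Hello world!"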
@@ -87,6 +114,9 @@ def make_helpers(root, in_file, locale):
         else:
             return url(filename + "." + locale + ".svg")
 
+    def get_abstract(name, length):
+        return cut_text(root / "template" / (name + ".j2"), length)
+
     return dict(
         self_localized=self_localized,
         url_localized=url_localized,
@@ -96,6 +126,7 @@ def make_helpers(root, in_file, locale):
         svg_localized=svg_localized,
         now=time_rfc822(time_now()),
         conv_date_rfc822=conv_date_rfc822,
+        get_abstract=get_abstract,
     )
 
 
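A sketch of how a template could call the newly exposed helper once make_helpers() injects it into the rendering context; the template string and the stub below are hypothetical, purely for the demo:

    import jinja2

    # Stand-in for the real get_abstract helper defined above.
    helpers = {"get_abstract": lambda name, length: "An example abstract [...]"}
    tmpl = jinja2.Environment().from_string(
        "<p>{{ get_abstract('about', 200) }}</p>")
    print(tmpl.render(**helpers))  # -> <p>An example abstract [...]</p>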
@@ -138,16 +169,6 @@ class SiteGenerator:
         if self.baseurl is None:
             self.baseurl = self.config["siteconf"].get("baseurl")
 
-    def gen_abstract(self, name, member, pages, length):
-        conf = self.config
-        for item in conf[name]:
-            item[member] = cut_news_text(item[pages], length)
-
-    def gen_newspost_content(self, name, member, pages, lang):
-        conf = self.config
-        for item in conf[name]:
-            item[member] = cut_article(item[pages], conf, lang)
-
     def run_localized(self, locale, tr):
         conf = self.config
         root = self.root
diff --git a/time.py b/sitegen/timeutil.py
index d023de7..d023de7 100644
--- a/time.py
+++ b/sitegen/timeutil.py
diff --git a/textproc.py b/textproc.py
deleted file mode 100644
index e39ee12..0000000
--- a/textproc.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright (C) 2019 GNUnet e.V.
-#
-# This code is derived from code contributed to GNUnet e.V.
-# by nikita <nikita@n0.is>.
-#
-# Permission to use, copy, modify, and/or distribute this software for
-# any purpose with or without fee is hereby granted.
-#
-# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
-# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
-# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
-# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
-# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
-# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
-# PERFORMANCE OF THIS SOFTWARE.
-#
-# SPDX-License-Identifier: 0BSD
-import html.parser
-from bs4 import BeautifulSoup
-
-
-class extractText(html.parser.HTMLParser):
-    def __init__(self):
-        super(extractText, self).__init__()
-        self.result = []
-
-    def handle_data(self, data):
-        self.result.append(data)
-
-    def text_in(self):
-        return "".join(self.result)
-
-
-def html2text(html):
-    k = extractText()
-    k.feed(html)
-    return k.text_in()
-
-
-def cut_text(filename, count):
-    with open(filename) as html:
-        soup = BeautifulSoup(html, features="lxml")
-        for script in soup(["script", "style"]):
-            script.extract()
-        k = []
-        for i in soup.findAll("p")[1]:
-            k.append(i)
-        b = "".join(str(e) for e in k)
-        text = html2text(b.replace("\n", ""))
-        textreduced = (text[:count] + " [...]") if len(text) > count else (text)
-        return textreduced
-
-
-def cut_news_text(filename, count):
-    return cut_text("template/news/" + filename + ".j2", count)
-
-
-# TODO: replace id='...' with frontier so that we can
-# pass it in cut_article reusable, or merge cut_text and
-# cut_by_frontier.
-def cut_by_frontier(filename):
-    with open(filename) as html:
-        soup = BeautifulSoup(html, features="lxml")
-        k = []
-        for i in soup.find(id="newspost-content"):
-            k.append(i)
-        b = "".join(str(e) for e in k)
-        text = b.replace("\n", "")
-        return text
-
-
-def cut_article(filename, conf, lang):
-    return cut_all("template/news/" + filename + ".j2", conf, lang)
-
-
-def cut_all(filename, conf, lang):
-    with open(filename) as html:
-        soup = BeautifulSoup(html, features="lxml")
-        i = (
-            repr(soup)
-            .replace('{% extends "common/news.j2" %}\n{% block body_content %}\n', "")
-            .replace("\n{% endblock body_content %}", "")
-            .replace("<html><body><p></p>", "")
-            .replace("</body></html>", "")
-        )
-        urlstr = "https://" + conf["siteconf"]["baseurl"] + "/" + lang + "/"
-        text = (
-            i.replace("\n", "")
-            .replace("{{ url_localized('", urlstr)
-            .replace("') }}", "")
-        )
-        # .replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
-        return text
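For reference, the marker rewriting that the deleted cut_all() performed reduces to plain string replacement; the base URL and language below are hypothetical stand-ins for conf["siteconf"]["baseurl"] and lang:

    snippet = "see {{ url_localized('news/index.html') }} for more"
    urlstr = "https://" + "example.org" + "/" + "en" + "/"
    print(snippet.replace("{{ url_localized('", urlstr).replace("') }}", ""))
    # -> see https://example.org/en/news/index.html for more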