diff options
author | Florian Dold <florian@dold.me> | 2021-05-07 12:33:45 +0200 |
---|---|---|
committer | Florian Dold <florian@dold.me> | 2021-05-07 12:33:45 +0200 |
commit | d8ec4d322b5ecb23e992dfa6061dc015c6b4abbf (patch) | |
tree | 0ab1b8731d1b49975e539ef3782f89763983bf30 | |
parent | 5c39698dd951084ccba09a7f09b66511a238d2a9 (diff) | |
download | www_shared-d8ec4d322b5ecb23e992dfa6061dc015c6b4abbf.tar.gz www_shared-d8ec4d322b5ecb23e992dfa6061dc015c6b4abbf.zip |
remove unneeded stuff, structure as package
-rw-r--r-- | README (renamed from README.text) | 0 | ||||
-rwxr-xr-x | make_site.py | 28 | ||||
-rw-r--r-- | mybabel.py | 25 | ||||
-rw-r--r-- | sitegen/__init__.py | 0 | ||||
-rw-r--r-- | sitegen/i18nfix.py (renamed from i18nfix.py) | 0 | ||||
-rw-r--r-- | sitegen/site.py (renamed from site.py) | 53 | ||||
-rw-r--r-- | sitegen/timeutil.py (renamed from time.py) | 0 | ||||
-rw-r--r-- | textproc.py | 94 |
8 files changed, 65 insertions, 135 deletions
diff --git a/make_site.py b/make_site.py new file mode 100755 index 0000000..35cc333 --- /dev/null +++ b/make_site.py | |||
@@ -0,0 +1,28 @@ | |||
1 | #!/usr/bin/env python3 | ||
2 | # coding: utf-8 | ||
3 | # | ||
4 | # This file is in the public domain. | ||
5 | # | ||
6 | # This script runs the jinja2 templating engine on an input template-file | ||
7 | # using the specified locale for gettext translations, and outputs | ||
8 | # the resulting (HTML) output-file. | ||
9 | # | ||
10 | # Note that the gettext files need to be prepared first. This script | ||
11 | # is thus to be invoked via the Makefile. | ||
12 | import jinja2 | ||
13 | import sys | ||
14 | from pathlib import Path, PurePath | ||
15 | |||
16 | # Make sure the current directory is in the search path when trying | ||
17 | # to import i18nfix. | ||
18 | sys.path.insert(0, ".") | ||
19 | |||
20 | from sitegen.site import SiteGenerator | ||
21 | |||
22 | |||
23 | def main(): | ||
24 | x = SiteGenerator() | ||
25 | x.run() | ||
26 | |||
27 | if __name__ == "__main__": | ||
28 | main() | ||
diff --git a/mybabel.py b/mybabel.py deleted file mode 100644 index daeb262..0000000 --- a/mybabel.py +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | #!/usr/bin/env python3 | ||
2 | |||
3 | # This code is in the public domain. | ||
4 | # | ||
5 | # This is a wrapper around 'pybabel' that sets our include path | ||
6 | # to find the 'i18nfix' module. It takes the name of the | ||
7 | # pybabel program as the first argument (must be a Python script!) | ||
8 | # and passes the other arguments to pybabel after setting our | ||
9 | # sys.path. | ||
10 | |||
11 | import shutil | ||
12 | import sys | ||
13 | |||
14 | # First, extend the search path as needed (without setting PYTHONPATH!) | ||
15 | sys.path.insert(0, ".") | ||
16 | |||
17 | # Now, find the actual pybabel program in the $PATH | ||
18 | pb = shutil.which(sys.argv[1]) | ||
19 | |||
20 | # Remove 'pybabel' from argv[] so that pybabel doesn't confuse | ||
21 | # itself for the first command-line argument ;-) | ||
22 | sys.argv.remove(sys.argv[1]) | ||
23 | |||
24 | # Now we can run pybabel. Yeah! | ||
25 | exec(compile(source=open(pb).read(), filename=pb, mode="exec")) | ||
diff --git a/sitegen/__init__.py b/sitegen/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/sitegen/__init__.py | |||
diff --git a/i18nfix.py b/sitegen/i18nfix.py index b00eef3..b00eef3 100644 --- a/i18nfix.py +++ b/sitegen/i18nfix.py | |||
diff --git a/site.py b/sitegen/site.py index c090130..5e009dc 100644 --- a/site.py +++ b/sitegen/site.py | |||
@@ -28,14 +28,41 @@ import jinja2 | |||
28 | from pathlib import Path, PurePosixPath, PurePath | 28 | from pathlib import Path, PurePosixPath, PurePath |
29 | from ruamel.yaml import YAML | 29 | from ruamel.yaml import YAML |
30 | from datetime import datetime | 30 | from datetime import datetime |
31 | import html.parser | ||
32 | from bs4 import BeautifulSoup | ||
33 | import sitegen.i18nfix as i18nfix | ||
34 | from sitegen.timeutil import time_rfc822, time_now, conv_date_rfc822 | ||
31 | 35 | ||
32 | # Make sure the current directory is in the search path when trying | ||
33 | # to import i18nfix. | ||
34 | sys.path.insert(0, ".") | ||
35 | 36 | ||
36 | import inc.i18nfix as i18nfix | 37 | def html2text(html_str): |
37 | from inc.textproc import cut_news_text, cut_article | 38 | class extractText(html.parser.HTMLParser): |
38 | from inc.time import time_rfc822, time_now, conv_date_rfc822 | 39 | def __init__(self): |
40 | super(extractText, self).__init__() | ||
41 | self.result = [] | ||
42 | |||
43 | def handle_data(self, data): | ||
44 | self.result.append(data) | ||
45 | |||
46 | def text_in(self): | ||
47 | return "".join(self.result) | ||
48 | |||
49 | k = extractText() | ||
50 | k.feed(html_str) | ||
51 | return k.text_in() | ||
52 | |||
53 | |||
54 | def cut_text(filename, count): | ||
55 | with open(filename) as html: | ||
56 | soup = BeautifulSoup(html, features="lxml") | ||
57 | for script in soup(["script", "style"]): | ||
58 | script.extract() | ||
59 | k = [] | ||
60 | for i in soup.findAll("p")[1]: | ||
61 | k.append(i) | ||
62 | b = "".join(str(e) for e in k) | ||
63 | text = html2text(b.replace("\n", "")) | ||
64 | textreduced = (text[:count] + " [...]") if len(text) > count else (text) | ||
65 | return textreduced | ||
39 | 66 | ||
40 | 67 | ||
41 | def make_helpers(root, in_file, locale): | 68 | def make_helpers(root, in_file, locale): |
@@ -87,6 +114,9 @@ def make_helpers(root, in_file, locale): | |||
87 | else: | 114 | else: |
88 | return url(filename + "." + locale + ".svg") | 115 | return url(filename + "." + locale + ".svg") |
89 | 116 | ||
117 | def get_abstract(name, length): | ||
118 | return cut_text(root / "template" / (name + ".j2"), length) | ||
119 | |||
90 | return dict( | 120 | return dict( |
91 | self_localized=self_localized, | 121 | self_localized=self_localized, |
92 | url_localized=url_localized, | 122 | url_localized=url_localized, |
@@ -96,6 +126,7 @@ def make_helpers(root, in_file, locale): | |||
96 | svg_localized=svg_localized, | 126 | svg_localized=svg_localized, |
97 | now=time_rfc822(time_now()), | 127 | now=time_rfc822(time_now()), |
98 | conv_date_rfc822=conv_date_rfc822, | 128 | conv_date_rfc822=conv_date_rfc822, |
129 | get_abstract=get_abstract, | ||
99 | ) | 130 | ) |
100 | 131 | ||
101 | 132 | ||
@@ -138,16 +169,6 @@ class SiteGenerator: | |||
138 | if self.baseurl is None: | 169 | if self.baseurl is None: |
139 | self.baseurl = self.config["siteconf"].get("baseurl") | 170 | self.baseurl = self.config["siteconf"].get("baseurl") |
140 | 171 | ||
141 | def gen_abstract(self, name, member, pages, length): | ||
142 | conf = self.config | ||
143 | for item in conf[name]: | ||
144 | item[member] = cut_news_text(item[pages], length) | ||
145 | |||
146 | def gen_newspost_content(self, name, member, pages, lang): | ||
147 | conf = self.config | ||
148 | for item in conf[name]: | ||
149 | item[member] = cut_article(item[pages], conf, lang) | ||
150 | |||
151 | def run_localized(self, locale, tr): | 172 | def run_localized(self, locale, tr): |
152 | conf = self.config | 173 | conf = self.config |
153 | root = self.root | 174 | root = self.root |
diff --git a/time.py b/sitegen/timeutil.py index d023de7..d023de7 100644 --- a/time.py +++ b/sitegen/timeutil.py | |||
diff --git a/textproc.py b/textproc.py deleted file mode 100644 index e39ee12..0000000 --- a/textproc.py +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | # Copyright (C) 2019 GNUnet e.V. | ||
2 | # | ||
3 | # This code is derived from code contributed to GNUnet e.V. | ||
4 | # by nikita <nikita@n0.is>. | ||
5 | # | ||
6 | # Permission to use, copy, modify, and/or distribute this software for | ||
7 | # any purpose with or without fee is hereby granted. | ||
8 | # | ||
9 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL | ||
10 | # WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED | ||
11 | # WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE | ||
12 | # AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL | ||
13 | # DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA | ||
14 | # OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | ||
15 | # TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | ||
16 | # PERFORMANCE OF THIS SOFTWARE. | ||
17 | # | ||
18 | # SPDX-License-Identifier: 0BSD | ||
19 | import html.parser | ||
20 | from bs4 import BeautifulSoup | ||
21 | |||
22 | |||
23 | class extractText(html.parser.HTMLParser): | ||
24 | def __init__(self): | ||
25 | super(extractText, self).__init__() | ||
26 | self.result = [] | ||
27 | |||
28 | def handle_data(self, data): | ||
29 | self.result.append(data) | ||
30 | |||
31 | def text_in(self): | ||
32 | return "".join(self.result) | ||
33 | |||
34 | |||
35 | def html2text(html): | ||
36 | k = extractText() | ||
37 | k.feed(html) | ||
38 | return k.text_in() | ||
39 | |||
40 | |||
41 | def cut_text(filename, count): | ||
42 | with open(filename) as html: | ||
43 | soup = BeautifulSoup(html, features="lxml") | ||
44 | for script in soup(["script", "style"]): | ||
45 | script.extract() | ||
46 | k = [] | ||
47 | for i in soup.findAll("p")[1]: | ||
48 | k.append(i) | ||
49 | b = "".join(str(e) for e in k) | ||
50 | text = html2text(b.replace("\n", "")) | ||
51 | textreduced = (text[:count] + " [...]") if len(text) > count else (text) | ||
52 | return textreduced | ||
53 | |||
54 | |||
55 | def cut_news_text(filename, count): | ||
56 | return cut_text("template/news/" + filename + ".j2", count) | ||
57 | |||
58 | |||
59 | # TODO: replace id='...' with frontier so that we can | ||
60 | # pass it in cut_article reusable, or merge cut_text and | ||
61 | # cut_by_frontier. | ||
62 | def cut_by_frontier(filename): | ||
63 | with open(filename) as html: | ||
64 | soup = BeautifulSoup(html, features="lxml") | ||
65 | k = [] | ||
66 | for i in soup.find(id="newspost-content"): | ||
67 | k.append(i) | ||
68 | b = "".join(str(e) in k) | ||
69 | text = b.replace("\n", "") | ||
70 | return text | ||
71 | |||
72 | |||
73 | def cut_article(filename, conf, lang): | ||
74 | return cut_all("template/news/" + filename + ".j2", conf, lang) | ||
75 | |||
76 | |||
77 | def cut_all(filename, conf, lang): | ||
78 | with open(filename) as html: | ||
79 | soup = BeautifulSoup(html, features="lxml") | ||
80 | i = ( | ||
81 | repr(soup) | ||
82 | .replace('{% extends "common/news.j2" %}\n{% block body_content %}\n', "") | ||
83 | .replace("\n{% endblock body_content %}", "") | ||
84 | .replace("<html><body><p></p>", "") | ||
85 | .replace("</body></html>", "") | ||
86 | ) | ||
87 | urlstr = "https://" + conf["siteconf"]["baseurl"] + "/" + lang + "/" | ||
88 | text = ( | ||
89 | i.replace("\n", "") | ||
90 | .replace("{{ url_localized('", urlstr) | ||
91 | .replace("') }}", "") | ||
92 | ) | ||
93 | # .replace('<', '<').replace('>', '>').replace('"', '"') | ||
94 | return text | ||