diff options
author | Florian Dold <florian@dold.me> | 2021-05-11 19:16:04 +0200 |
---|---|---|
committer | Florian Dold <florian@dold.me> | 2021-05-11 19:16:04 +0200 |
commit | 2b72c7f57d318271856f992eb2e58c133ae5179e (patch) | |
tree | 9798186e896ae41497f7389153626d9055266ad8 | |
parent | ebfd9c60d0e59f6373309ac96d8abf6094ceefb9 (diff) | |
download | www_shared-2b72c7f57d318271856f992eb2e58c133ae5179e.tar.gz www_shared-2b72c7f57d318271856f992eb2e58c133ae5179e.zip |
extract only article content
-rw-r--r-- | sitegen/site.py | 10 |
1 file changed, 5 insertions, 5 deletions
diff --git a/sitegen/site.py b/sitegen/site.py index f9d3e7d..2148763 100644 --- a/sitegen/site.py +++ b/sitegen/site.py | |||
@@ -65,14 +65,14 @@ def cut_text(filename, count): | |||
65 | return textreduced | 65 | return textreduced |
66 | 66 | ||
67 | 67 | ||
68 | def extract_body(text): | 68 | def extract_body(text, content_id="newspost-content"): |
69 | """Extract the body of some HTML and | 69 | """Extract the body of some HTML and |
70 | return it wrapped in an <article> tag.""" | 70 | return it wrapped in an <article> tag.""" |
71 | soup = BeautifulSoup(text, features="lxml") | 71 | soup = BeautifulSoup(text, features="lxml") |
72 | bs = soup.findAll("body") | 72 | content = soup.find(id=content_id) |
73 | b = bs[0] | 73 | if content is None: |
74 | b.name = "article" | 74 | raise Error("can't extract content") |
75 | return b.prettify() | 75 | return content.prettify() |
76 | 76 | ||
77 | 77 | ||
78 | def make_helpers(root, in_file, locale): | 78 | def make_helpers(root, in_file, locale): |