extract only article content

author: Florian Dold <florian@dold.me> 2021-05-11 19:16:04 +0200
committer: Florian Dold <florian@dold.me> 2021-05-11 19:16:04 +0200
commit: 2b72c7f57d318271856f992eb2e58c133ae5179e (patch)
tree: 9798186e896ae41497f7389153626d9055266ad8
parent: ebfd9c60d0e59f6373309ac96d8abf6094ceefb9 (diff)
download: www_shared-2b72c7f57d318271856f992eb2e58c133ae5179e.tar.gz
www_shared-2b72c7f57d318271856f992eb2e58c133ae5179e.zip
1 file changed, 5 insertions, 5 deletions
diff --git a/sitegen/site.py b/sitegen/site.py
index f9d3e7d..2148763 100644
--- a/sitegen/site.py
+++ b/sitegen/site.py
@@ -65,14 +65,14 @@ def cut_text(filename, count):
        return textreduced
-def extract_body(text):
+def extract_body(text, content_id="newspost-content"):
    """Extract the body of some HTML and
    return it wrapped in an <article> tag."""
    soup = BeautifulSoup(text, features="lxml")
-    bs = soup.findAll("body")
+    content = soup.find(id=content_id)
-    b = bs[0]
+    if content is None:
-    b.name = "article"
+        raise Error("can't extract content")
-    return b.prettify()
+    return content.prettify()
 def make_helpers(root, in_file, locale):
author	Florian Dold <florian@dold.me>	2021-05-11 19:16:04 +0200
committer	Florian Dold <florian@dold.me>	2021-05-11 19:16:04 +0200
commit	2b72c7f57d318271856f992eb2e58c133ae5179e (patch)
tree	9798186e896ae41497f7389153626d9055266ad8
parent	ebfd9c60d0e59f6373309ac96d8abf6094ceefb9 (diff)
download	www_shared-2b72c7f57d318271856f992eb2e58c133ae5179e.tar.gz www_shared-2b72c7f57d318271856f992eb2e58c133ae5179e.zip

diff --git a/sitegen/site.py b/sitegen/site.py
index f9d3e7d..2148763 100644
--- a/sitegen/site.py
+++ b/sitegen/site.py
@@ -65,14 +65,14 @@ def cut_text(filename, count):
65	return textreduced	65	return textreduced
66		66
67		67
68	def extract_body(text):	68	def extract_body(text, content_id="newspost-content"):
69	"""Extract the body of some HTML and	69	"""Extract the body of some HTML and
70	return it wrapped in an <article> tag."""	70	return it wrapped in an <article> tag."""
71	soup = BeautifulSoup(text, features="lxml")	71	soup = BeautifulSoup(text, features="lxml")
72	bs = soup.findAll("body")	72	content = soup.find(id=content_id)
73	b = bs[0]	73	if content is None:
74	b.name = "article"	74	raise Error("can't extract content")
75	return b.prettify()	75	return content.prettify()
76		76
77		77
78	def make_helpers(root, in_file, locale):	78	def make_helpers(root, in_file, locale):