1 files changed, 191 insertions, 0 deletions
diff --git a/rssg b/rssg
new file mode 100755
index 00000000..4435ce60
--- /dev/null
+++ b/rssg
@@ -0,0 +1,191 @@
+#!/bin/sh
+#
+# https://www.romanzolotarev.com/bin/rssg
+# Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com>
+# Copyright 2019 ng0 <ng0@n0.is>
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+# Abort the script as soon as a command fails, except where the failure is
+# tested by a condition or an &&/|| list.
+set -e
+# Build an RSS 2.0 feed from an index page and write it to stdout.
+#
+# Arguments:
+#   $1 - index file; a name ending in "html" is read as-is, a name ending
+#        in "md" is converted by md_to_html
+#   $2 - feed title
+# Outputs: the feed on stdout and a "[rssg] <file> <n> items" line on stderr.
+# Exits through usage or no_file when an argument is missing or unusable.
+main () {
+        test -n "$1" || usage
+        test -n "$2" || usage
+        test -f "$1" || no_file "$1"
+        index_file=$(readlink -f "$1")
+        # Start from an empty value so that a variable named html inherited
+        # from the environment is never mistaken for page content.
+        html=""
+        case "$index_file" in
+                *html) html=$(cat "$index_file") ;;
+                *md) html=$(md_to_html "$index_file") ;;
+        esac
+        test -n "$html" || usage
+        base="${index_file%/*}"
+        # printf rather than echo: echo expands backslash escapes in some sh
+        # implementations, which would alter the markup being piped.
+        url=$(printf '%s\n' "$html" | get_url)
+        # The feed URL without its last path component.
+        base_url=$(printf '%s\n' "$url" | sed 's#\(.*\)/[^/]*#\1#')
+        title="$2"
+        description=$(printf '%s\n' "$html" |
+                get_description |
+                remove_tags |
+                remove_nbsp)
+        items=$(printf '%s\n' "$html" | get_items)
+        rss=$(printf '%s\n' "$items" |
+                render_items "$base" "$base_url" |
+                render_feed "$url" "$title" "$description")
+        # grep -c exits non-zero when it counts no match; "|| true" keeps
+        # set -e from aborting on an empty feed.
+        item_count=$(printf '%s\n' "$rss" | grep -c '<item>' || true)
+        # The working directory is quoted inside the expansion so that it is
+        # stripped as literal text, not interpreted as a glob pattern.
+        printf '%s\n' "[rssg] ${index_file##"$(pwd)"/} $item_count items" >&2
+        printf '%s\n' "$rss"
+}
+# Print the command synopsis on stderr and terminate with status 1.
+usage() {
+        printf '%s\n' "usage: ${0##*/} index.{html,md} title > rss.xml" 1>&2
+        exit 1
+}
+# Report on stderr that the file named by $1 does not exist, then terminate
+# with status 2.
+no_file() {
+        printf '%s\n' "${0##*/}: $1: No such file" 1>&2
+        exit 2
+}
+# Convert a Markdown file to HTML with lowdown and print it on stdout.
+#
+# Arguments:
+#   $1 - path of the Markdown file
+# Exits with status 3, after a message on stderr, if lowdown is unavailable.
+md_to_html() {
+        # command -v is the POSIX way to look a command up; which is an
+        # external tool whose output and exit status vary between systems.
+        if ! command -v lowdown >/dev/null 2>&1
+        then
+                echo "${0##*/}: lowdown: command not found" >&2
+                exit 3
+        fi
+        lowdown \
+        -D html-skiphtml \
+        -D smarty \
+        -d metadata \
+        -d autolink "$1"
+}
+# Print the text of the first line on stdin that starts with "<h1" (in any
+# letter case), with every tag removed. Prints nothing if there is no such
+# line.
+get_title() {
+        awk '
+                tolower($0) ~ /^<h1/ {
+                        gsub(/<[^>]*>/, "")
+                        print
+                        exit
+                }
+        '
+}
+# Print the href of the first anchor on stdin that links to an rss.xml file.
+#
+# Only the first matching line is considered. The capture stops at the
+# closing quote of the href value, so attributes that follow it on the same
+# anchor are not included in the result.
+get_url() {
+        grep -i '<a .*rss\.xml"' | head -1 |
+        sed 's#.*href="\([^"]*[Rr][Ss][Ss]\.[Xx][Mm][Ll]\)".*#\1#'
+}
+# List the posts linked from the index page read on stdin.
+#
+# A post is an anchor whose href attribute is directly followed by a title
+# attribute. One line is printed per post: "<href> <title> <link text>".
+# Each capture stops at the closing quote of its attribute, further
+# attributes before ">" are skipped, and lines that do not form a complete
+# anchor are dropped instead of being passed through unchanged.
+get_items() {
+        grep -i 'href=".*" title="' |
+        sed -n 's#.*href="\([^"]*\)" title="\([^"]*\)"[^>]*>\(.*\)</a>.*#\1 \2 \3#p'
+}
+# Print the content of one paragraph read from stdin: output starts on the
+# first line holding an opening <p> tag and stops on the line holding the
+# next closing </p> tag.
+#
+# The awk program is a range pattern whose two ends are sub() calls, so
+# each end both selects a line and trims it before the default action
+# prints it.
+get_description() {
+        # Range start: delete everything from the beginning of the line up
+        # to and including an opening <p> tag (attributes allowed).
+        start='sub("^.*<"s"*"t"("s"[^>]*)?>","")'
+        # Range end: delete a closing </p> tag and all text after it; x=1
+        # records that the end of the paragraph was reached.
+        stop='sub("</"s"*"t""s"*>.*","")&&x=1'
+        # s and t are regex fragments spliced into the patterns above.
+        # The trailing "x{exit}" rule stops awk once the range has ended, so
+        # later paragraphs are never printed.
+        awk -v 's=[[:space:]]' -v 't=[Pp]' "$start,$stop;x{exit}"
+}
+# Delete every "<...>" tag from the text read on stdin.
+#
+# One substitution is enough: closing tags such as </p> also match
+# "<[^>]*>", so a separate rule for them could never find anything left.
+remove_tags() {
+        sed 's#<[^>]*>##g'
+}
+# Replace every &nbsp; entity in the text read on stdin with a plain space.
+remove_nbsp() {
+        sed -e 's#\&nbsp;# #g'
+}
+# Rewrite relative src/href attribute values read on stdin as absolute URLs.
+#
+# Arguments:
+#   $1 - site URL, put in front of values that start with "/"
+#   $2 - base URL, put in front of every other value that contains no ":"
+rel_to_abs_urls() {
+        # Escape the characters that are special in the replacement half of
+        # the s### commands below: "&", "\" and the "#" delimiter.
+        site=$(printf '%s\n' "$1" | sed 's/[&#\\]/\\&/g')
+        page=$(printf '%s\n' "$2" | sed 's/[&#\\]/\\&/g')
+        abs='s#(src|href)="/([^"]*)"#\1="'"$site"/'\2"#g'
+        # Only the first character has to differ from "/", so relative paths
+        # with sub-directories (img/a.png) are rewritten as well. Values
+        # already made absolute by the first rule contain ":" and are skipped.
+        rel='s#(src|href)="(([^:/"][^:"]*)?)"#\1="'"$page"/'\2"#g'
+        sed -E -e "$abs" -e "$rel"
+}
+# Print a date in the RFC 822 layout used by RSS (pubDate, lastBuildDate).
+#
+# Arguments:
+#   $1 - text holding a date; only its digits are used. Eight digits are
+#        read as YYYYMMDD and no digits at all means today. The time of
+#        day is always 00:00.
+# Returns non-zero if the date cannot be converted.
+date_rfc_822() {
+        rfc_822_format='+%a, %d %b %Y %H:%M:%S %z'
+        digits=$(printf '%s' "$1" | tr -cd '[:digit:]')
+        # LC_ALL=C keeps day and month names in English, as RFC 822 requires.
+        if date --version >/dev/null 2>&1
+        then
+                # GNU date has no -j flag; it takes the date as a string
+                # through -d instead.
+                case ${#digits} in
+                        0)
+                                LC_ALL=C date -d '00:00' "$rfc_822_format"
+                                ;;
+                        8)
+                                year=${digits%????}
+                                month_day=${digits#????}
+                                LC_ALL=C date \
+                                -d "$year-${month_day%??}-${month_day#??} 00:00" \
+                                "$rfc_822_format"
+                                ;;
+                        *)
+                                return 1
+                                ;;
+                esac
+        else
+                # BSD date: -j converts the [[[[[cc]yy]mm]dd]HH]MM operand
+                # without setting the clock; "0000" supplies HHMM.
+                LC_ALL=C date -j "$rfc_822_format" "${digits}0000"
+        fi
+}
+# Render every item line read on stdin with render_item.
+#
+# Arguments:
+#   $1 - directory that holds the index file
+#   $2 - base URL of the feed
+# Both are passed through to render_item together with the line.
+render_items() {
+        while read -r i
+        do
+                # An index without posts reaches this loop as a single empty
+                # line; rendering it would emit an item with no link, date
+                # or content.
+                test -n "$i" || continue
+                render_item "$1" "$2" "$i"
+        done
+}
+# Print the <item> element for one post.
+#
+# Arguments:
+#   $1 - directory that holds the index file; post paths are relative to it
+#   $2 - base URL of the feed
+#   $3 - item line as printed by get_items: "<href> <date> <link text>"
+#
+# The post body is the part of the page between the <!--BEGIN--> and
+# <!--END--> markers. A Markdown source next to the .html file takes
+# precedence over the .html file itself.
+render_item() {
+        base="$1"
+        base_url="$2"
+        item="$3"
+        # Scheme and host only, the same split render_feed uses. Root-relative
+        # links must resolve against the site root even when the feed lives
+        # several directories deep.
+        site_url=$(printf '%s\n' "$base_url" | cut -d '/' -f1-3)
+        date=$(printf '%s\n' "$item" | awk '{print $2}')
+        url=$(printf '%s\n' "$item" | awk '{print $1}')
+        f="$base/$url"
+        # Reset per item: otherwise a missing post file would silently reuse
+        # whatever markup an earlier step left in $html.
+        html=""
+        if test -f "${f%\.html}.md"
+        then
+                html=$(md_to_html "${f%\.html}.md")
+        elif test -f "$f"
+        then
+                html=$(cat "$f")
+        else
+                echo "${0##*/}: $f: No such file" >&2
+        fi
+        # printf rather than echo: echo expands backslash escapes in some sh
+        # implementations. The final sed splits any "]]>" in the body so it
+        # cannot close the CDATA section early.
+        description=$(
+                printf '%s\n' "$html" |
+                rel_to_abs_urls "$site_url" "$base_url" |
+                remove_nbsp |
+                awk '/<!--BEGIN-->/,/<!--END-->/' |
+                sed 's#]]>#]]]]><![CDATA[>#g'
+        )
+        title=$(printf '%s\n' "$description" | get_title)
+        guid="$base_url/$(printf '%s\n' "$url" | sed 's#^/##')"
+        printf '%s\n' '
+<item>
+<guid>'"$guid"'</guid>
+<link>'"$guid"'</link>
+<pubDate>'"$(date_rfc_822 "$date")"'</pubDate>
+<title>'"$title"'</title>
+<description><![CDATA[
+'"$description"'
+]]></description>
+</item>'
+}
+# Wrap the <item> elements read on stdin in a complete RSS 2.0 document and
+# print it on stdout.
+#
+# Arguments:
+#   $1 - URL of the feed itself (atom:link rel="self")
+#   $2 - feed title; &nbsp; entities are replaced by spaces
+#   $3 - feed description
+render_feed() {
+        url="$1"
+        # printf rather than echo: echo expands backslash escapes in some sh
+        # implementations, which would alter titles and item markup.
+        title=$(printf '%s\n' "$2" | remove_nbsp)
+        description="$3"
+        # Scheme and host of the feed URL, used as the channel link.
+        base_url="$(printf '%s\n' "$url" | cut -d '/' -f1-3)"
+        # The word "date" holds no digits, so date_rfc_822 reports today.
+        printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
+<channel>
+<atom:link href="'"$url"'" rel="self" type="application/rss+xml" />
+<title>'"$title"'</title>
+<description>'"$description"'</description>
+<link>'"$base_url"'/</link>
+<lastBuildDate>'"$(date_rfc_822 date)"'</lastBuildDate>
+'"$(cat)"'
+</channel></rss>'
+}
+# Entry point: run main with the script's command-line arguments.
+main "$@"

diff --git a/rssg b/rssg new file mode 100755 index 00000000..4435ce60 --- /dev/null +++ b/rssg
@@ -0,0 +1,191 @@
	1	#!/bin/sh
	2	#
	3	# https://www.romanzolotarev.com/bin/rssg
	4	# Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com>
	5	# Copyright 2019 ng0 <ng0@n0.is>
	6	#
	7	# Permission to use, copy, modify, and/or distribute this software for any
	8	# purpose with or without fee is hereby granted, provided that the above
	9	# copyright notice and this permission notice appear in all copies.
	10	#
	11	# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
	12	# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
	13	# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
	14	# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
	15	# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
	16	# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
	17	# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
	18	#
	19	set -e
	20
	21
	22	main () {
	23	test -n "$1" \|\| usage
	24	test -n "$2" \|\| usage
	25	test -f "$1" \|\| no_file "$1"
	26
	27
	28	index_file=$(readlink -f "$1")
	29	test -z "${index_file##*html}" && html=$(cat "$index_file")
	30	test -z "${index_file##*md}" && html=$(md_to_html "$index_file")
	31	test -n "$html" \|\| usage
	32
	33	base="${index_file%/*}"
	34	base_url="$(echo "$html" \| get_url \| sed 's#\(.*\)/[^/]*#\1#')"
	35
	36	url=$( echo "$html" \| get_url)
	37
	38	title="$2"
	39
	40	description=$( echo "$html" \| get_description \|
	41	remove_tags \|
	42	remove_nbsp )
	43
	44	items=$( echo "$html" \| get_items)
	45
	46	rss=$( echo "$items" \|
	47	render_items "$base" "$base_url" \|
	48	render_feed "$url" "$title" "$description")
	49
	50	>&2 echo "[rssg] ${index_file##$(pwd)/} $(echo "$rss" \| grep -c '<item>') items"
	51	echo "$rss"
	52	}
	53
	54
	55	usage() {
	56	echo "usage: ${0##*/} index.{html,md} title > rss.xml" >&2
	57	exit 1
	58	}
	59
	60
	61	no_file() {
	62	echo "${0##*/}: $1: No such file" >&2
	63	exit 2
	64	}
	65
	66
	67	md_to_html() {
	68	test -x "$(which lowdown)" \|\| exit 3
	69	lowdown \
	70	-D html-skiphtml \
	71	-D smarty \
	72	-d metadata \
	73	-d autolink "$1"
	74	}
	75
	76
	77	get_title() {
	78	awk 'tolower($0)~/^<h1/{gsub(/<[^>]*>/,"",$0);print;exit}'
	79	}
	80
	81
	82	get_url() {
	83	grep -i '<a .*rss.xml"' \| head -1 \|
	84	sed 's#.*href="\(.*\)".*#\1#'
	85	}
	86
	87
	88	get_items() {
	89	grep -i 'href=".*" title="' \|
	90	sed 's#.*href="\(.*\)" title="\(.*\)">\(.*\)</a>.*#\1 \2 \3#'
	91	}
	92
	93
	94	get_description() {
	95	start='sub("^.*<"s"*"t"("s"[^>]*)?>","")'
	96	stop='sub("</"s"*"t""s"*>.*","")&&x=1'
	97	awk -v 's=[[:space:]]' -v 't=[Pp]' "$start,$stop;x{exit}"
	98	}
	99
	100	remove_tags() {
	101	sed 's#<[^>]*>##g;s#</[^>]*>##g'
	102	}
	103
	104
	105	remove_nbsp() {
	106	sed 's#\&nbsp;# #g'
	107	}
	108
	109
	110	rel_to_abs_urls() {
	111	site_url="$1"
	112	base_url="$2"
	113
	114	abs='s#(src\|href)="/([^"]*)"#\1="'"$site_url"/'\2"#g'
	115	rel='s#(src\|href)="([^:/"]*)"#\1="'"$base_url"/'\2"#g'
	116	sed -E "$abs;$rel"
	117	}
	118
	119
	120	date_rfc_822() {
	121	date -j '+%a, %d %b %Y %H:%M:%S %z' \
	122	"$(echo "$1"\| tr -cd '[:digit:]')0000"
	123	}
	124
	125
	126	render_items() {
	127	while read -r i
	128	do render_item "$1" "$2" "$i"
	129	done
	130	}
	131
	132
	133	render_item() {
	134	base="$1"
	135	base_url="$2"
	136	item="$3"
	137
	138	site_url="$(echo "$base_url"\| sed 's#\(.*//.*\)/.*#\1#')"
	139
	140	date=$(echo "$item"\|awk '{print$2}')
	141
	142	url=$(echo "$item"\|awk '{print$1}')
	143
	144	f="$base/$url"
	145	test -f "$f" && html=$(cat "$f")
	146	test -f "${f%\.html}.md" && html=$(md_to_html "${f%\.html}.md")
	147
	148	description=$(
	149	echo "$html" \|
	150	rel_to_abs_urls "$site_url" "$base_url" \|
	151	remove_nbsp \| awk '/<!--BEGIN-->/,/<!--END-->/'
	152	)
	153	title=$(echo "$description" \| get_title)
	154	guid="$base_url/$(echo "$url" \| sed 's#^/##')"
	155
	156	echo '
	157	<item>
	158	<guid>'"$guid"'</guid>
	159	<link>'"$guid"'</link>
	160	<pubDate>'"$(date_rfc_822 "$date")"'</pubDate>
	161	<title>'"$title"'</title>
	162	<description><![CDATA[
	163
	164	'"$description"'
	165
	166	]]></description>
	167	</item>'
	168	}
	169
	170
	171	render_feed() {
	172	url="$1"
	173	title=$(echo "$2" \| remove_nbsp)
	174	description="$3"
	175
	176	base_url="$(echo "$url" \| cut -d '/' -f1-3)"
	177
	178	echo '<?xml version="1.0" encoding="UTF-8"?>
	179	<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
	180	<channel>
	181	<atom:link href="'"$url"'" rel="self" type="application/rss+xml" />
	182	<title>'"$title"'</title>
	183	<description>'"$description"'</description>
	184	<link>'"$base_url"'/</link>
	185	<lastBuildDate>'"$(date_rfc_822 date)"'</lastBuildDate>
	186	'"$(cat)"'
	187	</channel></rss>'
	188	}
	189
	190
	191	main "$@"