about summary refs log tree commit diff
path: root/rssg
diff options
context:
space:
mode:
Diffstat (limited to 'rssg')
-rwxr-xr-x rssg 191
1 files changed, 191 insertions, 0 deletions
diff --git a/rssg b/rssg
new file mode 100755
index 00000000..4435ce60
--- /dev/null
+++ b/rssg
@@ -0,0 +1,191 @@
1#!/bin/sh
2#
3# https://www.romanzolotarev.com/bin/rssg
4# Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com>
5# Copyright 2019 ng0 <ng0@n0.is>
6#
7# Permission to use, copy, modify, and/or distribute this software for any
8# purpose with or without fee is hereby granted, provided that the above
9# copyright notice and this permission notice appear in all copies.
10#
11# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18#
# Abort the script as soon as an unchecked command exits non-zero.
set -o errexit
20
21
# main INDEX TITLE
#
# Entry point: build an RSS feed from an index page and write it to stdout.
#   $1  path to the index file; a name ending in "html" is read verbatim,
#       a name ending in "md" is converted with md_to_html
#   $2  feed title
# Calls usage (exit 1) when an argument is missing or no HTML could be
# obtained, and no_file (exit 2) when $1 is not a regular file.
# A one-line summary "[rssg] <file> <n> items" is written to stderr.
main () {
	test -n "$1" || usage
	test -n "$2" || usage
	test -f "$1" || no_file "$1"

	index_file=$(readlink -f "$1")

	# Start from an empty value so that an "html" variable inherited from
	# the environment can never be mistaken for page content.
	html=''
	case "$index_file" in
	*html) html=$(cat "$index_file") ;;
	*md) html=$(md_to_html "$index_file") ;;
	esac
	test -n "$html" || usage

	# Directory holding the index file; item paths are resolved against it.
	base="${index_file%/*}"

	# printf is used instead of echo for page content: some shells' echo
	# interprets backslash sequences and would alter the HTML.
	url=$(printf '%s\n' "$html" | get_url)

	# Feed URL with its last path component removed.
	base_url="${url%/*}"

	title="$2"

	description=$(printf '%s\n' "$html" |
		get_description |
		remove_tags |
		remove_nbsp)

	items=$(printf '%s\n' "$html" | get_items)

	rss=$(printf '%s\n' "$items" |
		render_items "$base" "$base_url" |
		render_feed "$url" "$title" "$description")

	# grep -c exits 1 when it counts zero matches; that is not an error here.
	item_count=$(printf '%s\n' "$rss" | grep -c '<item>') || :

	# The working directory is quoted so it is stripped as a literal prefix
	# rather than being interpreted as a glob pattern.
	printf '[rssg] %s %s items\n' \
		"${index_file##"$(pwd)"/}" "$item_count" >&2
	printf '%s\n' "$rss"
}
53
54
# usage
#
# Write the command synopsis to stderr and terminate with exit status 1.
usage() {
	>&2 echo "usage: ${0##*/} index.{html,md} title > rss.xml"
	exit 1
}
59
60
# no_file PATH
#
# Report on stderr that PATH ($1) does not exist and terminate with
# exit status 2.
no_file() {
	>&2 echo "${0##*/}: $1: No such file"
	exit 2
}
65
66
# md_to_html FILE
#
# Convert the Markdown file $1 to HTML on stdout using lowdown.
# Exits with status 3 (after a message on stderr) when lowdown is not
# available.
md_to_html() {
	# "command -v" is the POSIX way to probe for a command; "which" is an
	# external tool that is not guaranteed to exist or to behave uniformly.
	if ! command -v lowdown >/dev/null 2>&1
	then
		echo "${0##*/}: lowdown: command not found" >&2
		exit 3
	fi

	lowdown \
		-D html-skiphtml \
		-D smarty \
		-d metadata \
		-d autolink "$1"
}
75
76
# get_title
#
# Read HTML on stdin and print the first line that begins with "<h1"
# (matched case-insensitively) with every tag removed. Reading stops at
# that line; nothing is printed when no such line exists.
get_title() {
	awk '{
		if (tolower($0) ~ /^<h1/) {
			gsub(/<[^>]*>/, "")
			print
			exit
		}
	}'
}
80
81
# get_url
#
# Read HTML on stdin and print the feed URL: the href value of an <a> tag
# whose target ends in "rss.xml" (file name matched case-insensitively),
# taken from the first input line that contains one.
# Prints nothing when no such link is found.
get_url() {
	# [^"]* keeps the capture inside one attribute value, so attributes
	# following href (title=, class=, ...) or further links on the same
	# line cannot leak into the URL. With -n/p, lines that do not contain
	# a matching link produce no output instead of being echoed verbatim.
	sed -n 's#.*<[Aa] [^>]*href="\([^"]*[Rr][Ss][Ss]\.[Xx][Mm][Ll]\)".*#\1#p' |
		head -n 1
}
86
87
# get_items
#
# Read HTML on stdin and print one line per link that carries both an
# href and a later title attribute, in the form:
#   <href value> <title value> <link text>
# Lines without such a link produce no output.
get_items() {
	# [^"]* confines each capture to its own attribute value and [^>]*
	# tolerates extra attributes inside the same tag. -n/p drops lines
	# that do not fit the pattern rather than passing them through as
	# malformed items.
	sed -n 's#.*href="\([^"]*\)"[^>]* title="\([^"]*\)"[^>]*>\(.*\)</a>.*#\1 \2 \3#p'
}
92
93
# get_description
#
# Read HTML on stdin and print the text between an opening <p ...> tag
# and the next closing </p> tag, then stop reading.
# On the line where the range starts, everything up to and including the
# opening tag is removed; on the line where it ends, the closing tag and
# everything after it is removed.
get_description() {
	# s and t are regex fragments: whitespace, and the tag name "p" in
	# either case. The range pattern has no action, so matching lines are
	# printed; x is set once the closing tag has been stripped and then
	# triggers the exit.
	awk -v 's=[[:space:]]' -v 't=[Pp]' '
		sub("^.*<" s "*" t "(" s "[^>]*)?>", ""), sub("</" s "*" t s "*>.*", "") && x = 1
		x { exit }
	'
}
99
# remove_tags
#
# Filter stdin to stdout, deleting every "<...>" sequence.
remove_tags() {
	# One expression is enough: "<[^>]*>" already matches closing tags
	# such as "</p>", so a separate "</...>" pass can never find anything.
	sed 's#<[^>]*>##g'
}
103
104
# remove_nbsp
#
# Filter stdin to stdout, turning every "&nbsp;" entity into a plain space.
remove_nbsp() {
	sed -e 's/&nbsp;/ /g'
}
108
109
# rel_to_abs_urls SITE_URL BASE_URL
#
# Filter HTML from stdin to stdout, rewriting src= and href= values:
#   - values starting with "/" are prefixed with SITE_URL ($1)
#   - values containing none of ":", "/" or '"' are prefixed with
#     BASE_URL ($2) and a "/"
# Other values are left untouched.
rel_to_abs_urls() {
	site_url="$1"
	base_url="$2"

	# The URLs are spliced into a sed replacement, where "\" and "&" are
	# special and "#" is the delimiter in use; escape them so the URLs are
	# always inserted literally.
	site_esc=$(printf '%s\n' "$site_url" | sed 's/[\\&#]/\\&/g')
	base_esc=$(printf '%s\n' "$base_url" | sed 's/[\\&#]/\\&/g')

	abs='s#(src|href)="/([^"]*)"#\1="'"$site_esc"/'\2"#g'
	rel='s#(src|href)="([^:/"]*)"#\1="'"$base_esc"/'\2"#g'
	sed -E "$abs;$rel"
}
118
119
# date_rfc_822 DATE
#
# Print DATE ($1) in RFC 822 form, e.g. "Wed, 02 Jan 2019 00:00:00 +0000".
# Every non-digit is removed from DATE and "0000" is appended, so
# "2019-01-02" becomes the timestamp 201901020000 (ccyymmddHHMM).
# An argument without digits yields just "0000", which is taken as 00:00
# of the current day.
# Returns non-zero when the timestamp cannot be interpreted.
date_rfc_822() {
	rfc822_fmt='+%a, %d %b %Y %H:%M:%S %z'
	rfc822_stamp="$(printf '%s' "$1" | tr -cd '[:digit:]')0000"

	# LC_ALL=C keeps day and month names in English regardless of the
	# caller's locale.
	# BSD date: -j parses the timestamp without setting the clock.
	LC_ALL=C date -j "$rfc822_fmt" "$rfc822_stamp" 2>/dev/null && return 0

	# Fallback for date implementations without -j that accept -d
	# (e.g. GNU): translate the digit string into a form -d understands.
	case ${#rfc822_stamp} in
	4)
		rfc822_when='today 00:00'
		;;
	12)
		rfc822_when=$(printf '%s\n' "$rfc822_stamp" |
			sed 's/^\(....\)\(..\)\(..\)\(..\)\(..\)$/\1-\2-\3 \4:\5/')
		;;
	*)
		echo "${0##*/}: $1: cannot parse date" >&2
		return 1
		;;
	esac
	LC_ALL=C date -d "$rfc822_when" "$rfc822_fmt"
}
124
125
# render_items BASE BASE_URL
#
# Read item lines from stdin and call render_item BASE BASE_URL LINE for
# each non-empty one; the rendered items go to stdout.
render_items() {
	while read -r i
	do
		# An empty item list reaches this loop as a single blank line;
		# skip it instead of rendering an item with no URL and no date.
		test -n "$i" || continue
		render_item "$1" "$2" "$i"
	done
}
131
132
# render_item BASE BASE_URL ITEM
#
# Print one RSS <item> element on stdout.
#   $1  directory that item paths are resolved against
#   $2  URL prefix used for the item's guid/link and for relative links
#   $3  item line: first field is the item path, second field its date
# The item body is the part of the page between the <!--BEGIN--> and
# <!--END--> marker lines. If a Markdown file with the same name (".html"
# replaced by ".md") exists, it is converted and used instead of the HTML.
render_item() {
	base="$1"
	base_url="$2"
	item="$3"

	# Scheme and host part of base_url (same derivation as in render_feed),
	# so that root-relative links resolve against the site root even when
	# base_url is more than one directory deep.
	site_url="$(printf '%s\n' "$base_url" | cut -d '/' -f1-3)"

	date=$(printf '%s\n' "$item" | awk '{print $2}')
	url=$(printf '%s\n' "$item" | awk '{print $1}')

	f="$base/$url"

	# Reset first: otherwise a missing item file would silently reuse
	# whatever HTML the caller left in this variable.
	html=''
	if test -f "$f"
	then
		html=$(cat "$f")
	fi
	if test -f "${f%.html}.md"
	then
		html=$(md_to_html "${f%.html}.md")
	fi
	test -n "$html" || echo "${0##*/}: $f: no content found for item" >&2

	# printf rather than echo: page content may contain backslashes, which
	# some shells' echo would interpret.
	description=$(
		printf '%s\n' "$html" |
		rel_to_abs_urls "$site_url" "$base_url" |
		remove_nbsp | awk '/<!--BEGIN-->/,/<!--END-->/'
	)
	title=$(printf '%s\n' "$description" | get_title)
	guid="$base_url/${url#/}"

	printf '%s\n' '
<item>
<guid>'"$guid"'</guid>
<link>'"$guid"'</link>
<pubDate>'"$(date_rfc_822 "$date")"'</pubDate>
<title>'"$title"'</title>
<description><![CDATA[

'"$description"'

]]></description>
</item>'
}
169
170
# render_feed URL TITLE DESCRIPTION
#
# Print the complete RSS document on stdout.
#   $1  URL of the feed itself (used for the atom:link element)
#   $2  channel title; "&nbsp;" entities are replaced by spaces
#   $3  channel description
# The already rendered <item> elements are read from stdin and placed
# inside the channel.
render_feed() {
	url="$1"
	title=$(printf '%s\n' "$2" | remove_nbsp)
	description="$3"

	# First three "/"-separated fields of the feed URL, i.e. scheme and host.
	base_url="$(printf '%s\n' "$url" | cut -d '/' -f1-3)"

	# printf '%s\n' instead of echo: the items read from stdin contain
	# arbitrary page content, and some shells' echo would interpret any
	# backslash sequences in it.
	# date_rfc_822 is handed the word "date", which contains no digits.
	printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<atom:link href="'"$url"'" rel="self" type="application/rss+xml" />
<title>'"$title"'</title>
<description>'"$description"'</description>
<link>'"$base_url"'/</link>
<lastBuildDate>'"$(date_rfc_822 date)"'</lastBuildDate>
'"$(cat)"'
</channel></rss>'
}
189
190
# Run the generator with the script's command-line arguments.
main "$@"