aboutsummaryrefslogtreecommitdiff
path: root/rssg
diff options
context:
space:
mode:
Diffstat (limited to 'rssg')
-rwxr-xr-xrssg220
1 files changed, 0 insertions, 220 deletions
diff --git a/rssg b/rssg
deleted file mode 100755
index e21974f4..00000000
--- a/rssg
+++ /dev/null
@@ -1,220 +0,0 @@
1#!/bin/sh
2#
3# https://www.romanzolotarev.com/bin/rssg
4# Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com>
5# Copyright 2019 ng0 <ng0@n0.is>
6#
7# Permission to use, copy, modify, and/or distribute this software for any
8# purpose with or without fee is hereby granted, provided that the above
9# copyright notice and this permission notice appear in all copies.
10#
11# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18#
19set -e
20
21# TODO: Fix date -j for GNU date.
22# TODO: Fix base url (get_url)
23# TODO: Fix link OR a in get_url
24# TODO: get_title: be more dynamic about where the title can be.
25
# Entry point: build an RSS feed from an index page and print it on stdout.
#
# Arguments:
#   $1 - path to the index file; must exist and end in "html" or "md"
#   $2 - feed title
# Outputs:
#   stdout - the rendered feed
#   stderr - a one-line "[rssg] <file> <n> items" summary
# Exits through usage (status 1) on missing arguments or when no HTML could
# be obtained, and through no_file (status 2) when $1 is not a regular file.
main () {
  test -n "$1" || usage
  test -n "$2" || usage
  test -f "$1" || no_file "$1"

  index_file=$(readlink -f "$1")

  # Start from a known-empty value so that a stale or inherited $html can
  # never satisfy the guard below for an unsupported file type.
  html=
  case "$index_file" in
    *html) html=$(cat "$index_file") ;;
    *md) html=$(md_to_html "$index_file") ;;
  esac
  test -n "$html" || usage

  base="${index_file%/*}"

  # Extract the feed URL once; base_url is that URL minus its last component.
  url=$(printf '%s\n' "$html" | get_url)
  base_url=$(printf '%s\n' "$url" | sed 's#\(.*\)/[^/]*#\1#')

  title="$2"

  description=$(printf '%s\n' "$html" |
    get_description |
    remove_tags |
    remove_nbsp)

  items=$(printf '%s\n' "$html" | get_items)

  rss=$(printf '%s\n' "$items" |
    render_items "$base" "$base_url" |
    render_feed "$url" "$title" "$description")

  # grep -c exits non-zero when it counts 0 matches; that must not trip set -e.
  count=$(printf '%s\n' "$rss" | grep -c '<item>') || true

  # Quote the directory inside the expansion so it is stripped literally
  # instead of being interpreted as a glob pattern.
  cwd=$(pwd)
  rel_path=${index_file#"$cwd"/}

  echo "[rssg] $rel_path $count items" >&2
  printf '%s\n' "$rss"
}
57
58
# Print the command synopsis on stderr and terminate with exit status 1.
usage() {
  {
    echo "usage: ${0##*/} index.{html,md} title > rss.xml"
  } 1>&2
  exit 1
}
63
64
# Report on stderr that the file named by $1 does not exist, then terminate
# with exit status 2.
no_file() {
  {
    echo "${0##*/}: $1: No such file"
  } 1>&2
  exit 2
}
69
70
# Convert the Markdown file $1 to HTML on stdout by running lowdown.
#
# Arguments:
#   $1 - path of the Markdown file
# Exits with status 3 (after a message on stderr) when lowdown cannot be
# found.
md_to_html() {
  # command -v is the portable way to probe for a command; `which` is not
  # specified by POSIX and its output/exit status vary between systems.
  if ! command -v lowdown >/dev/null 2>&1; then
    echo "${0##*/}: lowdown is required to convert Markdown files" >&2
    exit 3
  fi

  lowdown \
    -D html-skiphtml \
    -D smarty \
    -d metadata \
    -d autolink "$1"
}
79
80
# Read HTML on stdin and print the text of the first line containing "<h1",
# with the literal "<h1>" and "</h1>" markers removed and surrounding
# blanks trimmed. Prints nothing when no such line exists.
get_title() {
  awk -F '[<>]' '/<h1/ { gsub(/<h1>/, ""); gsub(/<\/h1>/, ""); sub(/ .*/, "", $3); print $0}' |
    # [[:blank:]] is space and tab. The former "[ \t]" is only that in GNU
    # sed; in a POSIX bracket expression it means space, backslash and the
    # letter t, which strips leading/trailing "t" characters from titles.
    sed -e 's/^[[:blank:]]*//' -e 's/[[:blank:]]*$//' |
    awk 'FNR==1'
}
86
87
# Read HTML on stdin and print the href of the first line that looks like a
# <link> to an rss.xml file. A leading "../../" in that href is rewritten to
# the hard-coded site root https://gnunet.org/ (see the base-url TODO at the
# top of the file). Prints nothing when no such link, or no href, is found.
get_url() {
  grep -i '<link .*rss\.xml"' | head -n 1 |
    # [^"]* stops at the closing quote of the href value; a greedy .* would
    # run on to the last quote of the line and swallow following attributes.
    # -n/p: emit nothing rather than the raw line when no href is present.
    sed -n 's#.*href="\([^"]*\)".*#\1#p' |
    # Dots are escaped and the match is anchored so that only a real leading
    # "../../" is rewritten; unescaped, ".." matches any two characters and
    # mangles absolute URLs such as https://gnunet.org/en/rss.xml.
    sed 's#^\.\./\.\./#https://gnunet.org/#'
}
93
94
# Read HTML on stdin and, for every line holding an href attribute followed
# by a title attribute, print "<href> <title attribute> <link text>".
# Lines selected by the (case-insensitive) grep that the (case-sensitive)
# sed expression cannot rewrite are passed through as they are.
get_items() {
  grep -i -e 'href=".*" title="' |
    sed -e 's#.*href="\(.*\)" title="\(.*\)">\(.*\)</a>.*#\1 \2 \3#'
}
99
100
# Read HTML on stdin and print the contents of the first <p> ... </p> span.
#
# The awk program is a single range pattern with the default print action:
#   - the range opens on a record containing an opening <p> / <P> tag
#     (attributes allowed); everything up to and including the tag is removed;
#   - the range closes on the record containing </p> / </P>; the tag and
#     everything after it is removed and x is set;
#   - once x is set, awk exits, so only the first paragraph is printed.
get_description() {
  awk -v 's=[[:space:]]' -v 't=[Pp]' '
    sub("^.*<" s "*" t "(" s "[^>]*)?>", ""), sub("</" s "*" t s "*>.*", "") && (x = 1)
    x { exit }
  '
}
106
# Filter stdin to stdout, deleting every "<...>" sequence (opening and
# closing tags alike) from each line.
remove_tags() {
  sed -e 's#<[^>]*>##g' -e 's#</[^>]*>##g'
}
110
111
# Filter stdin to stdout, turning every "&nbsp;" entity into a plain space.
remove_nbsp() {
  sed -e 's#\&nbsp;# #g'
}
115
116
# Filter HTML from stdin to stdout, making src/href attribute values
# absolute.
#
# Arguments:
#   $1 - site URL, prefixed to values that start with "/"
#   $2 - base URL, prefixed to values that contain neither ":" nor "/"
# Values that already carry a scheme, or that are relative paths containing
# a "/", are left untouched.
rel_to_abs_urls() {
  # The URLs end up in the replacement half of an s#...#...# command, where
  # "\", "&" and the delimiter "#" are special. Escape them so that a URL
  # containing any of those characters is inserted literally instead of
  # corrupting the output or producing a sed syntax error.
  _rtau_site=$(printf '%s' "$1" | sed 's/[\\&#]/\\&/g')
  _rtau_base=$(printf '%s' "$2" | sed 's/[\\&#]/\\&/g')

  abs='s#(src|href)="/([^"]*)"#\1="'"$_rtau_site"'/\2"#g'
  rel='s#(src|href)="([^:/"]*)"#\1="'"$_rtau_base"'/\2"#g'

  # Order matters: once rewritten by $abs a value contains ":" and is
  # therefore not touched again by $rel.
  sed -E -e "$abs" -e "$rel"
}
125
126
# Print the date given in $1 in RFC 822 form
# ("Wed, 01 May 2019 00:00:00 +0000"), in the local time zone.
#
# Arguments:
#   $1 - a date such as "2019-05-01" or "2019-05-01 12:30"; only its digits
#        are used and they are read as YYYYMMDD[HHMM]. A missing time means
#        00:00; digits past the minutes are ignored.
date_rfc_822() {
  # Normalise to exactly twelve digits (ccyymmddHHMM): pad a date-only value
  # with "0000" and cut off anything beyond the minutes. Without the cut,
  # an input that already has a time yields 16 digits, which `date -j`
  # rejects.
  stamp=$(printf '%s' "$1" | tr -cd '[:digit:]')
  stamp=$(printf '%.12s' "${stamp}0000")

  if date --version >/dev/null 2>&1; then
    # GNU date: -R is the long-standing short form of the RFC 5322/822
    # output option; rebuild "YYYY-MM-DD HH:MM" for -d in one sed call.
    LC_ALL=C date -R -d "$(printf '%s\n' "$stamp" |
      sed 's/^\(....\)\(..\)\(..\)\(..\)\(..\)$/\1-\2-\3 \4:\5/')"
  else
    # BSD date: -j parses the ccyymmddHHMM operand without setting the
    # clock. LC_ALL=C keeps day and month names in English, as RFC 822
    # requires.
    LC_ALL=C date -j '+%a, %d %b %Y %H:%M:%S %z' "$stamp"
  fi
}
141
142
# Print a date in RFC 822 form for the feed's <lastBuildDate>.
#
# Arguments:
#   $1 - the literal word "date" (what render_feed passes) or nothing:
#        print the current time.
#        Anything else is handled as before: with GNU date, $1 is run as a
#        command and its output is parsed; otherwise the digits of $1 are
#        read as a ccyymmddHHMM timestamp.
feed_date_rfc_822() {
  case "${1:-date}" in
    date)
      # Format the current time directly. Running `date` and feeding its
      # locale-dependent output back into `date -d` fails in locales GNU
      # date cannot re-parse, and the non-GNU branch used to print today's
      # midnight instead of the current time.
      LC_ALL=C date '+%a, %d %b %Y %H:%M:%S %z'
      ;;
    *)
      if date --version >/dev/null 2>&1; then
        # $1 is deliberately unquoted: it is a command line to execute.
        # LC_ALL=C makes its output parseable by `date -d`.
        # shellcheck disable=SC2086
        LC_ALL=C date -R -d "$(LC_ALL=C $1)"
      else
        LC_ALL=C date -j '+%a, %d %b %Y %H:%M:%S %z' \
          "$(printf '%s' "$1" | tr -cd '[:digit:]')0000"
      fi
      ;;
  esac
}
152
153
# Read item lines (as produced by get_items) from stdin and render each one
# with render_item.
#
# Arguments:
#   $1 - directory containing the item source files (passed to render_item)
#   $2 - base URL (passed to render_item)
render_items() {
  # "|| test -n" also processes a final line that lacks a trailing newline.
  while read -r i || test -n "$i"; do
    # Skip blank lines: when the index contains no items the caller still
    # feeds one empty line, which would otherwise be rendered as a bogus
    # <item> with no link, date or content.
    test -n "$i" || continue
    render_item "$1" "$2" "$i"
  done
}
159
160
# Print one RSS <item> element for a single index entry.
#
# Arguments:
#   $1 - directory containing the item source files
#   $2 - base URL of the feed's directory; also the prefix of the item's
#        guid/link
#   $3 - item line "<relative url> <date> ..." as produced by get_items
#
# The item body is the part of the source page between the <!--BEGIN--> and
# <!--END--> marker lines; when a Markdown file with the same stem as the
# .html file exists, it is converted and takes precedence.
render_item() {
  base="$1"
  base_url="$2"
  item="$3"

  site_url="$(printf '%s\n' "$base_url" | sed 's#\(.*//.*\)/.*#\1#')"

  date=$(printf '%s\n' "$item" | awk '{print $2}')
  url=$(printf '%s\n' "$item" | awk '{print $1}')

  f="$base/$url"

  # Reset first: without this, an item whose source file is missing would
  # silently reuse the HTML left in $html by the previous item (or by the
  # index page itself) and be published with someone else's content.
  html=
  if test -f "$f"; then
    html=$(cat "$f")
  fi
  if test -f "${f%.html}.md"; then
    html=$(md_to_html "${f%.html}.md")
  fi
  test -n "$html" || echo "[rssg] $f: no content found, item will be empty" >&2

  # printf instead of echo: some shells' echo interprets backslash
  # sequences, which would alter page content.
  description=$(
    printf '%s\n' "$html" |
      rel_to_abs_urls "$site_url" "$base_url" |
      remove_nbsp | awk '/<!--BEGIN-->/,/<!--END-->/'
  )
  title=$(printf '%s\n' "$description" | get_title)
  guid="$base_url/${url#/}"

  # The date is formatted inline so that an unparsable date leaves an empty
  # <pubDate> instead of aborting the whole run under set -e.
  printf '\n<item>\n<guid>%s</guid>\n<link>%s</link>\n<pubDate>%s</pubDate>\n<title>%s</title>\n<description><![CDATA[\n\n%s\n\n]]></description>\n</item>\n' \
    "$guid" "$guid" "$(date_rfc_822 "$date")" "$title" "$description"
}
197
198
# Wrap already-rendered items into a complete RSS 2.0 document on stdout.
#
# Arguments:
#   $1 - URL of the feed itself (used for atom:link; its scheme and host
#        part becomes the channel <link>)
#   $2 - channel title (&nbsp; entities are replaced by spaces)
#   $3 - channel description
# Stdin: the <item> elements, inserted verbatim before </channel>.
render_feed() {
  url="$1"
  description="$3"
  title=$(echo "$2" | remove_nbsp)

  # First three "/"-separated fields of the URL, i.e. "scheme://host".
  base_url="$(echo "$url" | cut -d '/' -f1-3)"

  echo "<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">
<channel>
<atom:link href=\"$url\" rel=\"self\" type=\"application/rss+xml\" />
<title>$title</title>
<language>en</language>
<description>$description</description>
<link>$base_url/</link>
<lastBuildDate>$(feed_date_rfc_822 date)</lastBuildDate>
$(cat)
</channel></rss>"
}
218
219
220main "$@"