diff options
Diffstat (limited to 'rssg')
-rwxr-xr-x | rssg | 220 |
1 files changed, 0 insertions, 220 deletions
@@ -1,220 +0,0 @@ | |||
1 | #!/bin/sh | ||
2 | # | ||
3 | # https://www.romanzolotarev.com/bin/rssg | ||
4 | # Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com> | ||
5 | # Copyright 2019 ng0 <ng0@n0.is> | ||
6 | # | ||
7 | # Permission to use, copy, modify, and/or distribute this software for any | ||
8 | # purpose with or without fee is hereby granted, provided that the above | ||
9 | # copyright notice and this permission notice appear in all copies. | ||
10 | # | ||
11 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
12 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
13 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
14 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
15 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
16 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
17 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
18 | # | ||
19 | set -e | ||
20 | |||
21 | # TODO: Fix date -j for GNU date. | ||
22 | # TODO: Fix base url (get_url) | ||
23 | # TODO: Fix link OR a in get_url | ||
24 | # TODO: get_title: be more dynamic about where the title can be. | ||
25 | |||
# main INDEX_FILE TITLE
#
# Build an RSS 2.0 feed from an index page and write it to stdout.
#
#   $1  path to the index page; must end in "html" or "md"
#   $2  feed title
#
# The index page supplies the feed URL (get_url), the channel description
# (get_description) and the list of items (get_items).  A one-line summary
# with the number of rendered items goes to stderr.
#
# Exits through usage (status 1) on missing arguments or an index page that
# yields no HTML, and through no_file (status 2) when $1 is not a file.
main() {
  test -n "$1" || usage
  test -n "$2" || usage
  test -f "$1" || no_file "$1"

  index_file=$(readlink -f "$1")

  # Start from a known-empty value so that a stale or inherited $html can
  # never satisfy the emptiness check below.
  html=
  case "$index_file" in
    *html) html=$(cat "$index_file") ;;
    *md) html=$(md_to_html "$index_file") ;;
  esac
  test -n "$html" || usage

  # Directory holding the index page; item paths are resolved against it.
  base="${index_file%/*}"

  # printf instead of echo: page content may contain backslashes, which an
  # XSI echo (e.g. dash as /bin/sh) would interpret as escape sequences.
  url=$(printf '%s\n' "$html" | get_url)

  # Feed URL with its last path component removed.
  base_url="${url%/*}"

  title="$2"

  description=$(
    printf '%s\n' "$html" |
      get_description |
      remove_tags |
      remove_nbsp
  )

  items=$(printf '%s\n' "$html" | get_items)

  rss=$(
    printf '%s\n' "$items" |
      render_items "$base" "$base_url" |
      render_feed "$url" "$title" "$description"
  )

  # Show the index path relative to the current directory when possible.
  # The prefix is quoted so it is matched literally, not as a glob pattern.
  here=$(pwd)
  shown=${index_file#"$here"/}
  count=$(printf '%s\n' "$rss" | grep -c '<item>')
  printf '[rssg] %s %s items\n' "$shown" "$count" >&2

  printf '%s\n' "$rss"
}
57 | |||
58 | |||
# usage
#
# Write the one-line command synopsis to stderr and terminate the script
# with exit status 1.  The program name is the basename of $0.
usage() {
  printf 'usage: %s index.{html,md} title > rss.xml\n' "${0##*/}" 1>&2
  exit 1
}
63 | |||
64 | |||
# no_file PATH
#
# Report on stderr that PATH does not exist and terminate the script with
# exit status 2.
#
#   $1  the path that was looked up
#
# printf '%s' is used so the path is printed verbatim; an XSI echo would
# interpret backslash sequences contained in the file name.
no_file() {
  printf '%s: %s: No such file\n' "${0##*/}" "$1" >&2
  exit 2
}
69 | |||
70 | |||
# md_to_html FILE
#
# Convert the Markdown document FILE to HTML with lowdown and write the
# result to stdout.
#
#   $1  path of the Markdown file
#
# Terminates the (sub)shell with status 3 when lowdown is not available.
# `command -v` is the POSIX way to probe for a command; `which` is not
# guaranteed to exist.  A diagnostic is printed so the exit is not silent.
md_to_html() {
  if ! command -v lowdown >/dev/null 2>&1; then
    printf '%s: lowdown not found, cannot convert %s\n' "${0##*/}" "$1" >&2
    exit 3
  fi

  lowdown \
    -D html-skiphtml \
    -D smarty \
    -d metadata \
    -d autolink "$1"
}
79 | |||
80 | |||
# get_title
#
# Filter: reads HTML on stdin and prints one line derived from the first
# input line that contains "<h1".
#
# For each such line the awk stage deletes literal "<h1>" and "</h1>",
# truncates the third "<"/">"-delimited field at its first space and prints
# the record.  Surrounding blanks are then trimmed and only the first
# resulting line is kept.
# NOTE(review): when the heading tag carries attributes the tag text stays
# in the output; see the get_title TODO at the top of the file.
get_title() {
  awk -F '[<>]' '
    /<h1/ {
      gsub(/<h1>/, "")
      gsub(/<\/h1>/, "")
      sub(/ .*/, "", $3)
      print $0
    }' |
    sed -e 's/^[ \t]*//' -e 's/[ \t]*$//' |
    sed -n '1p'
}
86 | |||
87 | |||
# get_url
#
# Filter: reads HTML on stdin and prints the href of the first <link>
# element whose line references rss.xml" (matched case-insensitively).
# A literal "../../" in that href is replaced by "https://gnunet.org/".
# Prints nothing when no such line exists.
get_url() {
  # - The dots are escaped: an unescaped "../../" matches any two characters
  #   around slashes and would rewrite absolute URLs like ".../en/rss.xml".
  # - [^"]* stops at the closing quote of href, so attributes following
  #   href on the same line do not end up in the URL.
  grep -i '<link .*rss\.xml"' |
    head -n 1 |
    sed -e 's#.*href="\([^"]*\)".*#\1#' \
      -e 's#\.\./\.\./#https://gnunet.org/#'
}
93 | |||
94 | |||
# get_items
#
# Filter: reads HTML on stdin and keeps the lines that contain an href
# attribute followed by a title attribute (matched case-insensitively).
# Each kept line that fits the anchor pattern is rewritten to
#
#   HREF TITLE LINK-TEXT
#
# separated by single spaces; kept lines that do not fit the pattern pass
# through unchanged.
get_items() {
  grep -i -e 'href=".*" title="' |
    sed -e 's#.*href="\(.*\)" title="\(.*\)">\(.*\)</a>.*#\1 \2 \3#'
}
99 | |||
100 | |||
# get_description
#
# Filter: reads HTML on stdin and prints the content of the first <p>
# element (tag name matched as [Pp], attributes allowed).
#
# The awk program is a single range pattern:
#   start - a successful sub() that deletes everything up to and including
#           the opening tag;
#   end   - a successful sub() that deletes the closing tag and whatever
#           follows it, and raises the flag x.
# Records inside the range are printed (after the substitutions); once x is
# set awk exits, so only the first paragraph is emitted.
get_description() {
  awk -v 's=[[:space:]]' -v 't=[Pp]' '
    sub("^.*<" s "*" t "(" s "[^>]*)?>", ""), sub("</" s "*" t "" s "*>.*", "") && (x = 1)
    x { exit }
  '
}
106 | |||
# remove_tags
#
# Filter: deletes every "<...>" sequence from each line of stdin and writes
# the remaining text to stdout.  Works line by line, so a tag split across
# two lines is not removed.
remove_tags() {
  # The second expression targets closing tags; the first one already
  # matches them as well.
  sed -e 's#<[^>]*>##g' \
    -e 's#</[^>]*>##g'
}
110 | |||
111 | |||
# remove_nbsp
#
# Filter: replaces every HTML non-breaking-space entity ("&nbsp;") on stdin
# with a plain space and writes the result to stdout.
remove_nbsp() {
  # "&" has no special meaning on the pattern side of s///, so the entity
  # can be written literally.
  sed 's#&nbsp;# #g'
}
115 | |||
116 | |||
# rel_to_abs_urls SITE_URL BASE_URL
#
# Filter: rewrites src="..." and href="..." attribute values on stdin.
#
#   $1  SITE_URL  prefix for values that start with "/"
#   $2  BASE_URL  prefix for values that contain neither ":" nor "/"
#
# Values that match neither rule are left unchanged.
rel_to_abs_urls() {
  # Both URLs are spliced into the replacement side of s#...#...#, where
  # "\", "&" and the "#" delimiter are special.  Escape them so the URLs
  # are inserted literally.  Private variable names keep the caller's
  # site_url / base_url untouched.
  _rtau_site=$(printf '%s\n' "$1" | sed 's/[\\&#]/\\&/g')
  _rtau_base=$(printf '%s\n' "$2" | sed 's/[\\&#]/\\&/g')

  sed -E \
    -e 's#(src|href)="/([^"]*)"#\1="'"$_rtau_site"'/\2"#g' \
    -e 's#(src|href)="([^:/"]*)"#\1="'"$_rtau_base"'/\2"#g'
}
125 | |||
126 | |||
# date_rfc_822 DATE
#
# Print DATE in RFC 822 form on stdout.
#
#   $1  a date such as "2019-01-02" or "2019-01-02 10:30"; every non-digit
#       is dropped and "0000" is appended, then the digits are read
#       positionally as YYYYMMDDhhmm
#
# `date --version` succeeding selects the `date -d` branch; otherwise the
# digit string is passed to `date -j`.
date_rfc_822() {
  stamp="$(echo "$1" | tr -cd '[:digit:]')0000"

  if ! date --version >/dev/null 2>&1; then
    date -j '+%a, %d %b %Y %H:%M:%S %z' \
      "$stamp"
    return
  fi

  # Cut the digit string into "YYYY-MM-DD hh:mm" for date -d.
  when=$(
    echo "$stamp" | awk '{
      printf "%s-%s-%s %s:%s\n",
        substr($0, 1, 4), substr($0, 5, 2), substr($0, 7, 2),
        substr($0, 9, 2), substr($0, 11, 2)
    }'
  )
  date -d "$when" --rfc-822
}
141 | |||
142 | |||
# feed_date_rfc_822 COMMAND
#
# Print a timestamp in RFC 822 form on stdout.
#
#   $1  in the `date -d` branch: a command (for example `date`) whose
#       output is the timestamp to convert;
#       in the `date -j` branch: a string whose digits, followed by
#       "0000", are handed to `date -j`
#
# `date --version` succeeding selects the `date -d` branch.
feed_date_rfc_822() {
  if date --version >/dev/null 2>&1; then
    # Run the command in the C locale: in other locales `date` prints
    # translated day and month names, which `date -d` cannot parse back.
    # $1 is left unquoted on purpose so "cmd arg" is split into words.
    # shellcheck disable=SC2086
    k="$(LC_ALL=C $1)"
    date -d "$k" --rfc-822
  else
    date -j '+%a, %d %b %Y %H:%M:%S %z' \
      "$(printf '%s\n' "$1" | tr -cd '[:digit:]')0000"
  fi
}
152 | |||
153 | |||
# render_items BASE BASE_URL
#
# Reads one item record per line from stdin and calls render_item for each
# of them, passing BASE and BASE_URL through unchanged.
#
#   $1  BASE      forwarded as first argument of render_item
#   $2  BASE_URL  forwarded as second argument of render_item
render_items() {
  while read -r i; do
    # An empty item list still arrives as a single blank line; rendering
    # it would emit an <item> without URL or date, so skip blank records.
    test -n "$i" || continue
    render_item "$1" "$2" "$i"
  done
}
159 | |||
160 | |||
# render_item BASE BASE_URL ITEM
#
# Print one RSS <item> element on stdout.
#
#   $1  BASE      directory against which the item's path is resolved
#   $2  BASE_URL  URL prefix for the item's guid/link and relative links
#   $3  ITEM      whitespace-separated record: first word is the page
#                 path, second word is the date
#
# The page is read from BASE/path; when a file with the same name but an
# .md suffix instead of .html exists, its lowdown rendering is used
# instead.  The description is the part of the page between the
# <!--BEGIN--> and <!--END--> marker lines (inclusive), and the title is
# taken from that part with get_title.
render_item() {
  base="$1"
  base_url="$2"
  item="$3"

  # Scheme and host of BASE_URL.  The host part is matched with [^/]* so
  # that no path component is kept; a value without "//" passes through.
  site_url="$(printf '%s\n' "$base_url" | sed 's#^\([^/]*//[^/]*\)/.*#\1#')"

  date=$(printf '%s\n' "$item" | awk '{ print $2 }')
  url=$(printf '%s\n' "$item" | awk '{ print $1 }')

  f="$base/$url"

  # Reset first: without this a page that cannot be found would be
  # rendered with the content left over from the previous item.
  html=
  if test -f "$f"; then
    html=$(cat "$f")
  fi
  if test -f "${f%.html}.md"; then
    html=$(md_to_html "${f%.html}.md")
  fi

  # printf instead of echo: page content may contain backslashes, which an
  # XSI echo would interpret as escape sequences.
  description=$(
    printf '%s\n' "$html" |
      rel_to_abs_urls "$site_url" "$base_url" |
      remove_nbsp |
      awk '/<!--BEGIN-->/,/<!--END-->/'
  )
  title=$(printf '%s\n' "$description" | get_title)
  guid="$base_url/$(printf '%s\n' "$url" | sed 's#^/##')"

  printf '%s\n' '
<item>
<guid>'"$guid"'</guid>
<link>'"$guid"'</link>
<pubDate>'"$(date_rfc_822 "$date")"'</pubDate>
<title>'"$title"'</title>
<description><![CDATA[

'"$description"'

]]></description>
</item>'
}
197 | |||
198 | |||
# render_feed URL TITLE DESCRIPTION
#
# Print a complete RSS 2.0 document on stdout.  The already rendered
# <item> elements are read from stdin and placed inside the channel.
#
#   $1  URL          the feed's own URL (atom:link rel="self")
#   $2  TITLE        channel title; "&nbsp;" handling is left to remove_nbsp
#   $3  DESCRIPTION  channel description
#
# The channel <link> is the first three "/"-separated parts of URL followed
# by "/", and <lastBuildDate> comes from `feed_date_rfc_822 date`.
render_feed() {
  url="$1"
  title=$(printf '%s\n' "$2" | remove_nbsp)
  description="$3"

  base_url="$(printf '%s\n' "$url" | cut -d '/' -f1-3)"

  # Read the items before anything is written.
  feed_items=$(cat)

  # printf '%s\n' prints every argument verbatim on its own line; an XSI
  # echo would interpret backslash sequences inside the item content.
  printf '%s\n' \
    '<?xml version="1.0" encoding="UTF-8"?>' \
    '<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">' \
    '<channel>' \
    '<atom:link href="'"$url"'" rel="self" type="application/rss+xml" />' \
    '<title>'"$title"'</title>' \
    '<language>en</language>' \
    '<description>'"$description"'</description>' \
    '<link>'"$base_url"'/</link>' \
    '<lastBuildDate>'"$(feed_date_rfc_822 date)"'</lastBuildDate>' \
    "$feed_items" \
    '</channel></rss>'
}
218 | |||
219 | |||
220 | main "$@" | ||