diff options
Diffstat (limited to 'rssg')
-rwxr-xr-x | rssg | 191 |
1 files changed, 191 insertions, 0 deletions
@@ -0,0 +1,191 @@ | |||
1 | #!/bin/sh | ||
2 | # | ||
3 | # https://www.romanzolotarev.com/bin/rssg | ||
4 | # Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com> | ||
5 | # Copyright 2019 ng0 <ng0@n0.is> | ||
6 | # | ||
7 | # Permission to use, copy, modify, and/or distribute this software for any | ||
8 | # purpose with or without fee is hereby granted, provided that the above | ||
9 | # copyright notice and this permission notice appear in all copies. | ||
10 | # | ||
11 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
12 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
13 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
14 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
15 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
16 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
17 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
18 | # | ||
# Abort the script as soon as an unguarded command fails.
set -o errexit
20 | |||
21 | |||
# main INDEX_FILE TITLE
#
# Builds an RSS feed from an index page and writes it to stdout.
#   $1  index file; its name must end in "html" or "md"
#   $2  feed title
# Prints usage and exits when an argument is missing or the index yields no
# HTML; reports a missing index file through no_file. A one-line summary
# (index path and item count) goes to stderr.
main () {
  test -n "$1" || usage
  test -n "$2" || usage
  test -f "$1" || no_file "$1"

  index_file=$(readlink -f "$1")

  # Start empty so a value inherited from the environment can never be
  # mistaken for page content.
  html=
  case "$index_file" in
    *html) html=$(cat "$index_file") ;;
    *md) html=$(md_to_html "$index_file") ;;
  esac
  test -n "$html" || usage

  # Directory holding the index; item paths are resolved against it.
  base="${index_file%/*}"

  # Extract the feed URL once; the base URL is that URL minus its last
  # path component.
  url=$(printf '%s\n' "$html" | get_url)
  base_url=$(printf '%s\n' "$url" | sed 's#\(.*\)/[^/]*#\1#')

  title="$2"

  description=$(printf '%s\n' "$html" |
    get_description |
    remove_tags |
    remove_nbsp)

  items=$(printf '%s\n' "$html" | get_items)

  rss=$(printf '%s\n' "$items" |
    render_items "$base" "$base_url" |
    render_feed "$url" "$title" "$description")

  # grep -c exits non-zero when it counts no items; that must not abort the
  # script under `set -e`.
  count=$(printf '%s\n' "$rss" | grep -c '<item>') || true

  # Quote the directory so it is stripped literally, not treated as a glob.
  cwd=$(pwd)
  printf '[rssg] %s %s items\n' "${index_file##"$cwd"/}" "$count" >&2

  printf '%s\n' "$rss"
}
53 | |||
54 | |||
# Print the command-line synopsis to stderr and terminate with status 1.
usage() {
  printf 'usage: %s index.{html,md} title > rss.xml\n' "${0##*/}" >&2
  exit 1
}
59 | |||
60 | |||
# Report on stderr that the file named by $1 does not exist, then terminate
# with status 2.
no_file() {
  printf '%s: %s: No such file\n' "${0##*/}" "$1" >&2
  exit 2
}
65 | |||
66 | |||
# md_to_html FILE
#
# Convert the Markdown file $1 to HTML on stdout using lowdown.
# Exits with status 3 (after a diagnostic on stderr) when lowdown is not
# available on PATH.
md_to_html() {
  # `command -v` is the portable way to probe for a program; `which` is not
  # guaranteed to exist or to behave consistently.
  if ! command -v lowdown >/dev/null 2>&1; then
    echo "${0##*/}: lowdown: command not found" >&2
    exit 3
  fi

  lowdown \
    -D html-skiphtml \
    -D smarty \
    -d metadata \
    -d autolink "$1"
}
75 | |||
76 | |||
# Read HTML on stdin and print the text of the first line that begins with
# an <h1 tag (matched case-insensitively), with every tag stripped from it.
get_title() {
  awk '
    tolower($0) ~ /^<h1/ {
      gsub(/<[^>]*>/, "")
      print
      exit
    }
  '
}
80 | |||
81 | |||
# Read HTML on stdin and print the href of the feed link: the first line
# that contains an <a ...> whose attribute value ends in rss.xml.
# Prints nothing when no such href can be extracted.
get_url() {
  # [^"]* keeps the capture inside the attribute value, so attributes that
  # follow the href (class, title, ...) are not swallowed; anchoring the
  # capture on rss.xml picks the feed link even when other links share the
  # line. -n/p suppresses lines the expression cannot parse.
  grep -i '<a .*rss\.xml"' | head -1 |
    sed -n 's#.*[Hh][Rr][Ee][Ff]="\([^"]*[Rr][Ss][Ss]\.[Xx][Mm][Ll]\)".*#\1#p'
}
86 | |||
87 | |||
# Read HTML on stdin and print one line per link of the form
#   <a href="URL" title="DATE" ...>TEXT</a>
# as "URL DATE TEXT". Lines that do not parse are dropped.
get_items() {
  # [^"]* confines each capture to its own attribute value and [^>]* skips
  # any attributes after title, so extra attributes no longer leak into the
  # DATE field. -n/p keeps unparseable lines out of the item list.
  grep -i 'href=".*" title="' |
    sed -n 's#.*href="\([^"]*\)" title="\([^"]*\)"[^>]*>\(.*\)</a>.*#\1 \2 \3#p'
}
92 | |||
93 | |||
# Read HTML on stdin and print the contents of the first <p>...</p> element
# (tag name matched as p or P, optional attributes allowed).
get_description() {
  # The awk range starts on the line where the opening tag (and everything
  # before it) can be stripped, and ends on the line where the closing tag
  # (and everything after it) can be stripped. The end test also sets x, so
  # awk stops reading after the first paragraph has been printed.
  awk -v 's=[[:space:]]' -v 't=[Pp]' '
    sub("^.*<" s "*" t "(" s "[^>]*)?>", ""), sub("</" s "*" t s "*>.*", "") && (x = 1)
    x { exit }
  '
}
99 | |||
# Filter stdin to stdout, deleting every <...> tag (opening and closing).
remove_tags() {
  sed -e 's#<[^>]*>##g' -e 's#</[^>]*>##g'
}
103 | |||
104 | |||
# Filter stdin to stdout, replacing non-breaking spaces with plain spaces.
# Both spellings are handled: the HTML entity "&nbsp;" and the raw UTF-8
# encoding of U+00A0 (bytes 0xC2 0xA0).
remove_nbsp() {
  # printf with octal escapes produces the raw bytes portably; plain sh has
  # no $'...' quoting. "&" is only special in a sed replacement, not in the
  # pattern, so it needs no escaping here.
  sed -e 's#&nbsp;# #g' -e "s#$(printf '\302\240')# #g"
}
108 | |||
109 | |||
# rel_to_abs_urls SITE_URL BASE_URL
#
# Filter HTML from stdin to stdout, making src/href attribute values
# absolute:
#   - values starting with a single "/" are prefixed with SITE_URL
#   - values with no scheme and no leading "/" are prefixed with BASE_URL/
# Values that already carry a scheme (contain ":") and protocol-relative
# values ("//host/...") are left untouched.
rel_to_abs_urls() {
  site_url="$1"
  base_url="$2"

  # Escape the characters sed treats specially in a replacement (backslash,
  # "&", and the "#" delimiter) so a URL containing them is inserted
  # literally.
  site_rep=$(printf '%s\n' "$site_url" | sed 's/[\\&#]/\\&/g')
  base_rep=$(printf '%s\n' "$base_url" | sed 's/[\\&#]/\\&/g')

  # Root-relative: exactly one leading slash, so "//cdn.example/x" is not
  # mistaken for a site path.
  abs='s#(src|href)="/([^/"][^"]*)?"#\1="'"$site_rep"'/\2"#g'
  # Document-relative: may contain slashes after the first character
  # (e.g. "img/a.png") but no colon, which would indicate a scheme.
  rel='s#(src|href)="([^:/"][^:"]*)?"#\1="'"$base_rep"'/\2"#g'

  sed -E "$abs;$rel"
}
118 | |||
119 | |||
# date_rfc_822 DATE
#
# Print DATE as an RFC 822 timestamp ("Wed, 02 Jan 2019 00:00:00 +0000").
# Every non-digit is removed from $1 and "0000" (HHMM) is appended, so
# "2019-01-02" becomes 201901020000. An argument without digits becomes
# "0000", i.e. midnight of the current day.
# Returns non-zero when the digits do not form a supported timestamp.
date_rfc_822() {
  stamp="$(printf '%s' "$1" | tr -cd '[:digit:]')0000"
  fmt='+%a, %d %b %Y %H:%M:%S %z'

  # RFC 822 requires English day and month names, hence LC_ALL=C.
  # First try the BSD form, which accepts the digit string directly.
  if LC_ALL=C date -j "$fmt" "$stamp" 2>/dev/null; then
    return 0
  fi

  # Fallback for date implementations without -j (e.g. GNU): reshape the
  # digits into a string that `date -d` understands.
  case "${#stamp}" in
    12)
      when=$(printf '%s\n' "$stamp" |
        sed 's/^\(....\)\(..\)\(..\)\(..\)\(..\)$/\1-\2-\3 \4:\5/')
      ;;
    4)
      when=$(printf '%s\n' "$stamp" | sed 's/^\(..\)\(..\)$/\1:\2/')
      ;;
    *)
      echo "${0##*/}: $1: unsupported date" >&2
      return 1
      ;;
  esac
  LC_ALL=C date -d "$when" "$fmt"
}
124 | |||
125 | |||
# render_items BASE BASE_URL
#
# Read item lines from stdin and call render_item BASE BASE_URL LINE for
# each one. Blank lines are skipped, so an empty item list produces no
# <item> element, and a final line without a trailing newline is still
# processed.
render_items() {
  while read -r i || test -n "$i"; do
    test -n "$i" || continue
    render_item "$1" "$2" "$i"
  done
}
131 | |||
132 | |||
# render_item BASE BASE_URL ITEM
#
# Print one RSS <item> element on stdout.
#   $1  directory on disk that item paths are relative to
#   $2  URL corresponding to that directory
#   $3  item line; its first word is the page path, its second the date
# The page body is read from BASE/path, or from the matching .md file when
# one exists (the .md version wins). Only the part between the <!--BEGIN-->
# and <!--END--> markers is used as the description.
render_item() {
  base="$1"
  base_url="$2"
  item="$3"

  # Scheme and host part of the base URL, used for root-relative links.
  site_url="$(printf '%s\n' "$base_url" | sed 's#\(.*//.*\)/.*#\1#')"

  date=$(printf '%s\n' "$item" | awk '{print $2}')
  url=$(printf '%s\n' "$item" | awk '{print $1}')

  f="$base/$url"

  # Reset first: otherwise a missing page would silently reuse whatever
  # content the variable held before this call.
  html=
  if test -f "$f"; then
    html=$(cat "$f")
  fi
  if test -f "${f%.html}.md"; then
    html=$(md_to_html "${f%.html}.md")
  fi
  if test -z "$html"; then
    echo "[rssg] $url: no content found" >&2
  fi

  # printf instead of echo: some sh implementations' echo interprets
  # backslash sequences, which would corrupt page content.
  description=$(
    printf '%s\n' "$html" |
      rel_to_abs_urls "$site_url" "$base_url" |
      remove_nbsp |
      awk '/<!--BEGIN-->/,/<!--END-->/'
  )
  title=$(printf '%s\n' "$description" | get_title)
  guid="$base_url/$(printf '%s\n' "$url" | sed 's#^/##')"

  # The date is expanded as an argument so that a failing date command
  # leaves an empty <pubDate> rather than aborting under `set -e`.
  printf '\n<item>\n<guid>%s</guid>\n<link>%s</link>\n<pubDate>%s</pubDate>\n<title>%s</title>\n<description><![CDATA[\n\n%s\n\n]]></description>\n</item>\n' \
    "$guid" "$guid" "$(date_rfc_822 "$date")" "$title" "$description"
}
169 | |||
170 | |||
# render_feed URL TITLE DESCRIPTION
#
# Print the complete RSS 2.0 document on stdout.
#   $1  URL of the feed itself (used for the atom:link self reference)
#   $2  channel title (non-breaking spaces are normalised)
#   $3  channel description
# The already rendered <item> elements are read from stdin and inserted
# verbatim before the closing </channel>.
render_feed() {
  url="$1"
  title=$(printf '%s\n' "$2" | remove_nbsp)
  description="$3"

  # Scheme and host: the first three "/"-separated fields of the feed URL.
  base_url="$(printf '%s\n' "$url" | cut -d '/' -f1-3)"

  # printf instead of echo: some sh implementations' echo interprets
  # backslash sequences, which would corrupt item content.
  # date_rfc_822 receives the literal word "date"; it has no digits, so the
  # helper ends up formatting midnight of the current day.
  printf '<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">\n<channel>\n<atom:link href="%s" rel="self" type="application/rss+xml" />\n<title>%s</title>\n<description>%s</description>\n<link>%s/</link>\n<lastBuildDate>%s</lastBuildDate>\n%s\n</channel></rss>\n' \
    "$url" "$title" "$description" "$base_url" \
    "$(date_rfc_822 date)" "$(cat)"
}
189 | |||
190 | |||
# Run the generator, forwarding the script's command-line arguments
# unchanged.
main "$@"