aboutsummaryrefslogtreecommitdiff
path: root/gnu/gnunet/config/parser.scm
blob: 022449ea9f57caeec5043594849548097bcfcec7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
;;   This file is part of scheme-GNUnet, a partial Scheme port of GNUnet.
;;   Copyright (C) 2021 Maxime Devos <maximedevos@telenet.be>
;;   Copyright (C) 2006, 2007, 2008, 2009, 2013, 2020 GNUnet e.V.
;;
;;   scheme-GNUnet is free software: you can redistribute it and/or modify it
;;   under the terms of the GNU Affero General Public License as published
;;   by the Free Software Foundation, either version 3 of the License,
;;   or (at your option) any later version.
;;
;;   scheme-GNUnet is distributed in the hope that it will be useful, but
;;   WITHOUT ANY WARRANTY; without even the implied warranty of
;;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;   Affero General Public License for more details.
;;
;;   You should have received a copy of the GNU Affero General Public License
;;   along with this program.  If not, see <http://www.gnu.org/licenses/>.
;;
;;   SPDX-License-Identifier: AGPL-3.0-or-later

;; Author: Christian Grothoff (upstream, C)
;; Author: Maxime Devos (downstream, Scheme)
;; Brief: parse GNUnet configuration files.

;; TODO: unquoting
(define-library (gnu gnunet config parser)
  (export parse-line ;; line parser
	  <position:%> make-%-position %-position?
	  position:%
	  <position:#> make-#-position #{#-position?}#
	  position:#
	  <position:=> make-=-position =-position?
	  position:variable-start position:variable-end
	  position:= position:value-start position:value-end
	  #{<position:[]>}# #{make-[]-position}# #{[]-position?}#
	  position:section-name-start position:section-name-end
	  <position:@inline@> make-@inline@-position @inline@-position?
	  position:@inline@-start position:@inline@-end
	  position:@inline@-filename-start position:@inline@-filename-end

	  ;; expansion parser (data types)
	  <expo:literal> make-literal-position literal-position?
	  <expo:$> make-$-position $-position?
	  #{<expo:${}>}# #{make-${}-position}# #{${}-position?}#
	  #{<expo:${:-}>}# #{make-${:-}-position}# #{${:-}-position?}#

	  expo:literal-start expo:literal-end
	  expo:$-name-start expo:$-name-end
	  #{expo:${}-name-start}# #{expo:${}-name-end}#
	  #{expo:${:-}-name-start}# #{expo:${:-}-name-end}#
	  #{expo:${:-}-value-start}# #{expo:${:-}-value-end}#
	  #{expo:${:-}-value-parts}#

	  ;; expansion parser (conditions)
	  &expansion-violation &empty-variable-violation &missing-close
	  make-empty-variable-violation make-missing-close-violation
	  expansion-violation? empty-variable-violation? missing-close-violation?
	  expansion-violation-position empty-variable-kind missing-close-kind

	  parse-expandable* parse-expandable)
  (import (only (guile)
		eval-when quote char-set
		char-set:whitespace
		string-index
		string-skip string-skip-right string-prefix?)
	  (only (rnrs base)
		begin define lambda define-syntax syntax-rules ...
		assert or + - if char=? not and exact? integer?
		< <= = cons values reverse pair? null?
		string-length string-ref)
	  (only (rnrs control)
		unless)
	  (only (rnrs exceptions)
		raise)
	  (only (rnrs records syntactic)
		define-record-type)
	  (only (rnrs conditions)
		define-condition-type
		&lexical)
	  (only (rnrs lists) memq)
	  (only (gnu gnunet utils hat-let)
		let^))
  (begin
    
    ;; * The position-preserving line parser
    ;;
    ;; This parser operates on a per-line basis without any state.
    ;; It does not directly return configuration values.  Rather,
    ;; it returns the start and end positions.

    ;; Divergence from upstream GNUnet:
    ;; upstream only recognises #\newline, #\return and #\tab,
    ;; while this includes other Unicode whitespace as well.
    ;; Maybe we shouldn't.
    (define whitespace char-set:whitespace)

    ;; The output record types of @code{parse-line}.

    ;; Only defining this at expansion time halves the number
    ;; of output lines of "guild disassemble".
    (eval-when (expand)
      (define-syntax exact-integers?
	(syntax-rules ()
	  ((_ x ...)
	   (and (and (integer? x) (exact? x))
		...))))
      (define-syntax define-positions-type
        (syntax-rules ()
	  ((_ (<positions:type> make-type-positions type-positions?)
	      ((ascending-position-field accessor) ...)
	      (additional-restriction ...)
	      docstring)
	   (define-record-type
	       (<positions:type> make-type-positions type-positions?)
	     (fields (immutable ascending-position-field accessor) ...)
	     (opaque #t)
	     (sealed #t)
	     (protocol
	      (lambda (%make)
		docstring
		(lambda (ascending-position-field ...)
		  (assert (and (exact-integers? ascending-position-field ...)
			       (<= 0 ascending-position-field ...)
			       additional-restriction ...))
		  (%make ascending-position-field ...)))))))))

    (define-positions-type (<position:%> make-%-position %-position?)
      ((% position:%))
      ()
      "@var{%} is the position of the @code{#\\%} comment character in
a comment.")

    (define-positions-type (<position:#> make-#-position #{#-position?}#)
      ((#{#}# position:#))
      ()
      "@var{#} is the position of the @code{#\\#} comment character in
a comment.")

    (define-positions-type (<position:=> make-=-position =-position?)
      ((variable-start position:variable-start)
       (variable-end position:variable-end)
       (= position:=)
       (value-start position:value-start)
       (value-end position:value-end))
      ;; TODO: should empty variable names be allowed?
      ((< variable-start variable-end)
       (<= = value-start))
      "@var{variable-start} (inclusive) and @var{variable-end} (exclusive) are
the start and end positions of the variable name in an assignment.  @var{=} is
the position of the equality sign.  @var{value-start} (inclusive) and
@var{value-end} (exclusive) are the start and end positions of the value.

If the value is empty, then by convention @var{variable-start} and
@var{variable-end} are the positions right after the equality sign.")

    (define-positions-type
      (#{<position:[]>}# #{make-[]-position}# #{[]-position?}#)
      ((section-name-start position:section-name-start)
       (section-name-end position:section-name-end))
      ;; TODO: should empty section names be allowed?
      ;; Also, maybe impose some restrictions on names?
      ;; (Likewise for variable names)
      ()
      "@var{section-name-start} (inclusive) and @var{section-name-end}
(exclusive) are the start and end positions of a section name.")

    (define-positions-type (<position:@inline@> make-@inline@-position
						@inline@-position?)
      ((@inline@-start position:@inline@-start)
       (@inline@-end position:@inline@-end))
      ;; TODO: should empty file names be allowed?
      ;; If so, change < to <=.
      ((< (string-length "@INLINE@ ") (- @inline@-end @inline@-start)))
      "@var{@inline@-start} (inclusive) and @var{@inline@-end} (exclusive)
are the start and end positions of an inclusion directive.")

    (define (position:@inline@-filename-start position)
      "The start position (inclusive) of the file name of the inclusion
directive described by @var{filename}."
      (+ (position:@inline@-start position)
	 (string-length "@INLINE@ ")))

    ;; The end position (exclusive) of the file name.
    (define position:@inline@-filename-end position:@inline@-end)

    (define (parse-line line)
      "Parse a single line @var{line} (without the end of line characters)
from a GNUnet configuration file, into one of its possible types.

@begin itemize
@item The boolean @code{#false} if @var{line} is not recognised.
@item The boolean @code{#true} if @var{line} is an empty line.
@item A @code{<position:%>} or @code{<position:#>} for comment lines
 started with @code{#\\%} and @code{#\\#} respectively.
@item A @code{<position:=>} for variable assignements.
@item A @code{<position:[]>} for section names.
@item A @code{<position:@inline@>} for inclusion directives.
@end itemize

Other syntax may be supported in the future, in which case other data
of other types may be returned."
      ;; Ignore leading whitespace.
      (let^ ((! start-inclusive (string-skip line whitespace))
	     ;; Did the line consist of only whitespace?
	     ;; Then stop.
	     (? (not start-inclusive) #true)
	     (! first-important-character
		(string-ref line start-inclusive))
	     ;; Is this a comment?  Then stop.
	     (? (char=? first-important-character #\#)
		(make-#-position start-inclusive))
	     (? (char=? first-important-character #\%)
		(make-%-position start-inclusive))
	     ;; Ignore trailing whitespace.
	     (! end-inclusive
		(string-skip-right line whitespace start-inclusive))
	     (!! end-inclusive)
	     ;; Is this a section name?  Then stop.
	     (? (and (char=? #\[ first-important-character)
		     (char=? #\] (string-ref line end-inclusive)))
		(#{make-[]-position}# (+ 1 start-inclusive) end-inclusive))
	     ;; Is this an inclusion directive?  Then stop.
	     ;; TODO upstream GNUnet compares case-insensitively.
	     ;; Is this a bug or a feature?
	     (? (and (char=? #\@ first-important-character)
		     (string-prefix? "@INLINE@ " line 1
				     (string-length "@INLINE@ ")
				     ;; XXX what if the file name is empty?
				     (+ 1 start-inclusive) (+ 1 end-inclusive)))
		(make-@inline@-position start-inclusive
					(+ 1 end-inclusive)))
	     ;; Maybe this is an assignment; search for the equality
	     ;; sign.
	     (! =-position (string-index line #\= start-inclusive
					 (+ 1 end-inclusive)))
	     ;; no clue!
	     (? (not =-position) #f)
	     ;; Remove trailing whitespace from the variable name
	     ;; (the ‘tag’).
	     (! variable-end-inclusive (string-skip-right line whitespace
							  start-inclusive
							  =-position))
	     ;; TODO should empty tags by allowed?
	     ;; Bail out if the variable name consists of only whitespace.
	     (? (not variable-end-inclusive) #f)
	     (! variable-end (+ 1 variable-end-inclusive))
	     ;; Remove whitespace from the variable value.
	     (! value-start (string-skip line whitespace (+ 1 =-position)
					 (+ 1 end-inclusive)))
	     (! value-start (or value-start (+ 1 end-inclusive))))
	    (make-=-position start-inclusive variable-end
			     =-position value-start (+ 1 end-inclusive))))

    
    ;; * The (recursive) position-preserving variable substitutions parser.
    ;; We support: "literal-stuff", "${var}" "$var", "${VAR:-stuff}".
    ;; First define some data types.

    (define-positions-type (<expo:literal> make-literal-position literal-position?)
      ((literal-start expo:literal-start)
       (literal-end expo:literal-end))
      ((< literal-start literal-end))
      "@var{literal-start} (inclusive) and @var{literal-end} (exclusive) are
the start and end positions of a region of texts without expansions.")

    (define-positions-type (<expo:$> make-$-position $-position?)
      (($-name-start expo:$-name-start)
       ($-name-end expo:$-name-end))
      ((< $-name-start $-name-end))
      "@var{$-name-start} (inclusive) and @var{$-name-end} (exclusive) are the
start and end positions of a variable name in an expansion X/$VAR/etcetera.")

    (define-positions-type (#{<expo:${}>}# #{make-${}-position}#
			    #{${}-position?}#)
      ((#{${}-name-start}# #{expo:${}-name-start}#)
       (#{${}-name-end}# #{expo:${}-name-end}#))
      ((< #{${}-name-start}# #{${}-name-end}#))
      "@var{$@{@}-name-start} (inclusive) and @var{$@{@}-name-end}
(exclusive) are the start and end positions of a variable name in an expansion
${VAR}.")

    (define-record-type (#{<expo:${:-}>}# #{make-${:-}-position}#
			 #{${:-}-position?}#)
      (fields (immutable #{${:-}-name-start}# #{expo:${:-}-name-start}#)
	      (immutable #{${:-}-name-end}# #{expo:${:-}-name-end}#)
	      (immutable #{${:-}-value-start}# #{expo:${:-}-value-start}#)
	      (immutable #{${:-}-value-end}# #{expo:${:-}-value-end}#)
	      (immutable #{${:-}-value-parts}# #{expo:${:-}-value-parts}#))
      (sealed #t)
      (opaque #t)
      (protocol
       (lambda (%make)
	 (lambda (#{${:-}-name-start}# #{${:-}-name-end}#
		  #{${:-}-value-start}# #{${:-}-value-end}#
		  #{${:-}-value-parts}#)
	   "@var{$@{:-@}-name-start} (inclusive) and @var{$@{:-@}-name-end}
(exclusive) are the start and end positions of a variable name in an expansion
@samp{$@{VAR:-DEFAULT-VALUE@}}.  @var{$@{:-@}-value-start} (inclusive) and
@var{$@{:-@}-value-end} (exclusive) are the start and end positions of
DEFAULT-VALUE.  @var{${:-}-value-parts} is an ordered list of contiguous
expansion position objects, representing the structure of @samp{DEFAULT-VALUE}
(unverified)."
	   (assert (and (exact-integers?
			 #{${:-}-name-start}# #{${:-}-name-end}#
			 #{${:-}-value-start}# #{${:-}-value-end}#)
			(<= 0 #{${:-}-name-start}#)
			(< #{${:-}-name-start}# #{${:-}-name-end}#)
			(= (- #{${:-}-value-start}# #{${:-}-name-end}#) 2)
			(<= #{${:-}-value-start}# #{${:-}-value-end}#)
			(or (pair? #{${:-}-value-parts}#)
			    (null? #{${:-}-value-parts}#))))
	   (%make #{${:-}-name-start}# #{${:-}-name-end}#
		  #{${:-}-value-start}# #{${:-}-value-end}#
		  #{${:-}-value-parts}#)))))

    ;; Now define the possible syntax errors.
    (define-condition-type &expansion-violation &lexical
      %make-expansion-violation expansion-violation?
      (position expansion-violation-position))

    (define (make-expansion-violation position)
      (assert (and (exact-integers? position) (<= 0 position)))
      (%make-expansion-violation position))

    (define-condition-type &empty-variable-violation &expansion-violation
      %make-empty-variable-violation empty-variable-violation?
      ;; $, ${} or ${:-}
      (kind empty-variable-kind))

    (define (make-empty-variable-violation position kind)
      "Make a condition indicating at position @var{position} a variable
name was expected, but only an empty string was found.  The symbol @var{kind}
indicates the type of variable expansion found: @code{$@{:-@}} for variable
expansions with a default, @code{$@{@}} for braced variable expansions without
default and @code{$} for unbraced variable expansions."
      (assert (and (exact-integers? position)
		   (<= 0 position)
		   (memq kind '($ #{${}}# #{${:-}}#))))
      (%make-empty-variable-violation position kind))

    (define-condition-type &missing-close &expansion-violation
      %make-missing-close-violation missing-close-violation?
      ;; ${} or ${:-}
      (kind missing-close-kind))

    (define (make-missing-close-violation position kind)
      "Make a condition indicating at position @var{position} a closing
brace (@code{#\\@}) was expected, but not found.  The symbol @var{kind}
indicates the type of variable expansion found, as in
@code{empty-variable-violation}, though it cannot be @code{$@}."
      (assert (and (exact-integers? position)
		   (<= 0 position)
		   (memq kind '(#{${}}# #{${:-}}#))))
      (%make-missing-close-violation position kind))

    (define cs::-or-close (char-set #\: #\}))
    (define cs:$-or-close (char-set #\$ #\}))
    ;; TODO: should #\0 be included?  It seems to be
    ;; ‘merely’ an artifact of the C implementation.
    ;; TODO: add #\{?
    (define cs:unbraced-end/nested (char-set #\/ #\\ #\0 #\ #\}))
    ;; TODO add #\{, #\} here, I guess? For consistency with bash.
    (define cs:unbraced-end (char-set #\/ #\\ #\0 #\ ))

    (define (parse-expandable* text start end nested?)
      "Search in @var{text} for variable references to expand, returning
a list of expansible position objects and the end position (exclusive,
does not include closing brace).

Alternatively, raise an @code{&expansion-violation}.  If @var{nested?}
is trueish, stop at (and expect) an unbalanced close brace.
If @var{nested?} is Scheme-trueish, it is used as the ‘kind’ argument for
@code{&expansion-violation}.

(In the current parser, in practice this will be @code{#f} or @code{@{:-@}},
but perhaps the syntax will be extended in the future.)

TODO: there currently is not a dedicated condition type for ${a:} and ${a:+}
(in the first, a - after the : is missing, and in the second, + is
invalid).

If @var{nested?} is Scheme-falsish, then the second return value is simply
@var{end} itself."
      (assert (and (exact-integers? start end)
		   (<= 0 start)
		   (<= start end)
		   (<= end (string-length text))))
      (let^ ((/o/ loop
		  ;; in reverse chronological order
		  (accumulated '())
		  ;; where to start searching for the next expansion object
		  (start start))
	     ;; Search for a $ to expand (or a closing brace to stop at,
	     ;; when nested/recursing).
	     (! dollar-close (string-index text (if nested?
						    cs:$-or-close
						    #\$) start end))
	     ;; Add the literal region of text to @var{accumulated}
	     ;; (unless it is empty).
	     (! accumulated
		(if (or (= start (or dollar-close end)))
		    accumulated
		    (cons (make-literal-position start (or dollar-close end))
			  accumulated)))
	     ;; No #\$ and we're not nested/recursing?
	     ;; Then we're done.
	     (? (and (not nested?) (not dollar-close))
		(values (reverse accumulated) end))
	     ;; No #\$ or #\}, but we're nested/recursing?  Then
	     ;; we're missing a close brace.
	     (? (and nested? (not dollar-close))
		(raise (make-missing-close-violation end nested?)))
	     ;; (@var{dollar-close} is trueish now)
	     ;; Did we find a closing brace when nested?
	     ;; Then we're done
	     (? (and nested? dollar-close
		     (char=? #\} (string-ref text dollar-close)))
		(values (reverse accumulated) dollar-close))
	     ;; The character at @var{dollar-close} is a dollar now.
	     (! dollar dollar-close)
	     ;; Empty variable names are not allowed.
	     (? (= (+ 1 dollar) end)
		;; passing @var{kind} here would be incorrect!
		(raise (make-empty-variable-violation (+ 1 dollar) '$)))
	     (! next-character (string-ref text (+ 1 dollar)))
	     ;; Is this an braced variable expansion?
	     (? (char=? next-character #\{)
		(let^ ((! name-start (+ 2 dollar))
		       ;; Then search for a closing }
		       ;; or the : in ${VAR:-DEFAULT}.
		       (! name-end (string-index text cs::-or-close
						 name-start end))
		       ;; There should eventually be at least
		       ;; a closing }.
		       (? (not name-end)
			  (raise (make-missing-close-violation end '#{${}}#)))
		       (! name-end-character
			  (string-ref text name-end))
		       ;; Empty variable names are not allowed.
		       (? (= name-start name-end)
			  (raise (make-empty-variable-violation
				  name-end
				  (if (char=? name-end-character #\:)
				      '#{${:-}}#
				      '#{${}}#))))
		       ;; Was this ${NAME}?
		       (? (char=? name-end-character #\})
			  ;; Then add it to @var{accumulated} and
			  ;; continue.
			  (loop (cons (#{make-${}-position}# name-start name-end)
				      accumulated)
				(+ 1 name-end)))
		       ;; Otherwise, it was ${NAME:-VALUE}.
		       ;; But verify - exists.
		       (? (not (and (< (+ 1 name-end) end)
				    (char=? (string-ref text (+ 1 name-end))
					    #\-)))
			  ;; TODO a more specific condition would be nice.
			  (raise (%make-expansion-violation (+ 1 name-end))))
		       (! value-start (+ 2 name-end))
		       ;; Now parse VALUE in ${NAME:-VALUE}.
		       ;;
		       ;; This procedure call will verify a close
		       ;; brace at @var{default-end} exist.
		       (<-- (value-parts value-end)
			    (parse-expandable* text value-start end '#{${:-}}#))
		       ;; This was violated at some draft of this procedure.
		       ;; Verify it is fixed.
		       (!! (or (pair? value-parts)
			       (null? value-parts)))
		       ;; So the following should be true.
		       ;; (Not related to previous comment.)
		       (!! (char=? #\} (string-ref text value-end))))
		      ;; Add the variable expansion to @var{accumulated}
		      ;; and continue.
		      (loop (cons (#{make-${:-}-position}# name-start name-end
				   value-start value-end value-parts)
				  accumulated)
			    ;; + 1: eat the closing brace.
			    (+ 1 value-end))))
	     ;; Then it is an unbraced $VARIABLE expansion.
	     (! name-start (+ 1 dollar))
	     (! name-end (string-index text (if nested?
						cs:unbraced-end/nested
						cs:unbraced-end)
				       name-start end))
	     (! name-end (or name-end end))
	     ;; Empty variable names are not allowed.
	     (? (= name-start name-end)
		(raise (make-empty-variable-violation name-end '$))))
	    ;; Add the variable to @var{accumulated} and continue.
	    (loop (cons (make-$-position name-start name-end) accumulated)
		  name-end)))

    (define (parse-expandable text)
      (parse-expandable* text 0 (string-length text) #f))))