aboutsummaryrefslogtreecommitdiff
path: root/gnu/gnunet/metadata.scm
blob: fc57c48313cf54aed89f80b8a5f25d786014549c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
;;   This file is part of scheme-GNUnet, a partial Scheme port of GNUnet.
;;   Copyright (C) 2003, 2004, 2005, 2006, 2008, 2009, 2010, 2020 GNUnet e.V.
;;
;;   GNUnet is free software: you can redistribute it and/or modify it
;;   under the terms of the GNU Affero General Public License as published
;;   by the Free Software Foundation, either version 3 of the License,
;;   or (at your option) any later version.
;;
;;   GNUnet is distributed in the hope that it will be useful, but
;;   WITHOUT ANY WARRANTY; without even the implied warranty of
;;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;   Affero General Public License for more details.
;;
;;   You should have received a copy of the GNU Affero General Public License
;;   along with this program.  If not, see <http://www.gnu.org/licenses/>.
;;
;;   SPDX-License-Identifier: AGPL-3.0-or-later
;;
;;   As a special exception to the GNU Affero General Public License,
;;   the file may be relicensed under any license used for
;;   most source code of GNUnet 0.13.1, or later versions, as published by
;;   GNUnet e.V.

;; Upstream author: Christian Grothoff
;; Upstream source: gnunet-0.13.1/util/container_meta_data.c
;; Scheme port author: Maxime Devos
;; Scheme module: (gnu gnunet metadata)
;; Brief: Storing of meta data

;; Deviations from upstream:
;;  * file names in meta-data are not automatically POSIXified.

;; TODO: (de-)serialisation, dependencies, other procedures
(library (gnu gnunet metadata)
  (export meta-item? meta-item-mime-type meta-item-data meta-item-format
	  make-meta-item meta-item=?
	  meta-data? make-meta-data meta-data-extend meta-data=?)
  (import (rnrs arithmetic bitwise)
	  (rnrs base)
	  (rnrs control)
	  (rnrs records syntactic)
	  (rnrs bytevectors)
	  (rnrs lists)
	  (only (gnu extractor metaformats)
		METAFORMAT_UTF8
		METAFORMAT_C_STRING
		METAFORMAT_BINARY
		meta-format?
		integer->meta-format)
	  (only (gnu extractor metatypes)
		integer->meta-type
		meta-type?)
	  (gnu gnunet utils netstruct)
	  (gnu gnunet utils bv-slice)
	  (gnu gnunet metadata struct)
	  (only (gnu gnunet utils decompress) decompress)
	  (only (gnu gnunet utils hat-let) let^)
	  (only (srfi srfi-31) rec)
	  (only (srfi srfi-43) vector-unfold)
	  (only (srfi srfi-45) delay force))

  ;; An arbitrary implementation limit on buffer sizes.
  (define GNUNET_MAX_MALLOC_CHECKED (* 40 1024 1024))

  (define HEADER_VERSION_MASK #x7FFFFFFF)

  ;; Meta data item
  (define-record-type (<meta-item> %make-meta-item meta-item?)
    ;; Name of extracting plugin (an ASCII string, or #f)
    (fields (immutable plugin-name meta-item-plugin)
	    ;; Mime-type of data (an ASCII string, or #f).
	    (immutable mime-type meta-item-mime-type)
	    ;; The actual meta data (bytevector).
	    (immutable data %meta-item-data)
	    ;; Type of the meta data (<meta-type>).
	    (immutable type meta-item-type)
	    ;; Format of the meta data.
	    (immutable format meta-item-format))
    (opaque #t)
    (sealed #t))

  (define (meta-item-data-size item)
    "How large is the @lisp{meta-item-data} of the <meta-item>
@var{item}? Expressed in bytes."
    (bytevector-length (%meta-item-data item)))

  (define (meta-item=? x y)
    "Are two <meta-item> equal?"
    (assert (meta-item? x))
    (assert (meta-item? y))
    (equal? x y))

  (define (make-meta-item plugin-name mime-type data type format)
    "Construct a meta data item"
    ;; TODO: make strings read-only when running on Guile Scheme.
    ;; (RNRS scheme doesn't have a string-set! procedure,
    ;; so portable sandboxes can still use this module safely)
    (assert (or mime-type (string? mime-type)))

    (%make-meta-item plugin-name mime-type data type format))

  ;; Meta data to associate with a file, directory or namespace.
  (define-record-type (<meta-data> %make-meta-data meta-data?)
    ;; Vector of the meta data items.
    ;; (TODO: perhaps a functional deque would be faster)
    (fields (immutable items meta-data-items)
	    ;; Complete serialized and compressed buffer of the items,
	    ;; as a promised bytevector.
	    (immutable sbuf meta-data-sbuf-promise))
    (opaque #t)
    (sealed #t))

  (define (%vector->meta-data item-vec)
    "Create a fresh <meta-data> with some items (no type-checking)"
    (rec meta-data
	 (%make-meta-data item-vec
			  (delay (make-sbuf meta-data)))))

  (define (make-meta-data)
    "Create a fresh <meta-data>"
    (%vector->meta-data (vector)))

  ;; TODO: perhaps this may be useful?
  #; (define (forget-sbuf meta-data)
    "The serialization buffer is no longer relevant, regenerate it
lazyily.

@var{meta-data}: meta data to forget serialization buffer of"
  frob)

  ;; GNUNET_CONTAINER_meta_data_test_equal isn't ported.
  ;; It doesn't compare the mime types, so it doesn't check
  ;; for equality in the sense of @lisp{equal?}.
  (define (meta-data=? x y)
    "Test if two MDs are equal.  We consider them equal if
the meta types, formats, content and mime type match.
(Warning: the C port doesn't check the mime type)"
    "Compare two meta data items for equality.

Warning: two equal MD are not necessarily @lisp{equal?} (TODO: yet)."
    (assert (meta-data? x))
    (assert (meta-data? y))
    ;; ignore meta-data-sbuf-promise
    (or (eq? x y)
	(and (equal? (meta-data-items x)
		     (meta-data-items y)))))

  (define (meta-data-extend meta plugin-name type format data-mime-type data)
    "Extend metadata.  Note that the list of meta data items is
sorted by size (largest first).

Return the updated meta-data, and #f if this entry already exists, #t
otherwise.  If the entry already exists (identified by @var{type}
and @var{data}), don't change the old entry, except for defining
the mime type if it wasn't set previously, and making the meta
format more specific.

Deviation from upstream: upstream changes directory separators to
POSIX style ('/') for some meta data, this port doesn't.

Entries are identified by @var{type} and @var{data}.

@var{meta} metadata to extend
@var{plugin-name} plugin_name name of the plugin that produced this value;
special values can be used (i.e. '&lt;zlib&gt;' for zlib being
used in the main libextractor library and yielding
meta data) name of extracting plugin
@var{type} libextractor-type describing the meta data
@var{format} basic format information about data
@var{data-mime-type} mime-ype of data (not of original file);
  can be @lisp{#f} (if mime-type is not known) (immutable)
@var{data} actual meta-data found (bytevector)"
    (assert (meta-data? meta))
    (assert (string? plugin-name)) ;; TODO perhaps check for \0 bytes
    (assert (meta-type? type))
    (assert (meta-format? format))
    (assert (or (not data-mime-type) (string? data-mime-type)))
    (assert (bytevector? data))
    ;; Figure out where to insert or set the meta data.
    ;; TODO: binary search instead of linear search
    (let* ((items (meta-data-items meta))
	   (items-length (vector-length items)))
      (let loop ((i 0))
	(cond ((or (>= i items-length)
		   (< (meta-item-data-size (vector-ref items i))))
	       ;; A new entry: insert at the end of the item vector,
	       ;; or earlier. TODO: read-only bytevectors & strings
	       (let* ((meta-item (%make-meta-item plugin-name
						  data-mime-type
						  (bytevector-copy data)
						  type
						  format))
		      (new-items (vector-insert items i meta-item)))
		 (values (%vector->meta-data new-items)
			 #t)))
	      ((and (equal? (meta-item-type (vector-ref items i))
			    type)
		    (bytevector=? (%meta-item-data (vector-ref items i))
				  data))
	       ;; If format and mime-type aren't changed,
	       ;; just keep the old structure (freshness is not required).
	       (let* ((old-item (vector-ref items i))
		      (new-mime-type (or (meta-item-mime-type old-item)
					 data-mime-type))
		      (old-format (meta-item-format old-item))
		      (new-format
		       (if (and (equal? old-format METAFORMAT_C_STRING)
				(equal? format METAFORMAT_UTF8))
			   METAFORMAT_UTF8
			   old-format))
		      (new-item (%make-meta-item new-mime-type
						 (%meta-item-data old-item)
						 meta-item-data
						 meta-item-format)))
		 (if (equal? old-item new-item)
		     (values meta #f)
		     (%vector->meta-data (vector-replace items i
							 new-item)))))
	      (else (loop (+ 1 i)))))))

  (define (vector-insert vec i x)
    "Insert @var{x} into the vector @var{vec} at offset @var{i}"
    (vector-unfold (lambda (j)
		     (cond ((< j i) (vector-ref vec j))
			   ((= j i) x)
			   ((> j i) (vector-ref vec (- j 1)))))
		   (+ 1 (vector-length vec))))

  (define (vector-replace vec i x)
    "Replace the element at offset @var{i} in @var{vec} by @var{x}"
    (vector-unfold (lambda (j)
		     (cond ((= j i) x)
			   (else (vector-ref vec j))))
		   (vector-length vec)))

  (define (bv-slice bv offset length)
    "Copy @var{length} bytes from @var{bv}, starting at @var{offset}."
    (let ((bv-new (make-bytevector length)))
      (bytevector-copy! bv offset bv-new 0 length)
      bv-new))

  (define (meta-data-deserialize slice)
    "Deserialize meta-data, as a <meta-data>.

The serialized meta-data is passed as a readable slice @var{slice}.
In case of success, return an appropriate @code{<meta-data>}.
In case of a parsing error, return @code{#f}.
(Unsupported versions count as parsing errors.)

TODO: perhaps a variant raising conditions may be more informative."
    ;; Argument checks
    (let^ ((!! (slice? slice))
	   (!! (slice-readable? slice))
	   ;; Header checks
	   (? (< (size-length slice) (sizeof MetaDataHeader ())) #f)
	   (! header (slice-slice slice 0 (sizeof MetaDataHeader ())))
	   (! version (bitwise-and (read% MetaDataHeader ("version") header)
				   HEADER_VERSION_MASK))
	   (? (not (= 2 version)) #f) ; unsupported version
	   (! ic (read% MetaDataHeader ("entries") header))
	   (! data-size (read% MetaDataHeader ("size") header))
	   (? (or (> (* ic (sizeof MetaDataEntry ())) data-size)
		  (and (not (= 0 ic))
		       ;; TODO: isn't this clause redundant?
		       (< data-size
			  (* ic (sizeof MetaDataEntry ())))))
	      #f)
	   ;; Decompression
	   (! compressed?
	      (not (= 0 (bitwise-and
			 (read% MetaDataHeader ("version") header)))))
	   (! cdata
	      (let ((maybe-compressed
		     (slice-slice slice (sizeof MetaDataHeader ()))))
		(cond ((not compressed?)
		       maybe-compressed)
		      ((>= data-size GNUNET_MAX_MALLOC_CHECKED)
		       ;; make sure we don't blow our memory limit because
		       ;; of a mal-formed message... 40 MiB seems rather
		       ;; large to encounter in the wild, so this
		       ;; is unlikely to be a problem.
		       #f)
		      (else
		       (decompress maybe-compressed data-size)))))
	   ;; Check decompression was successful
	   (? (not cdata) #f)
	   (! mdata (slice-slice cdata (* ic (sizeof MetaDataEntry ()))))
	   ;; Loop over metadata
	   (/o/ loop-metadata
		(i 0)
		(md (make-meta-data))
		(left (- data-size (* ic (sizeof MetaDataEntry ())))))
	   (? (>= i ic) md) ;; all metadata is deserialised
	   (! from-entry-till-end
	      (slice-slice cdata (* ic (sizeof MetaDataEntry ()))))
	   (! entry-header
	      (slice-slice from-entry-till-end
			   0 (sizeof MetaDataEntry)))
	   (! format (read% MetaDataEntry ("format") entry-header))
	   ;; Bail out if the metaformat is unrecognised
	   ;; FIXME why did I write 0 here?
	   (? (not (member 0 `(,METAFORMAT_UTF8 ,METAFORMAT_C_STRING
						,METAFORMAT_BINARY)))
	      ;; TODO: upstream returns incomplete @var{md}
	      ;; in this case! Return NULL instead!
	      ;; (An incomplete @var{md} is returned in
	      ;; some other cases as well.)
	      #f)
	   (! entry-data-length
	      (read% MetaDataEntry ("data-size") entry-header))
	   (! plugin-name-length
	      (read% MetaDataEntry ("plugin-name-length") entry-header))
	   (! mime-type-length
	      (read% MetaDataEntry ("mime-type-length") entry-header))
	   (? (> entry-data-length left) #f)
	   (! left (- left entry-data-length))
	   (! meta-data-offset
	      (+ mdata-offset left))
	   ;; Strings are terminated with a \0
	   ;; TODO: upstream doesn't check the location of
	   ;; the **first** \0. Is this intentional or irrelevant?
	   (? (and (member format
			   `(,METAFORMAT_UTF8 ,METAFORMAT_C_STRING))
		   (or (= 0 entry-data-length)
		       (not (= (bytevector-u8-ref
				cdata-bv
				(+ meta-data-offset
				   (- entry-data-length 1)))))))
	      #f)
	   (? (> plugin-name-length left) #f)
	   (! left (- left plugin-name-length))
	   (? (and (> plugin-name-length 0)
		   (not (= 0 (bytevector-u8-ref
			      cdata-bv
			      (+ mdata-offset
				 left
				 plugin-name-length
				 -1)))))
	      #f)
	   ;; FIXME plen or entry-data-length
	   ;; Does not include terminating \0.
	   (! plugin-bv
	      (and (> plugin-name-length 0)
		   (bv-slice cdata-bv (+ mdata-offset left)
			     (- plugin-name-length 1))))
	   ;; There isn't any formal requirement for
	   ;; being encoded as UTF-8 as far as I know,
	   ;; but in practice this will probably be ASCII,
	   ;; which is a subset of UTF-8.
	   (! plugin-string
	      (and plugin-bv (utf8->string plugin-bv)))
	   (? (> mime-type-length left) #f)
	   (! left (- left mime-type-length))
	   (? (and (> mime-type-length 0)
		   (< 0 (bytevector-u8-ref cdata-bv
					   (+ mdata-offset
					      mime-type-length
					      -1))))
	      #f)
	   (! mime-type-string
	      (and (< 0 mime-type-length)
		   (utf8->string (bv-slice cdata-bv
					   (+ mdata-offset
					      left -1)
					   (- mime-type-length 1)))))
	   (! new-md
	      (meta-data-extend
	       md plugin-string
	       (read% MetaDataEntry ("type") entry)
	       format
	       mime-type-string
	       (bv-slice cdata-bv meta-data-offset
			 entry-data-length))))
	  (loop-metadata (+ i 1)
			 new-md
			 left)))

  (define (break)
    "This state seems rather suspicious, but not necessarily incorrect."
    #f)

  (define-syntax ==>
    (syntax-rules ()
      ((_ P Q)
       (if P
	   Q
	   #t))))

  (define (meta-data-serialize/uncached meta-data options)
    ;; TODO: serialisation cache
    "Serialize @var{meta-data} into a fresh bytevector

Return the number of bytes written on success,
or @code{#f} on error. (FIXME: raise a condition instead)

@var{meta-data} <meta-data> to serialize
@var{options}: TODO (which compression method to use, is a subset
of the metadata acceptable)"
    (let^ ((! size
	      (vector-fold
	       (lambda (m)
		 (+ (sizeof MetaDataEntry ())
		    (meta-item-data-size m)
		    ;; Is ASCII, therefore
		    ;; string length and
		    ;; byte length
		    ;; coincide.
		    (or (string-length
			 (meta-item-plugin-name m))
			0)
		    (or (string-length
			 (meta-item-mime-type m))
			0)))
	       0
	       (meta-data-items meta-data)))
	   (? (>= size GNUNET_MAX_MALLOC_CHECKED)
	      ;; too large to be processed by upstream
	      #f)
	   (! ent-bv (make-bytevector size))
	   (! mdata-offset
	      (* (sizeof MetaDataEntry ())
		 (meta-data-item-count meta-data)))
	   (_ (let^ ((/o/ meta-item-loop
			  (i 0)
			  (off (- size
				  (* (sizeof MetaDataEntry ())
				     (meta-data-item-count meta-data)))))
		     (? (>= i (meta-data-item-count meta-data))
			(assert (= 0 off))
			'done)
		     (! item (vector-ref (meta-data-items meta-data) i))
		     (! ent-offset (* i (sizeof MetaDataEntry ())))
		     (_ (set%! MetaDataEntry (type) ent-slice (meta-item-type item)))
		     (_ (set%! MetaDataEntry (format) ent-slice (meta-item-format item)))
		     (_ (set%! MetaDataEntry (data-size) ent-slice (meta-item-data-size item)))
		     (! pname (meta-item-plugin-name item))
		     (! mime (meta-item-mime-type item))
		     (! plugin-bv (and pname (string->utf8 pname)))
		     (! mime-bv (and mime (string->utf8 mime)))
		     ;; Add 1 byte for terminating \0.
		     (_ (set%! MetaDataEntry ("plugin-name-length") ent
			       (if plugin-bv
				   (1+ (bytevector-length plugin-bv))
				   0)))
		     (_ (set%! MetaDataEntry ("mime-type-length") ent
			       (if mime-bv
				   (+ 1 (bytevector-length mime-bv))
				   0)))
		     (! off (- off (meta-item-data-size item)))
		     ;; Check for \0 bytes
		     ;; TODO: perform this check elsewhere
		     ;; TODO: check all bytes
		     (? (not (==> (member (meta-item-format item)
					  `(,METAFORMAT_C_STRING
					    ,METAFORMAT_UTF8))
				  (= (bytevector-u8-ref
				      (%meta-item-data item)
				      (bytevector-length
				       (%meta-item-data item)))
				     0)))
			(break))
		     (_ (bytevector-copy!
			 (%meta-item-data item)
			 0
			 ent-bv
			 (+ mdata-offset off)
			 (meta-item-data-size item)))
		     ;; Copy mime type, plugin name
		     ;; and add a terminating \0 byte.
		     (! off (- off (if plugin-bv
				       (+ 1 (bytevector-length plugin-bv))
				       0)))
		     (_ (when plugin-bv
			  (bytevector-copy!
			   plugin-bv 0
			   ent-bv (+ mdata-offset off)
			   (bytevector-length plugin-bv))
			  (bytevector-u8-set!
			   ent-bv
			   (+ mdata-offset off
			      (bytevector-length plugin-bv))
			   0)))
		     (! off (- off
			       (if mime-bv
				   (+ 1 (bytevector-length mime-bv))
				   0)))
		     (_ (when mime-bv
			  (bytevector-copy!
			   mime-bv 0
			   ent-bv (+ mdata-offset off)
			   (bytevector-length mime-bv))
			  (bytevector-u8-set!
			   ent-bv
			   (+ mdata-offset off
			      (bytevector-length mime-bv))
			   0))))
		    (meta-item-loop
		     (+ 1 i)
		     off)))
	   ;; Don't include upstream loop #2, it is a loop
	   ;; for throwing away meta data until
	   ;; everything fits in the buffer ... which doesn't
	   ;; have a use (yet).
	   (! i 0)
	   (? (>= i (meta-data-item-count meta-data))
	      ;; No meta data, only write header
	      (let^ ((! result (make-slice/read-write
				(sizeof MetaDataHeader ())))
		     (_ (set%! MetaDataHeader (version) result 2))
		     (_ (set%! MetaDataHeader (entries) result 0))
		     (_ (set%! MetaDataHeader (size!)   result 0 0)))
		    result-bv))
	   (! left size)
	   (! ent-offset
	      (+ (* i (sizeof MetaDataEntry ()))))
	      ;; TODO in upstream, it is possible to request
	      ;; no compression
	   (! cdata (try-compression ent-bv ent-offset left))
	   (! maybe-compressed-length
	      (if cdata
		  (bytevector-length cdata)
		  left))
	   (! hdr (make-bytevector (+ (sizeof MetaDataHeader ())
				      maybe-compessed-length)))
	   ;; TODO proper #f or condition on overflow
	   (_ (set%! MetaDataHeader (size) hdr left))
	   (_ (set%! MetaDataHeader (entries) hdr
		     (meta-data-item-count meta-data)))
	   (!! (==> cdata (< (bytevector-length cdata) left)))
	   (_ (set%! MetaDataHeader (version hdr)
		     (bitwise-ior 2 (if cdata
					HEADER_COMPRESSED
					0))))
	   (_  (bytevector-copy! (or cdata ent-bv)
				 (if cdata 0 ent-offset)
				 hdr (sizeof MetaDataHeader ())
				 maybe-compressed-length)))
	  hdr)))