aboutsummaryrefslogtreecommitdiff
path: root/src/util/container_meta_data.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/util/container_meta_data.c')
-rw-r--r--src/util/container_meta_data.c721
1 files changed, 721 insertions, 0 deletions
diff --git a/src/util/container_meta_data.c b/src/util/container_meta_data.c
new file mode 100644
index 000000000..b79de57d2
--- /dev/null
+++ b/src/util/container_meta_data.c
@@ -0,0 +1,721 @@
1/*
2 This file is part of GNUnet.
3 (C) 2003, 2004, 2005, 2006, 2008, 2009 Christian Grothoff (and other contributing authors)
4
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20
21/**
22 * @file util/container_meta_data.c
23 * @brief Storing of meta data
24 * @author Christian Grothoff
25 */
26
27#include "platform.h"
28#include "gnunet_common.h"
29#include "gnunet_container_lib.h"
30#include "gnunet_strings_lib.h"
31#include "gnunet_time_lib.h"
32#include <extractor.h>
33#include <zlib.h>
34
/**
 * When this evaluates to non-zero, GNUNET_CONTAINER_meta_data_serialize
 * deserializes its own output again as a self-test.  The value is taken
 * from ALLOW_EXTRA_CHECKS, which is defined outside this file.
 */
#define EXTRA_CHECKS ALLOW_EXTRA_CHECKS
36
/**
 * A single meta-data entry: a keyword type plus its value.
 */
struct Item
{
  /* libextractor keyword type describing what 'data' means */
  EXTRACTOR_KeywordType type;

  /* 0-terminated value; a heap copy that the container frees
     when the item is deleted or the container is destroyed */
  char *data;
};
42
/**
 * Meta data to associate with a file, directory or namespace.
 * Stored as an unordered, growable array of (type, value) items.
 */
struct GNUNET_CONTAINER_MetaData
{
  /* number of valid entries in 'items' */
  uint32_t itemCount;

  /* array of 'itemCount' entries; NULL when the container is empty */
  struct Item *items;
};
51
52/**
53 * Create a fresh struct CONTAINER_MetaData token.
54 */
55struct GNUNET_CONTAINER_MetaData *
56GNUNET_CONTAINER_meta_data_create ()
57{
58 struct GNUNET_CONTAINER_MetaData *ret;
59 ret = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_MetaData));
60 ret->items = NULL;
61 ret->itemCount = 0;
62 return ret;
63}
64
65/**
66 * Free meta data.
67 */
68void
69GNUNET_CONTAINER_meta_data_destroy (struct GNUNET_CONTAINER_MetaData *md)
70{
71 int i;
72
73 if (md == NULL)
74 return;
75 for (i = 0; i < md->itemCount; i++)
76 GNUNET_free (md->items[i].data);
77 GNUNET_array_grow (md->items, md->itemCount, 0);
78 GNUNET_free (md);
79}
80
81/**
82 * Add the current time as the publication date
83 * to the meta-data.
84 */
85void
86GNUNET_CONTAINER_meta_data_add_publication_date (struct
87 GNUNET_CONTAINER_MetaData
88 *md)
89{
90 char *dat;
91 struct GNUNET_TIME_Absolute t;
92
93 t = GNUNET_TIME_absolute_get ();
94 GNUNET_CONTAINER_meta_data_delete (md, EXTRACTOR_PUBLICATION_DATE, NULL);
95 dat = GNUNET_STRINGS_absolute_time_to_string (t);
96 GNUNET_CONTAINER_meta_data_insert (md, EXTRACTOR_PUBLICATION_DATE, dat);
97 GNUNET_free (dat);
98}
99
100/**
101 * Extend metadata.
102 * @return GNUNET_OK on success, GNUNET_SYSERR if this entry already exists
103 */
104int
105GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md,
106 EXTRACTOR_KeywordType type,
107 const char *data)
108{
109 uint32_t idx;
110 char *p;
111
112 GNUNET_assert (data != NULL);
113 for (idx = 0; idx < md->itemCount; idx++)
114 {
115 if ((md->items[idx].type == type) &&
116 (0 == strcmp (md->items[idx].data, data)))
117 return GNUNET_SYSERR;
118 }
119 idx = md->itemCount;
120 GNUNET_array_grow (md->items, md->itemCount, md->itemCount + 1);
121 md->items[idx].type = type;
122 md->items[idx].data = p = GNUNET_strdup (data);
123
124 /* change OS native dir separators to unix '/' and others to '_' */
125 if (type == EXTRACTOR_FILENAME)
126 {
127 while (*p != '\0')
128 {
129 if (*p == DIR_SEPARATOR)
130 *p = '/';
131 else if (*p == '\\')
132 *p = '_';
133 p++;
134 }
135 }
136
137 return GNUNET_OK;
138}
139
140/**
141 * Remove an item.
142 * @return GNUNET_OK on success, GNUNET_SYSERR if the item does not exist in md
143 */
144int
145GNUNET_CONTAINER_meta_data_delete (struct GNUNET_CONTAINER_MetaData *md,
146 EXTRACTOR_KeywordType type,
147 const char *data)
148{
149 uint32_t idx;
150 int ret = GNUNET_SYSERR;
151 for (idx = 0; idx < md->itemCount; idx++)
152 {
153 if ((md->items[idx].type == type) &&
154 ((data == NULL) || (0 == strcmp (md->items[idx].data, data))))
155 {
156 GNUNET_free (md->items[idx].data);
157 md->items[idx] = md->items[md->itemCount - 1];
158 GNUNET_array_grow (md->items, md->itemCount, md->itemCount - 1);
159 if (data == NULL)
160 {
161 ret = GNUNET_OK;
162 continue;
163 }
164 return GNUNET_OK;
165 }
166 }
167 return ret;
168}
169
170/**
171 * Iterate over MD entries, excluding thumbnails.
172 *
173 * @return number of entries
174 */
175int
176GNUNET_CONTAINER_meta_data_get_contents (const struct
177 GNUNET_CONTAINER_MetaData *md,
178 GNUNET_CONTAINER_MetaDataProcessor
179 iterator, void *closure)
180{
181 uint32_t i;
182 uint32_t sub;
183
184 sub = 0;
185 for (i = 0; i < md->itemCount; i++)
186 {
187 if (!EXTRACTOR_isBinaryType (md->items[i].type))
188 {
189 if ((iterator != NULL) &&
190 (GNUNET_OK != iterator (md->items[i].type,
191 md->items[i].data, closure)))
192 return GNUNET_SYSERR;
193 }
194 else
195 sub++;
196 }
197 return (int) (md->itemCount - sub);
198}
199
200/**
201 * Iterate over MD entries
202 *
203 * @return number of entries
204 */
205char *
206GNUNET_CONTAINER_meta_data_get_by_type (const struct GNUNET_CONTAINER_MetaData
207 *md, EXTRACTOR_KeywordType type)
208{
209 uint32_t i;
210
211 for (i = 0; i < md->itemCount; i++)
212 if (type == md->items[i].type)
213 return GNUNET_strdup (md->items[i].data);
214 return NULL;
215}
216
217/**
218 * Iterate over MD entries
219 *
220 * @return number of entries
221 */
222char *
223GNUNET_CONTAINER_meta_data_get_first_by_types (const struct
224 GNUNET_CONTAINER_MetaData *md,
225 ...)
226{
227 char *ret;
228 va_list args;
229 EXTRACTOR_KeywordType type;
230
231 ret = NULL;
232 va_start (args, md);
233 while (1)
234 {
235 type = va_arg (args, EXTRACTOR_KeywordType);
236 if (type == -1)
237 break;
238 ret = GNUNET_CONTAINER_meta_data_get_by_type (md, type);
239 if (ret != NULL)
240 break;
241 }
242 va_end (args);
243 return ret;
244}
245
246/**
247 * Get a thumbnail from the meta-data (if present).
248 *
249 * @param thumb will be set to the thumbnail data. Must be
250 * freed by the caller!
251 * @return number of bytes in thumbnail, 0 if not available
252 */
253size_t
254GNUNET_CONTAINER_meta_data_get_thumbnail (const struct
255 GNUNET_CONTAINER_MetaData * md,
256 unsigned char **thumb)
257{
258 char *encoded;
259 int ret;
260 size_t size;
261
262 encoded =
263 GNUNET_CONTAINER_meta_data_get_by_type (md, EXTRACTOR_THUMBNAIL_DATA);
264 if (encoded == NULL)
265 return 0;
266 if (strlen (encoded) == 0)
267 {
268 GNUNET_free (encoded);
269 return 0; /* invalid */
270 }
271 *thumb = NULL;
272 ret = EXTRACTOR_binaryDecode (encoded, thumb, &size);
273 GNUNET_free (encoded);
274 if (ret != 0)
275 return 0;
276 return size;
277}
278
279/**
280 * Duplicate struct GNUNET_CONTAINER_MetaData.
281 */
282struct GNUNET_CONTAINER_MetaData *
283GNUNET_CONTAINER_meta_data_duplicate (const struct GNUNET_CONTAINER_MetaData
284 *md)
285{
286 uint32_t i;
287 struct GNUNET_CONTAINER_MetaData *ret;
288
289 if (md == NULL)
290 return NULL;
291 ret = GNUNET_CONTAINER_meta_data_create ();
292 for (i = 0; i < md->itemCount; i++)
293 GNUNET_CONTAINER_meta_data_insert (ret, md->items[i].type,
294 md->items[i].data);
295 return ret;
296}
297
298/**
299 * Extract meta-data from a file.
300 *
301 * @return GNUNET_SYSERR on error, otherwise the number
302 * of meta-data items obtained
303 */
304int
305GNUNET_CONTAINER_meta_data_extract_from_file (struct GNUNET_CONTAINER_MetaData
306 *md, const char *filename,
307 EXTRACTOR_ExtractorList *
308 extractors)
309{
310 EXTRACTOR_KeywordList *head;
311 EXTRACTOR_KeywordList *pos;
312 int ret;
313
314 if (filename == NULL)
315 return GNUNET_SYSERR;
316 if (extractors == NULL)
317 return 0;
318 head = EXTRACTOR_getKeywords (extractors, filename);
319 head = EXTRACTOR_removeDuplicateKeywords (head,
320 EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN);
321 pos = head;
322 ret = 0;
323 while (pos != NULL)
324 {
325 if (GNUNET_OK ==
326 GNUNET_CONTAINER_meta_data_insert (md, pos->keywordType,
327 pos->keyword))
328 ret++;
329 pos = pos->next;
330 }
331 EXTRACTOR_freeKeywords (head);
332 return ret;
333}
334
335static unsigned int
336tryCompression (char *data, unsigned int oldSize)
337{
338 char *tmp;
339 uLongf dlen;
340
341#ifdef compressBound
342 dlen = compressBound (oldSize);
343#else
344 dlen = oldSize + (oldSize / 100) + 20;
345 /* documentation says 100.1% oldSize + 12 bytes, but we
346 should be able to overshoot by more to be safe */
347#endif
348 tmp = GNUNET_malloc (dlen);
349 if (Z_OK == compress2 ((Bytef *) tmp,
350 &dlen, (const Bytef *) data, oldSize, 9))
351 {
352 if (dlen < oldSize)
353 {
354 memcpy (data, tmp, dlen);
355 GNUNET_free (tmp);
356 return dlen;
357 }
358 }
359 GNUNET_free (tmp);
360 return oldSize;
361}
362
363/**
364 * Decompress input, return the decompressed data
365 * as output, set outputSize to the number of bytes
366 * that were found.
367 *
368 * @return NULL on error
369 */
370static char *
371decompress (const char *input,
372 unsigned int inputSize, unsigned int outputSize)
373{
374 char *output;
375 uLongf olen;
376
377 olen = outputSize;
378 output = GNUNET_malloc (olen);
379 if (Z_OK == uncompress ((Bytef *) output,
380 &olen, (const Bytef *) input, inputSize))
381 {
382 return output;
383 }
384 else
385 {
386 GNUNET_free (output);
387 return NULL;
388 }
389}
390
/**
 * Flag in 'version' that indicates compressed meta-data.
 * This is the highest bit of the (host byte order) 32-bit value.
 */
#define HEADER_COMPRESSED 0x80000000

/**
 * Bits in 'version' that give the version number, i.e. everything
 * except the compression flag.
 */
#define HEADER_VERSION_MASK 0x7FFFFFFF
400
/**
 * Fixed-size header at the start of serialized meta-data.  The
 * serializer writes every field with htonl, i.e. in network byte
 * order.
 */
struct MetaDataHeader
{
  /**
   * The version of the MD serialization.
   * The highest bit is used to indicate
   * compression.
   *
   * Version 0 is the current version;
   * Version is 1 for a NULL pointer.
   * Other version numbers are not yet defined.
   */
  uint32_t version;

  /**
   * How many MD entries are there?
   */
  uint32_t entries;

  /**
   * Size of the MD (decompressed).  The serializer stores the
   * total size here, including this header.
   */
  uint32_t size;

  /**
   * This is followed by 'entries' values of type 'unsigned int' that
   * correspond to EXTRACTOR_KeywordTypes.  After that, the meta-data
   * keywords follow (0-terminated).  The MD block always ends with
   * 0-termination, padding with 0 until a multiple of 8 bytes.
   */

};
432
433/**
434 * Serialize meta-data to target.
435 *
436 * @param size maximum number of bytes available
437 * @param part is it ok to just write SOME of the
438 * meta-data to match the size constraint,
439 * possibly discarding some data?
440 * @return number of bytes written on success,
441 * GNUNET_SYSERR on error (typically: not enough
442 * space)
443 */
444int
445GNUNET_CONTAINER_meta_data_serialize (const struct GNUNET_CONTAINER_MetaData
446 *md, char *target, unsigned int max,
447 enum
448 GNUNET_CONTAINER_MetaDataSerializationOptions
449 part)
450{
451 struct MetaDataHeader *hdr;
452 size_t size;
453 size_t pos;
454 uint32_t i;
455 size_t len;
456 uint32_t ic;
457
458 if (max < sizeof (struct MetaDataHeader))
459 return GNUNET_SYSERR; /* far too small */
460 ic = md ? md->itemCount : 0;
461 hdr = NULL;
462 while (1)
463 {
464 size = sizeof (struct MetaDataHeader);
465 size += sizeof (unsigned int) * ic;
466 for (i = 0; i < ic; i++)
467 size += 1 + strlen (md->items[i].data);
468 while (size % 8 != 0)
469 size++;
470 hdr = GNUNET_malloc (size);
471 hdr->version = htonl (md == NULL ? 1 : 0);
472 hdr->entries = htonl (ic);
473 for (i = 0; i < ic; i++)
474 ((unsigned int *) &hdr[1])[i] =
475 htonl ((unsigned int) md->items[i].type);
476 pos = sizeof (struct MetaDataHeader);
477 pos += sizeof (unsigned int) * ic;
478 for (i = 0; i < ic; i++)
479 {
480 len = strlen (md->items[i].data) + 1;
481 memcpy (&((char *) hdr)[pos], md->items[i].data, len);
482 pos += len;
483 }
484
485 hdr->size = htonl (size);
486 if ((part & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS) == 0)
487 {
488 pos = tryCompression ((char *) &hdr[1],
489 size - sizeof (struct MetaDataHeader));
490 }
491 else
492 {
493 pos = size - sizeof (struct MetaDataHeader);
494 }
495 if (pos < size - sizeof (struct MetaDataHeader))
496 {
497 hdr->version = htonl (HEADER_COMPRESSED);
498 size = pos + sizeof (struct MetaDataHeader);
499 }
500 if (size <= max)
501 break;
502 GNUNET_free (hdr);
503 hdr = NULL;
504
505 if ((part & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART) == 0)
506 {
507 return GNUNET_SYSERR; /* does not fit! */
508 }
509 /* partial serialization ok, try again with less meta-data */
510 if (size > 2 * max)
511 ic = ic * 2 / 3; /* still far too big, make big reductions */
512 else
513 ic--; /* small steps, we're close */
514 }
515 GNUNET_assert (size <= max);
516 memcpy (target, hdr, size);
517 GNUNET_free (hdr);
518 /* extra check: deserialize! */
519#if EXTRA_CHECKS
520 {
521 struct GNUNET_CONTAINER_MetaData *mdx;
522 mdx = GNUNET_CONTAINER_meta_data_deserialize (target, size);
523 GNUNET_assert (NULL != mdx);
524 GNUNET_CONTAINER_meta_data_destroy (mdx);
525 }
526#endif
527 return size;
528}
529
530/**
531 * Estimate (!) the size of the meta-data in
532 * serialized form. The estimate MAY be higher
533 * than what is strictly needed.
534 */
535unsigned int
536GNUNET_CONTAINER_meta_data_get_serialized_size (const struct
537 GNUNET_CONTAINER_MetaData *md,
538 enum
539 GNUNET_CONTAINER_MetaDataSerializationOptions
540 part)
541{
542 struct MetaDataHeader *hdr;
543 size_t size;
544 size_t pos;
545 uint32_t i;
546 size_t len;
547 uint32_t ic;
548
549 ic = md ? md->itemCount : 0;
550 size = sizeof (struct MetaDataHeader);
551 size += sizeof (unsigned int) * ic;
552 for (i = 0; i < ic; i++)
553 size += 1 + strlen (md->items[i].data);
554 while (size % 8 != 0)
555 size++;
556 hdr = GNUNET_malloc (size);
557 hdr->version = htonl (md == NULL ? 1 : 0);
558 hdr->entries = htonl (ic);
559 for (i = 0; i < ic; i++)
560 ((unsigned int *) &hdr[1])[i] = htonl ((unsigned int) md->items[i].type);
561 pos = sizeof (struct MetaDataHeader);
562 pos += sizeof (unsigned int) * ic;
563 for (i = 0; i < ic; i++)
564 {
565 len = strlen (md->items[i].data) + 1;
566 memcpy (&((char *) hdr)[pos], md->items[i].data, len);
567 pos += len;
568 }
569 if ((part & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS) == 0)
570 {
571 pos =
572 tryCompression ((char *) &hdr[1],
573 size - sizeof (struct MetaDataHeader));
574 }
575 else
576 {
577 pos = size - sizeof (struct MetaDataHeader);
578 }
579 if (pos < size - sizeof (struct MetaDataHeader))
580 size = pos + sizeof (struct MetaDataHeader);
581 GNUNET_free (hdr);
582 return size;
583}
584
585/**
586 * Deserialize meta-data. Initializes md.
587 * @param size number of bytes available
588 * @return MD on success, NULL on error (i.e.
589 * bad format)
590 */
591struct GNUNET_CONTAINER_MetaData *
592GNUNET_CONTAINER_meta_data_deserialize (const char *input, unsigned int size)
593{
594 struct GNUNET_CONTAINER_MetaData *md;
595 const struct MetaDataHeader *hdr;
596 uint32_t ic;
597 char *data;
598 const char *cdata;
599 uint32_t dataSize;
600 int compressed;
601 int i;
602 unsigned int pos;
603 int len;
604 uint32_t version;
605
606 if (size < sizeof (struct MetaDataHeader))
607 return NULL;
608 hdr = (const struct MetaDataHeader *) input;
609 version = ntohl (MAKE_UNALIGNED (hdr->version)) & HEADER_VERSION_MASK;
610 if (version == 1)
611 return NULL; /* null pointer */
612 if (version != 0)
613 {
614 GNUNET_break_op (0); /* unsupported version */
615 return NULL;
616 }
617 ic = ntohl (MAKE_UNALIGNED (hdr->entries));
618 compressed =
619 (ntohl (MAKE_UNALIGNED (hdr->version)) & HEADER_COMPRESSED) != 0;
620 if (compressed)
621 {
622 dataSize =
623 ntohl (MAKE_UNALIGNED (hdr->size)) - sizeof (struct MetaDataHeader);
624 if (dataSize > 2 * 1042 * 1024)
625 {
626 GNUNET_break (0);
627 return NULL; /* only 2 MB allowed [to make sure we don't blow
628 our memory limit because of a mal-formed
629 message... ] */
630 }
631 data =
632 decompress ((const char *) &input[sizeof (struct MetaDataHeader)],
633 size - sizeof (struct MetaDataHeader), dataSize);
634 if (data == NULL)
635 {
636 GNUNET_break_op (0);
637 return NULL;
638 }
639 cdata = data;
640 }
641 else
642 {
643 data = NULL;
644 cdata = (const char *) &hdr[1];
645 dataSize = size - sizeof (struct MetaDataHeader);
646 if (size != ntohl (MAKE_UNALIGNED (hdr->size)))
647 {
648 GNUNET_break (0);
649 return NULL;
650 }
651 }
652
653 if ((sizeof (unsigned int) * ic + ic) > dataSize)
654 {
655 GNUNET_break (0);
656 goto FAILURE;
657 }
658 if ((ic > 0) && (cdata[dataSize - 1] != '\0'))
659 {
660 GNUNET_break (0);
661 goto FAILURE;
662 }
663
664 md = GNUNET_CONTAINER_meta_data_create ();
665 GNUNET_array_grow (md->items, md->itemCount, ic);
666 i = 0;
667 pos = sizeof (unsigned int) * ic;
668 while ((pos < dataSize) && (i < ic))
669 {
670 len = strlen (&cdata[pos]) + 1;
671 md->items[i].type = (EXTRACTOR_KeywordType)
672 ntohl (MAKE_UNALIGNED (((const unsigned int *) cdata)[i]));
673 md->items[i].data = GNUNET_strdup (&cdata[pos]);
674 pos += len;
675 i++;
676 }
677 if (i < ic)
678 { /* oops */
679 GNUNET_CONTAINER_meta_data_destroy (md);
680 goto FAILURE;
681 }
682 GNUNET_free_non_null (data);
683 return md;
684FAILURE:
685 GNUNET_free_non_null (data);
686 return NULL; /* size too small */
687}
688
689/**
690 * Test if two MDs are equal.
691 */
692int
693GNUNET_CONTAINER_meta_data_test_equal (const struct GNUNET_CONTAINER_MetaData
694 *md1,
695 const struct GNUNET_CONTAINER_MetaData
696 *md2)
697{
698 uint32_t i;
699 uint32_t j;
700 int found;
701
702 if (md1->itemCount != md2->itemCount)
703 return GNUNET_NO;
704 for (i = 0; i < md1->itemCount; i++)
705 {
706 found = GNUNET_NO;
707 for (j = 0; j < md2->itemCount; j++)
708 if ((md1->items[i].type == md2->items[j].type) &&
709 (0 == strcmp (md1->items[i].data, md2->items[j].data)))
710 {
711 found = GNUNET_YES;
712 break;
713 }
714 if (found == GNUNET_NO)
715 return GNUNET_NO;
716 }
717 return GNUNET_YES;
718}
719
720
721/* end of container_meta_data.c */