diff options
Diffstat (limited to 'src/util/container_meta_data.c')
-rw-r--r-- | src/util/container_meta_data.c | 1214 |
1 files changed, 835 insertions, 379 deletions
diff --git a/src/util/container_meta_data.c b/src/util/container_meta_data.c index 912ac2684..e4d8737c8 100644 --- a/src/util/container_meta_data.c +++ b/src/util/container_meta_data.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | This file is part of GNUnet. | 2 | This file is part of GNUnet. |
3 | (C) 2003, 2004, 2005, 2006, 2008, 2009 Christian Grothoff (and other contributing authors) | 3 | (C) 2003, 2004, 2005, 2006, 2008, 2009, 2010 Christian Grothoff (and other contributing authors) |
4 | 4 | ||
5 | GNUnet is free software; you can redistribute it and/or modify | 5 | GNUnet is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published | 6 | it under the terms of the GNU General Public License as published |
@@ -32,12 +32,46 @@ | |||
32 | #include <extractor.h> | 32 | #include <extractor.h> |
33 | #include <zlib.h> | 33 | #include <zlib.h> |
34 | 34 | ||
35 | #define EXTRA_CHECKS ALLOW_EXTRA_CHECKS | 35 | /** |
36 | 36 | * Meta data item. | |
37 | struct Item | 37 | */ |
38 | struct MetaItem | ||
38 | { | 39 | { |
39 | EXTRACTOR_KeywordType type; | 40 | /** |
41 | * This is a linked list. | ||
42 | */ | ||
43 | struct MetaItem *next; | ||
44 | |||
45 | /** | ||
46 | * Name of the extracting plugin. | ||
47 | */ | ||
48 | char *plugin_name; | ||
49 | |||
50 | /** | ||
51 | * Mime-type of data. | ||
52 | */ | ||
53 | char *mime_type; | ||
54 | |||
55 | /** | ||
56 | * The actual meta data. | ||
57 | */ | ||
40 | char *data; | 58 | char *data; |
59 | |||
60 | /** | ||
61 | * Number of bytes in 'data'. | ||
62 | */ | ||
63 | size_t data_size; | ||
64 | |||
65 | /** | ||
66 | * Type of the meta data. | ||
67 | */ | ||
68 | enum EXTRACTOR_MetaType type; | ||
69 | |||
70 | /** | ||
71 | * Format of the meta data. | ||
72 | */ | ||
73 | enum EXTRACTOR_MetaFormat format; | ||
74 | |||
41 | }; | 75 | }; |
42 | 76 | ||
43 | /** | 77 | /** |
@@ -45,86 +79,224 @@ struct Item | |||
45 | */ | 79 | */ |
46 | struct GNUNET_CONTAINER_MetaData | 80 | struct GNUNET_CONTAINER_MetaData |
47 | { | 81 | { |
48 | uint32_t itemCount; | 82 | /** |
49 | struct Item *items; | 83 | * Linked list of the meta data items. |
84 | */ | ||
85 | struct MetaItem *items; | ||
86 | |||
87 | /** | ||
88 | * Complete serialized and compressed buffer of the items. | ||
89 | * NULL if we have not computed that buffer yet. | ||
90 | */ | ||
91 | char *sbuf; | ||
92 | |||
93 | /** | ||
94 | * Number of bytes in 'sbuf'. 0 if the buffer is stale. | ||
95 | */ | ||
96 | size_t sbuf_size; | ||
97 | |||
98 | /** | ||
99 | * Number of items in the linked list. | ||
100 | */ | ||
101 | unsigned int item_count; | ||
102 | |||
50 | }; | 103 | }; |
51 | 104 | ||
105 | |||
52 | /** | 106 | /** |
53 | * Create a fresh struct CONTAINER_MetaData token. | 107 | * Create a fresh struct CONTAINER_MetaData token. |
108 | * | ||
109 | * @return empty meta-data container | ||
54 | */ | 110 | */ |
55 | struct GNUNET_CONTAINER_MetaData * | 111 | struct GNUNET_CONTAINER_MetaData * |
56 | GNUNET_CONTAINER_meta_data_create () | 112 | GNUNET_CONTAINER_meta_data_create () |
57 | { | 113 | { |
58 | struct GNUNET_CONTAINER_MetaData *ret; | 114 | return GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_MetaData)); |
59 | ret = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_MetaData)); | ||
60 | ret->items = NULL; | ||
61 | ret->itemCount = 0; | ||
62 | return ret; | ||
63 | } | 115 | } |
64 | 116 | ||
117 | |||
118 | /** | ||
119 | * Free meta data item. | ||
120 | * | ||
121 | * @param item item to free | ||
122 | */ | ||
123 | static void | ||
124 | meta_item_free (struct MetaItem *item) | ||
125 | { | ||
126 | GNUNET_free_non_null (item->plugin_name); | ||
127 | GNUNET_free_non_null (item->mime_type); | ||
128 | GNUNET_free_non_null (item->data); | ||
129 | GNUNET_free (item); | ||
130 | } | ||
131 | |||
132 | |||
133 | /** | ||
134 | * The meta data has changed, invalidate its serialization | ||
135 | * buffer. | ||
136 | * | ||
137 | * @param md meta data that changed | ||
138 | */ | ||
139 | static void | ||
140 | invalidate_sbuf (struct GNUNET_CONTAINER_MetaData *md) | ||
141 | { | ||
142 | if (md->sbuf == NULL) | ||
143 | return; | ||
144 | GNUNET_free (md->sbuf); | ||
145 | md->sbuf = NULL; | ||
146 | md->sbuf_size = 0; | ||
147 | } | ||
148 | |||
149 | |||
65 | /** | 150 | /** |
66 | * Free meta data. | 151 | * Free meta data. |
152 | * | ||
153 | * @param md what to free | ||
67 | */ | 154 | */ |
68 | void | 155 | void |
69 | GNUNET_CONTAINER_meta_data_destroy (struct GNUNET_CONTAINER_MetaData *md) | 156 | GNUNET_CONTAINER_meta_data_destroy (struct GNUNET_CONTAINER_MetaData *md) |
70 | { | 157 | { |
71 | int i; | 158 | struct MetaItem *item; |
72 | 159 | ||
73 | if (md == NULL) | 160 | if (md == NULL) |
74 | return; | 161 | return; |
75 | for (i = 0; i < md->itemCount; i++) | 162 | while (NULL != (item = md->items)) |
76 | GNUNET_free (md->items[i].data); | 163 | { |
77 | GNUNET_array_grow (md->items, md->itemCount, 0); | 164 | md->items = item->next; |
165 | meta_item_free (item); | ||
166 | } | ||
167 | GNUNET_free_non_null (md->sbuf); | ||
78 | GNUNET_free (md); | 168 | GNUNET_free (md); |
79 | } | 169 | } |
80 | 170 | ||
171 | |||
81 | /** | 172 | /** |
82 | * Add the current time as the publication date | 173 | * Test if two MDs are equal. We consider them equal if |
83 | * to the meta-data. | 174 | * the meta types, formats and content match (we do not |
175 | * include the mime types and plugins names in this | ||
176 | * consideration). | ||
177 | * | ||
178 | * @param md1 first value to check | ||
179 | * @param md2 other value to check | ||
180 | * @return GNUNET_YES if they are equal | ||
84 | */ | 181 | */ |
85 | void | 182 | int |
86 | GNUNET_CONTAINER_meta_data_add_publication_date (struct | 183 | GNUNET_CONTAINER_meta_data_test_equal (const struct GNUNET_CONTAINER_MetaData |
87 | GNUNET_CONTAINER_MetaData | 184 | *md1, |
88 | *md) | 185 | const struct GNUNET_CONTAINER_MetaData |
186 | *md2) | ||
89 | { | 187 | { |
90 | char *dat; | 188 | struct MetaItem *i; |
91 | struct GNUNET_TIME_Absolute t; | 189 | struct MetaItem *j; |
190 | int found; | ||
92 | 191 | ||
93 | t = GNUNET_TIME_absolute_get (); | 192 | if (md1 == md2) |
94 | GNUNET_CONTAINER_meta_data_delete (md, EXTRACTOR_PUBLICATION_DATE, NULL); | 193 | return GNUNET_YES; |
95 | dat = GNUNET_STRINGS_absolute_time_to_string (t); | 194 | if (md1->item_count != md2->item_count) |
96 | GNUNET_CONTAINER_meta_data_insert (md, EXTRACTOR_PUBLICATION_DATE, dat); | 195 | return GNUNET_NO; |
97 | GNUNET_free (dat); | 196 | |
197 | i = md1->items; | ||
198 | while (NULL != i) | ||
199 | { | ||
200 | found = GNUNET_NO; | ||
201 | j = md2->items; | ||
202 | while (NULL != j) | ||
203 | { | ||
204 | if ( (i->type == j->type) && | ||
205 | (i->format == j->format) && | ||
206 | (i->data_size == j->data_size) && | ||
207 | (0 == memcmp (i->data, | ||
208 | j->data, | ||
209 | i->data_size))) | ||
210 | { | ||
211 | found = GNUNET_YES; | ||
212 | break; | ||
213 | } | ||
214 | j = j->next; | ||
215 | } | ||
216 | if (found == GNUNET_NO) | ||
217 | return GNUNET_NO; | ||
218 | i = i->next; | ||
219 | } | ||
220 | return GNUNET_YES; | ||
98 | } | 221 | } |
99 | 222 | ||
223 | |||
100 | /** | 224 | /** |
101 | * Extend metadata. | 225 | * Extend metadata. Note that the list of meta data items is |
226 | * sorted by size (largest first). | ||
227 | * | ||
228 | * @param md metadata to extend | ||
229 | * @param plugin_name name of the plugin that produced this value; | ||
230 | * special values can be used (e.g. '<zlib>' for zlib being | ||
231 | * used in the main libextractor library and yielding | ||
232 | * meta data). | ||
233 | * @param type libextractor-type describing the meta data | ||
234 | * @param format basic format information about data | ||
235 | * @param data_mime_type mime-type of data (not of the original file); | ||
236 | * can be NULL (if mime-type is not known) | ||
237 | * @param data actual meta-data found | ||
238 | * @param data_len number of bytes in data | ||
102 | * @return GNUNET_OK on success, GNUNET_SYSERR if this entry already exists | 239 | * @return GNUNET_OK on success, GNUNET_SYSERR if this entry already exists |
240 | * data_mime_type and plugin_name are not considered for "exists" checks | ||
103 | */ | 241 | */ |
104 | int | 242 | int |
105 | GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md, | 243 | GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md, |
106 | EXTRACTOR_KeywordType type, | 244 | const char *plugin_name, |
107 | const char *data) | 245 | enum EXTRACTOR_MetaType type, |
246 | enum EXTRACTOR_MetaFormat format, | ||
247 | const char *data_mime_type, | ||
248 | const char *data, | ||
249 | size_t data_len) | ||
108 | { | 250 | { |
109 | uint32_t idx; | 251 | struct MetaItem *prev; |
252 | struct MetaItem *pos; | ||
253 | struct MetaItem *i; | ||
110 | char *p; | 254 | char *p; |
111 | 255 | ||
112 | GNUNET_assert (data != NULL); | 256 | prev = NULL; |
113 | for (idx = 0; idx < md->itemCount; idx++) | 257 | pos = md->items; |
258 | while (NULL != pos) | ||
114 | { | 259 | { |
115 | if ((md->items[idx].type == type) && | 260 | if (pos->data_size < data_len) |
116 | (0 == strcmp (md->items[idx].data, data))) | 261 | break; |
117 | return GNUNET_SYSERR; | 262 | if ( (pos->type == type) && |
263 | (pos->format == format) && | ||
264 | (pos->data_size == data_len) && | ||
265 | (0 == memcmp (pos->data, | ||
266 | data, | ||
267 | data_len))) | ||
268 | { | ||
269 | if ( (pos->mime_type == NULL) && | ||
270 | (data_mime_type != NULL) ) | ||
271 | { | ||
272 | pos->mime_type = GNUNET_strdup (data_mime_type); | ||
273 | invalidate_sbuf (md); | ||
274 | } | ||
275 | return GNUNET_SYSERR; | ||
276 | } | ||
277 | prev = pos; | ||
278 | pos = pos->next; | ||
118 | } | 279 | } |
119 | idx = md->itemCount; | 280 | md->item_count++; |
120 | GNUNET_array_grow (md->items, md->itemCount, md->itemCount + 1); | 281 | i = GNUNET_malloc (sizeof (struct MetaItem)); |
121 | md->items[idx].type = type; | 282 | i->type = type; |
122 | md->items[idx].data = p = GNUNET_strdup (data); | 283 | i->format = format; |
123 | 284 | i->data_size = data_len; | |
285 | i->next = pos; | ||
286 | if (prev == NULL) | ||
287 | md->items = i; | ||
288 | else | ||
289 | prev->next = i; | ||
290 | i->mime_type = (data_mime_type == NULL) ? NULL : GNUNET_strdup (data_mime_type); | ||
291 | i->plugin_name = (plugin_name == NULL) ? NULL : GNUNET_strdup (plugin_name); | ||
292 | i->data = GNUNET_malloc (data_len); | ||
293 | memcpy (i->data, data, data_len); | ||
124 | /* change OS native dir separators to unix '/' and others to '_' */ | 294 | /* change OS native dir separators to unix '/' and others to '_' */ |
125 | if (type == EXTRACTOR_FILENAME) | 295 | if (type == EXTRACTOR_METATYPE_FILENAME) |
126 | { | 296 | { |
127 | while (*p != '\0') | 297 | p = i->data; |
298 | while ( (*p != '\0') && | ||
299 | (p < i->data + data_len) ) | ||
128 | { | 300 | { |
129 | if (*p == DIR_SEPARATOR) | 301 | if (*p == DIR_SEPARATOR) |
130 | *p = '/'; | 302 | *p = '/'; |
@@ -133,10 +305,11 @@ GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md, | |||
133 | p++; | 305 | p++; |
134 | } | 306 | } |
135 | } | 307 | } |
136 | 308 | invalidate_sbuf (md); | |
137 | return GNUNET_OK; | 309 | return GNUNET_OK; |
138 | } | 310 | } |
139 | 311 | ||
312 | |||
140 | /** | 313 | /** |
141 | * Remove an item. | 314 | * Remove an item. |
142 | * | 315 | * |
@@ -144,36 +317,78 @@ GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md, | |||
144 | * @param type type of the item to remove | 317 | * @param type type of the item to remove |
145 | * @param data specific value to remove, NULL to remove all | 318 | * @param data specific value to remove, NULL to remove all |
146 | * entries of the given type | 319 | * entries of the given type |
320 | * @param data_len number of bytes in data | ||
147 | * @return GNUNET_OK on success, GNUNET_SYSERR if the item does not exist in md | 321 | * @return GNUNET_OK on success, GNUNET_SYSERR if the item does not exist in md |
148 | */ | 322 | */ |
149 | int | 323 | int |
150 | GNUNET_CONTAINER_meta_data_delete (struct GNUNET_CONTAINER_MetaData *md, | 324 | GNUNET_CONTAINER_meta_data_delete (struct GNUNET_CONTAINER_MetaData *md, |
151 | EXTRACTOR_KeywordType type, | 325 | enum EXTRACTOR_MetaType type, |
152 | const char *data) | 326 | const char *data, |
327 | size_t data_len) | ||
153 | { | 328 | { |
154 | uint32_t idx; | 329 | struct MetaItem *pos; |
155 | int ret = GNUNET_SYSERR; | 330 | struct MetaItem *prev; |
156 | for (idx = 0; idx < md->itemCount; idx++) | 331 | |
332 | prev = NULL; | ||
333 | pos = md->items; | ||
334 | while (NULL != pos) | ||
157 | { | 335 | { |
158 | if ((md->items[idx].type == type) && | 336 | if ( (pos->type == type) && |
159 | ((data == NULL) || (0 == strcmp (md->items[idx].data, data)))) | 337 | ( (data == NULL) || |
160 | { | 338 | ( (pos->data_size == data_len) && |
161 | GNUNET_free (md->items[idx].data); | 339 | (0 == memcmp (pos->data, |
162 | md->items[idx] = md->items[md->itemCount - 1]; | 340 | data, |
163 | GNUNET_array_grow (md->items, md->itemCount, md->itemCount - 1); | 341 | data_len))) ) ) |
164 | if (data == NULL) | 342 | { |
165 | { | 343 | if (prev == NULL) |
166 | ret = GNUNET_OK; | 344 | md->items = pos->next; |
167 | continue; | 345 | else |
168 | } | 346 | prev->next = pos->next; |
169 | return GNUNET_OK; | 347 | meta_item_free (pos); |
170 | } | 348 | md->item_count--; |
349 | invalidate_sbuf (md); | ||
350 | return GNUNET_OK; | ||
351 | } | ||
352 | prev = pos; | ||
353 | pos = pos->next; | ||
171 | } | 354 | } |
172 | return ret; | 355 | return GNUNET_SYSERR; |
173 | } | 356 | } |
174 | 357 | ||
358 | |||
175 | /** | 359 | /** |
176 | * Iterate over MD entries, excluding thumbnails. | 360 | * Add the current time as the publication date |
361 | * to the meta-data. | ||
362 | * | ||
363 | * @param md metadata to modify | ||
364 | */ | ||
365 | void | ||
366 | GNUNET_CONTAINER_meta_data_add_publication_date (struct | ||
367 | GNUNET_CONTAINER_MetaData | ||
368 | *md) | ||
369 | { | ||
370 | char *dat; | ||
371 | struct GNUNET_TIME_Absolute t; | ||
372 | |||
373 | t = GNUNET_TIME_absolute_get (); | ||
374 | GNUNET_CONTAINER_meta_data_delete (md, | ||
375 | EXTRACTOR_METATYPE_PUBLICATION_DATE, | ||
376 | NULL, | ||
377 | 0); | ||
378 | dat = GNUNET_STRINGS_absolute_time_to_string (t); | ||
379 | GNUNET_CONTAINER_meta_data_insert (md, | ||
380 | "<gnunet>", | ||
381 | EXTRACTOR_METATYPE_PUBLICATION_DATE, | ||
382 | EXTRACTOR_METAFORMAT_UTF8, | ||
383 | "text/plain", | ||
384 | dat, | ||
385 | strlen(dat)+1); | ||
386 | GNUNET_free (dat); | ||
387 | } | ||
388 | |||
389 | |||
390 | /** | ||
391 | * Iterate over MD entries. | ||
177 | * | 392 | * |
178 | * @param md metadata to inspect | 393 | * @param md metadata to inspect |
179 | * @param iter function to call on each entry | 394 | * @param iter function to call on each entry |
@@ -181,51 +396,71 @@ GNUNET_CONTAINER_meta_data_delete (struct GNUNET_CONTAINER_MetaData *md, | |||
181 | * @return number of entries | 396 | * @return number of entries |
182 | */ | 397 | */ |
183 | int | 398 | int |
184 | GNUNET_CONTAINER_meta_data_get_contents (const struct | 399 | GNUNET_CONTAINER_meta_data_iterate (const struct |
185 | GNUNET_CONTAINER_MetaData *md, | 400 | GNUNET_CONTAINER_MetaData *md, |
186 | GNUNET_CONTAINER_MetaDataProcessor | 401 | EXTRACTOR_MetaDataProcessor |
187 | iter, void *iter_cls) | 402 | iter, void *iter_cls) |
188 | { | 403 | { |
189 | uint32_t i; | 404 | struct MetaItem *pos; |
190 | uint32_t sub; | ||
191 | 405 | ||
192 | sub = 0; | 406 | if (iter == NULL) |
193 | for (i = 0; i < md->itemCount; i++) | 407 | return md->item_count; |
408 | pos = md->items; | ||
409 | while (NULL != pos) | ||
194 | { | 410 | { |
195 | if (!EXTRACTOR_isBinaryType (md->items[i].type)) | 411 | if (0 != iter (iter_cls, |
196 | { | 412 | pos->plugin_name, |
197 | if ((iter != NULL) && | 413 | pos->type, |
198 | (GNUNET_OK != iter (iter_cls, | 414 | pos->format, |
199 | md->items[i].type, md->items[i].data))) | 415 | pos->mime_type, |
200 | return GNUNET_SYSERR; | 416 | pos->data, |
201 | } | 417 | pos->data_size)) |
202 | else | 418 | return md->item_count; |
203 | sub++; | 419 | pos = pos->next; |
204 | } | 420 | } |
205 | return (int) (md->itemCount - sub); | 421 | return md->item_count; |
206 | } | 422 | } |
207 | 423 | ||
424 | |||
208 | /** | 425 | /** |
209 | * Iterate over MD entries | 426 | * Get the first MD entry of the given type. Caller |
427 | * is responsible for freeing the return value. | ||
428 | * Also, only meta data items that are strings (0-terminated) | ||
429 | * are returned by this function. | ||
210 | * | 430 | * |
211 | * @return number of entries | 431 | * @param md metadata to inspect |
432 | * @param type type to look for | ||
433 | * @return NULL if no entry was found | ||
212 | */ | 434 | */ |
213 | char * | 435 | char * |
214 | GNUNET_CONTAINER_meta_data_get_by_type (const struct GNUNET_CONTAINER_MetaData | 436 | GNUNET_CONTAINER_meta_data_get_by_type (const struct GNUNET_CONTAINER_MetaData |
215 | *md, EXTRACTOR_KeywordType type) | 437 | *md, enum EXTRACTOR_MetaType type) |
216 | { | 438 | { |
217 | uint32_t i; | 439 | struct MetaItem *pos; |
218 | 440 | ||
219 | for (i = 0; i < md->itemCount; i++) | 441 | pos = md->items; |
220 | if (type == md->items[i].type) | 442 | while (NULL != pos) |
221 | return GNUNET_strdup (md->items[i].data); | 443 | { |
444 | if ( (type == pos->type) && | ||
445 | ( (pos->format == EXTRACTOR_METAFORMAT_UTF8) || | ||
446 | (pos->format == EXTRACTOR_METAFORMAT_C_STRING) ) ) | ||
447 | return GNUNET_strdup (pos->data); | ||
448 | pos = pos->next; | ||
449 | } | ||
222 | return NULL; | 450 | return NULL; |
223 | } | 451 | } |
224 | 452 | ||
453 | |||
225 | /** | 454 | /** |
226 | * Iterate over MD entries | 455 | * Get the first matching MD entry of the given types. Caller is |
456 | * responsible for freeing the return value. Also, only meta data | ||
457 | * items that are strings (0-terminated) are returned by this | ||
458 | * function. | ||
227 | * | 459 | * |
228 | * @return number of entries | 460 | * @param md metadata to inspect |
461 | * @param ... -1-terminated list of types | ||
462 | * @return NULL if we do not have any such entry, | ||
463 | * otherwise client is responsible for freeing the value! | ||
229 | */ | 464 | */ |
230 | char * | 465 | char * |
231 | GNUNET_CONTAINER_meta_data_get_first_by_types (const struct | 466 | GNUNET_CONTAINER_meta_data_get_first_by_types (const struct |
@@ -234,13 +469,13 @@ GNUNET_CONTAINER_meta_data_get_first_by_types (const struct | |||
234 | { | 469 | { |
235 | char *ret; | 470 | char *ret; |
236 | va_list args; | 471 | va_list args; |
237 | EXTRACTOR_KeywordType type; | 472 | enum EXTRACTOR_MetaType type; |
238 | 473 | ||
239 | ret = NULL; | 474 | ret = NULL; |
240 | va_start (args, md); | 475 | va_start (args, md); |
241 | while (1) | 476 | while (1) |
242 | { | 477 | { |
243 | type = va_arg (args, EXTRACTOR_KeywordType); | 478 | type = va_arg (args, enum EXTRACTOR_MetaType); |
244 | if (type == -1) | 479 | if (type == -1) |
245 | break; | 480 | break; |
246 | ret = GNUNET_CONTAINER_meta_data_get_by_type (md, type); | 481 | ret = GNUNET_CONTAINER_meta_data_get_by_type (md, type); |
@@ -251,6 +486,7 @@ GNUNET_CONTAINER_meta_data_get_first_by_types (const struct | |||
251 | return ret; | 486 | return ret; |
252 | } | 487 | } |
253 | 488 | ||
489 | |||
254 | /** | 490 | /** |
255 | * Get a thumbnail from the meta-data (if present). | 491 | * Get a thumbnail from the meta-data (if present). |
256 | * | 492 | * |
@@ -264,27 +500,33 @@ GNUNET_CONTAINER_meta_data_get_thumbnail (const struct | |||
264 | GNUNET_CONTAINER_MetaData * md, | 500 | GNUNET_CONTAINER_MetaData * md, |
265 | unsigned char **thumb) | 501 | unsigned char **thumb) |
266 | { | 502 | { |
267 | char *encoded; | 503 | struct MetaItem *pos; |
268 | int ret; | 504 | struct MetaItem *match; |
269 | size_t size; | ||
270 | 505 | ||
271 | encoded = | 506 | match = NULL; |
272 | GNUNET_CONTAINER_meta_data_get_by_type (md, EXTRACTOR_THUMBNAIL_DATA); | 507 | pos = md->items; |
273 | if (encoded == NULL) | 508 | while (NULL != pos) |
274 | return 0; | ||
275 | if (strlen (encoded) == 0) | ||
276 | { | 509 | { |
277 | GNUNET_free (encoded); | 510 | if ( (0 == strncasecmp ("image/", pos->mime_type, |
278 | return 0; /* invalid */ | 511 | strlen("image/"))) && |
512 | (pos->format == EXTRACTOR_METAFORMAT_BINARY) ) | ||
513 | { | ||
514 | if (match == NULL) | ||
515 | match = pos; | ||
516 | else if ( (match->type != EXTRACTOR_METATYPE_THUMBNAIL) && | ||
517 | (pos->type == EXTRACTOR_METATYPE_THUMBNAIL) ) | ||
518 | match = pos; | ||
519 | } | ||
520 | pos = pos->next; | ||
279 | } | 521 | } |
280 | *thumb = NULL; | 522 | if (match == NULL) |
281 | ret = EXTRACTOR_binaryDecode (encoded, thumb, &size); | ||
282 | GNUNET_free (encoded); | ||
283 | if (ret != 0) | ||
284 | return 0; | 523 | return 0; |
285 | return size; | 524 | *thumb = GNUNET_malloc (match->data_size); |
525 | memcpy (*thumb, match->data, match->data_size); | ||
526 | return match->data_size; | ||
286 | } | 527 | } |
287 | 528 | ||
529 | |||
288 | /** | 530 | /** |
289 | * Duplicate struct GNUNET_CONTAINER_MetaData. | 531 | * Duplicate struct GNUNET_CONTAINER_MetaData. |
290 | * | 532 | * |
@@ -295,18 +537,66 @@ struct GNUNET_CONTAINER_MetaData * | |||
295 | GNUNET_CONTAINER_meta_data_duplicate (const struct GNUNET_CONTAINER_MetaData | 537 | GNUNET_CONTAINER_meta_data_duplicate (const struct GNUNET_CONTAINER_MetaData |
296 | *md) | 538 | *md) |
297 | { | 539 | { |
298 | uint32_t i; | ||
299 | struct GNUNET_CONTAINER_MetaData *ret; | 540 | struct GNUNET_CONTAINER_MetaData *ret; |
541 | struct MetaItem *pos; | ||
300 | 542 | ||
301 | if (md == NULL) | 543 | if (md == NULL) |
302 | return NULL; | 544 | return NULL; |
303 | ret = GNUNET_CONTAINER_meta_data_create (); | 545 | ret = GNUNET_CONTAINER_meta_data_create (); |
304 | for (i = 0; i < md->itemCount; i++) | 546 | pos = md->items; |
305 | GNUNET_CONTAINER_meta_data_insert (ret, md->items[i].type, | 547 | while (NULL != pos) |
306 | md->items[i].data); | 548 | { |
549 | GNUNET_CONTAINER_meta_data_insert (ret, | ||
550 | pos->plugin_name, | ||
551 | pos->type, | ||
552 | pos->format, | ||
553 | pos->mime_type, | ||
554 | pos->data, | ||
555 | pos->data_size); | ||
556 | pos = pos->next; | ||
557 | } | ||
307 | return ret; | 558 | return ret; |
308 | } | 559 | } |
309 | 560 | ||
561 | |||
562 | /** | ||
563 | * Add meta data that libextractor finds to our meta data | ||
564 | * container. | ||
565 | * | ||
566 | * @param cls closure, our meta data container | ||
567 | * @param plugin_name name of the plugin that produced this value; | ||
568 | * special values can be used (e.g. '<zlib>' for zlib being | ||
569 | * used in the main libextractor library and yielding | ||
570 | * meta data). | ||
571 | * @param type libextractor-type describing the meta data | ||
572 | * @param format basic format information about data | ||
573 | * @param data_mime_type mime-type of data (not of the original file); | ||
574 | * can be NULL (if mime-type is not known) | ||
575 | * @param data actual meta-data found | ||
576 | * @param data_len number of bytes in data | ||
577 | * @return always 0 to continue extracting | ||
578 | */ | ||
579 | static int | ||
580 | add_to_md(void *cls, | ||
581 | const char *plugin_name, | ||
582 | enum EXTRACTOR_MetaType type, | ||
583 | enum EXTRACTOR_MetaFormat format, | ||
584 | const char *data_mime_type, | ||
585 | const char *data, | ||
586 | size_t data_len) | ||
587 | { | ||
588 | struct GNUNET_CONTAINER_MetaData *md = cls; | ||
589 | (void) GNUNET_CONTAINER_meta_data_insert (md, | ||
590 | plugin_name, | ||
591 | type, | ||
592 | format, | ||
593 | data_mime_type, | ||
594 | data, | ||
595 | data_len); | ||
596 | return 0; | ||
597 | } | ||
598 | |||
599 | |||
310 | /** | 600 | /** |
311 | * Extract meta-data from a file. | 601 | * Extract meta-data from a file. |
312 | * | 602 | * |
@@ -316,37 +606,43 @@ GNUNET_CONTAINER_meta_data_duplicate (const struct GNUNET_CONTAINER_MetaData | |||
316 | int | 606 | int |
317 | GNUNET_CONTAINER_meta_data_extract_from_file (struct GNUNET_CONTAINER_MetaData | 607 | GNUNET_CONTAINER_meta_data_extract_from_file (struct GNUNET_CONTAINER_MetaData |
318 | *md, const char *filename, | 608 | *md, const char *filename, |
319 | EXTRACTOR_ExtractorList * | 609 | struct EXTRACTOR_PluginList * |
320 | extractors) | 610 | extractors) |
321 | { | 611 | { |
322 | EXTRACTOR_KeywordList *head; | 612 | unsigned int old; |
323 | EXTRACTOR_KeywordList *pos; | ||
324 | int ret; | ||
325 | 613 | ||
326 | if (filename == NULL) | 614 | if (filename == NULL) |
327 | return GNUNET_SYSERR; | 615 | return GNUNET_SYSERR; |
328 | if (extractors == NULL) | 616 | if (extractors == NULL) |
329 | return 0; | 617 | return 0; |
330 | head = EXTRACTOR_getKeywords (extractors, filename); | 618 | old = md->item_count; |
331 | head = EXTRACTOR_removeDuplicateKeywords (head, | 619 | EXTRACTOR_extract (extractors, |
332 | EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN); | 620 | filename, |
333 | pos = head; | 621 | NULL, 0, |
334 | ret = 0; | 622 | &add_to_md, |
335 | while (pos != NULL) | 623 | md); |
336 | { | 624 | return (int) (md->item_count - old); |
337 | if (GNUNET_OK == | ||
338 | GNUNET_CONTAINER_meta_data_insert (md, pos->keywordType, | ||
339 | pos->keyword)) | ||
340 | ret++; | ||
341 | pos = pos->next; | ||
342 | } | ||
343 | EXTRACTOR_freeKeywords (head); | ||
344 | return ret; | ||
345 | } | 625 | } |
346 | 626 | ||
347 | 627 | ||
348 | static unsigned int | 628 | /** |
349 | tryCompression (char *data, unsigned int oldSize) | 629 | * Try to compress the given block of data. |
630 | * | ||
631 | * @param data block to compress; the input itself | ||
632 | * is not modified (if compression | ||
633 | * helped, the compressed copy is | ||
634 | * handed back via 'result') | ||
635 | * @param oldSize number of bytes in data | ||
636 | * @param result set to the compressed data (only on GNUNET_YES) | ||
637 | * @param newSize set to size of result (only on GNUNET_YES) | ||
638 | * @return GNUNET_YES if compression reduced the size, | ||
639 | * GNUNET_NO if compression did not help | ||
640 | */ | ||
641 | static int | ||
642 | try_compression (const char *data, | ||
643 | size_t oldSize, | ||
644 | char **result, | ||
645 | size_t *newSize) | ||
350 | { | 646 | { |
351 | char *tmp; | 647 | char *tmp; |
352 | uLongf dlen; | 648 | uLongf dlen; |
@@ -364,62 +660,40 @@ tryCompression (char *data, unsigned int oldSize) | |||
364 | { | 660 | { |
365 | if (dlen < oldSize) | 661 | if (dlen < oldSize) |
366 | { | 662 | { |
367 | memcpy (data, tmp, dlen); | 663 | *result = tmp; |
368 | GNUNET_free (tmp); | 664 | *newSize = dlen; |
369 | return dlen; | 665 | return GNUNET_YES; |
370 | } | 666 | } |
371 | } | 667 | } |
372 | GNUNET_free (tmp); | 668 | GNUNET_free (tmp); |
373 | return oldSize; | 669 | return GNUNET_NO; |
374 | } | 670 | } |
375 | 671 | ||
376 | /** | ||
377 | * Decompress input, return the decompressed data | ||
378 | * as output, set outputSize to the number of bytes | ||
379 | * that were found. | ||
380 | * | ||
381 | * @return NULL on error | ||
382 | */ | ||
383 | static char * | ||
384 | decompress (const char *input, | ||
385 | unsigned int inputSize, unsigned int outputSize) | ||
386 | { | ||
387 | char *output; | ||
388 | uLongf olen; | ||
389 | |||
390 | olen = outputSize; | ||
391 | output = GNUNET_malloc (olen); | ||
392 | if (Z_OK == uncompress ((Bytef *) output, | ||
393 | &olen, (const Bytef *) input, inputSize)) | ||
394 | { | ||
395 | return output; | ||
396 | } | ||
397 | else | ||
398 | { | ||
399 | GNUNET_free (output); | ||
400 | return NULL; | ||
401 | } | ||
402 | } | ||
403 | 672 | ||
404 | /** | 673 | /** |
405 | * Flag in 'version' that indicates compressed meta-data. | 674 | * Flag in 'version' that indicates compressed meta-data. |
406 | */ | 675 | */ |
407 | #define HEADER_COMPRESSED 0x80000000 | 676 | #define HEADER_COMPRESSED 0x80000000 |
408 | 677 | ||
678 | |||
409 | /** | 679 | /** |
410 | * Bits in 'version' that give the version number. | 680 | * Bits in 'version' that give the version number. |
411 | */ | 681 | */ |
412 | #define HEADER_VERSION_MASK 0x7FFFFFFF | 682 | #define HEADER_VERSION_MASK 0x7FFFFFFF |
413 | 683 | ||
684 | |||
685 | /** | ||
686 | * Header for serialized meta data. | ||
687 | */ | ||
414 | struct MetaDataHeader | 688 | struct MetaDataHeader |
415 | { | 689 | { |
416 | /** | 690 | /** |
417 | * The version of the MD serialization. | 691 | * The version of the MD serialization. The highest bit is used to |
418 | * The highest bit is used to indicate | 692 | * indicate compression. |
419 | * compression. | ||
420 | * | 693 | * |
421 | * Version 0 is the current version; | 694 | * Version 0 is traditional (pre-0.9) meta data (unsupported) |
422 | * Version is 1 for a NULL pointer. | 695 | * Version is 1 for a NULL pointer |
696 | * Version 2 is for 0.9.x (and possibly higher) | ||
423 | * Other version numbers are not yet defined. | 697 | * Other version numbers are not yet defined. |
424 | */ | 698 | */ |
425 | uint32_t version; | 699 | uint32_t version; |
@@ -430,24 +704,57 @@ struct MetaDataHeader | |||
430 | uint32_t entries; | 704 | uint32_t entries; |
431 | 705 | ||
432 | /** | 706 | /** |
433 | * Size of the MD (decompressed) | 707 | * Size of the decompressed meta data. |
434 | */ | 708 | */ |
435 | uint32_t size; | 709 | uint32_t size; |
436 | 710 | ||
437 | /** | 711 | /** |
438 | * This is followed by 'entries' values of type 'uint32_t' that | 712 | * This is followed by 'entries' values of type 'struct MetaDataEntry' |
439 | * correspond to EXTRACTOR_KeywordTypes. After that, the meta-data | 713 | * and then by 'entry' plugin names, mime-types and data blocks |
440 | * keywords follow (0-terminated). The MD block always ends with | 714 | * as specified in those meta data entries. |
441 | * 0-termination, padding with 0 until a multiple of 8 bytes. | 715 | */ |
716 | }; | ||
717 | |||
718 | |||
719 | /** | ||
720 | * Entry of serialized meta data. | ||
721 | */ | ||
722 | struct MetaDataEntry | ||
723 | { | ||
724 | /** | ||
725 | * Meta data type. Corresponds to an 'enum EXTRACTOR_MetaType' | ||
726 | */ | ||
727 | uint32_t type; | ||
728 | |||
729 | /** | ||
730 | * Meta data format. Corresponds to an 'enum EXTRACTOR_MetaFormat' | ||
442 | */ | 731 | */ |
732 | uint32_t format; | ||
733 | |||
734 | /** | ||
735 | * Number of bytes of meta data. | ||
736 | */ | ||
737 | uint32_t data_size; | ||
738 | |||
739 | /** | ||
740 | * Number of bytes in the plugin name including 0-terminator. 0 for NULL. | ||
741 | */ | ||
742 | uint32_t plugin_name_len; | ||
743 | |||
744 | /** | ||
745 | * Number of bytes in the mime type including 0-terminator. 0 for NULL. | ||
746 | */ | ||
747 | uint32_t mime_type_len; | ||
443 | 748 | ||
444 | }; | 749 | }; |
445 | 750 | ||
751 | |||
446 | /** | 752 | /** |
447 | * Serialize meta-data to target. | 753 | * Serialize meta-data to target. |
448 | * | 754 | * |
449 | * @param md metadata to serialize | 755 | * @param md metadata to serialize |
450 | * @param target where to write the serialized metadata | 756 | * @param target where to write the serialized metadata; |
757 | * *target can be NULL, in which case memory is allocated | ||
451 | * @param max maximum number of bytes available in target | 758 | * @param max maximum number of bytes available in target |
452 | * @param opt is it ok to just write SOME of the | 759 | * @param opt is it ok to just write SOME of the |
453 | * meta-data to match the size constraint, | 760 | * meta-data to match the size constraint, |
@@ -458,149 +765,273 @@ struct MetaDataHeader | |||
458 | */ | 765 | */ |
459 | ssize_t | 766 | ssize_t |
460 | GNUNET_CONTAINER_meta_data_serialize (const struct GNUNET_CONTAINER_MetaData | 767 | GNUNET_CONTAINER_meta_data_serialize (const struct GNUNET_CONTAINER_MetaData |
461 | *md, char *target, size_t max, | 768 | *md, char **target, size_t max, |
462 | enum | 769 | enum |
463 | GNUNET_CONTAINER_MetaDataSerializationOptions | 770 | GNUNET_CONTAINER_MetaDataSerializationOptions |
464 | opt) | 771 | opt) |
465 | { | 772 | { |
466 | struct MetaDataHeader *hdr; | 773 | struct GNUNET_CONTAINER_MetaData *vmd; |
774 | struct MetaItem *pos; | ||
775 | struct MetaDataHeader *hdr; | ||
776 | struct MetaDataEntry *ent; | ||
777 | unsigned int i; | ||
778 | uint64_t msize; | ||
779 | size_t off; | ||
780 | char *mdata; | ||
781 | char *cdata; | ||
782 | size_t mlen; | ||
783 | size_t plen; | ||
467 | size_t size; | 784 | size_t size; |
468 | size_t pos; | 785 | size_t left; |
469 | uint32_t i; | 786 | size_t clen; |
470 | size_t len; | 787 | int comp; |
471 | uint32_t ic; | ||
472 | 788 | ||
473 | if (max < sizeof (struct MetaDataHeader)) | 789 | if (max < sizeof (struct MetaDataHeader)) |
474 | return GNUNET_SYSERR; /* far too small */ | 790 | return GNUNET_SYSERR; /* far too small */ |
475 | ic = md ? md->itemCount : 0; | 791 | if (md == NULL) |
476 | hdr = NULL; | 792 | return 0; |
477 | while (1) | 793 | |
794 | if (md->sbuf != NULL) | ||
478 | { | 795 | { |
479 | size = sizeof (struct MetaDataHeader); | 796 | /* try to use serialization cache */ |
480 | size += sizeof (uint32_t) * ic; | 797 | if (md->sbuf_size < max) |
481 | for (i = 0; i < ic; i++) | 798 | { |
482 | size += 1 + strlen (md->items[i].data); | 799 | if (NULL == *target) |
483 | while (size % 8 != 0) | 800 | *target = GNUNET_malloc (md->sbuf_size); |
484 | size++; | 801 | memcpy (*target, |
485 | hdr = GNUNET_malloc (size); | 802 | md->sbuf, |
486 | hdr->version = htonl (md == NULL ? 1 : 0); | 803 | md->sbuf_size); |
487 | hdr->entries = htonl (ic); | 804 | return md->sbuf_size; |
488 | for (i = 0; i < ic; i++) | 805 | } |
489 | ((uint32_t *) & hdr[1])[i] = htonl ((uint32_t) md->items[i].type); | 806 | if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART)) |
490 | pos = sizeof (struct MetaDataHeader); | 807 | return GNUNET_SYSERR; /* can say that this will fail */ |
491 | pos += sizeof (uint32_t) * ic; | 808 | /* need to compute a partial serialization, sbuf useless ... */ |
492 | for (i = 0; i < ic; i++) | 809 | } |
493 | { | ||
494 | len = strlen (md->items[i].data) + 1; | ||
495 | memcpy (&((char *) hdr)[pos], md->items[i].data, len); | ||
496 | pos += len; | ||
497 | } | ||
498 | 810 | ||
499 | hdr->size = htonl (size); | 811 | |
500 | if ((opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS) == 0) | 812 | msize = 0; |
501 | { | 813 | pos = md->items; |
502 | pos = tryCompression ((char *) &hdr[1], | 814 | while (NULL != pos) |
503 | size - sizeof (struct MetaDataHeader)); | 815 | { |
504 | } | 816 | msize += sizeof (struct MetaDataEntry); |
817 | msize += pos->data_size; | ||
818 | if (pos->plugin_name != NULL) | ||
819 | msize += strlen (pos->plugin_name) + 1; | ||
820 | if (pos->mime_type != NULL) | ||
821 | msize += strlen (pos->mime_type) + 1; | ||
822 | pos = pos->next; | ||
823 | } | ||
824 | size = (size_t) msize; | ||
825 | if (size != msize) | ||
826 | { | ||
827 | GNUNET_break (0); /* integer overflow */ | ||
828 | return GNUNET_SYSERR; | ||
829 | } | ||
830 | if (size >= GNUNET_MAX_MALLOC_CHECKED) | ||
831 | { | ||
832 | /* too large to be processed */ | ||
833 | return GNUNET_SYSERR; | ||
834 | } | ||
835 | ent = GNUNET_malloc (size); | ||
836 | mdata = (char *) &ent[md->item_count]; | ||
837 | off = size - (md->item_count * sizeof(struct MetaDataEntry)); | ||
838 | i = 0; | ||
839 | pos = md->items; | ||
840 | while (NULL != pos) | ||
841 | { | ||
842 | ent[i].type = htonl ((uint32_t) pos->type); | ||
843 | ent[i].format = htonl ((uint32_t) pos->format); | ||
844 | ent[i].data_size = htonl ((uint32_t) pos->data_size); | ||
845 | if (pos->plugin_name == NULL) | ||
846 | plen = 0; | ||
505 | else | 847 | else |
506 | { | 848 | plen = strlen (pos->plugin_name) + 1; |
507 | pos = size - sizeof (struct MetaDataHeader); | 849 | ent[i].plugin_name_len = htonl ( (uint32_t) plen); |
508 | } | 850 | if (pos->mime_type == NULL) |
509 | if (pos < size - sizeof (struct MetaDataHeader)) | 851 | mlen = 0; |
510 | { | 852 | else |
511 | hdr->version = htonl (HEADER_COMPRESSED); | 853 | mlen = strlen (pos->mime_type) + 1; |
512 | size = pos + sizeof (struct MetaDataHeader); | 854 | ent[i].mime_type_len = htonl ((uint32_t) mlen); |
513 | } | 855 | off -= pos->data_size; |
514 | if (size <= max) | 856 | memcpy (&mdata[off], pos->data, pos->data_size); |
515 | break; | 857 | off -= plen; |
516 | GNUNET_free (hdr); | 858 | memcpy (&mdata[off], pos->plugin_name, plen); |
517 | hdr = NULL; | 859 | off -= mlen; |
860 | memcpy (&mdata[off], pos->mime_type, mlen); | ||
861 | i++; | ||
862 | pos = pos->next; | ||
863 | } | ||
864 | GNUNET_assert (off == 0); | ||
518 | 865 | ||
519 | if ((opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART) == 0) | 866 | left = size; |
520 | { | 867 | for (i=0;i<md->item_count;i++) |
521 | return GNUNET_SYSERR; /* does not fit! */ | 868 | { |
869 | comp = GNUNET_NO; | ||
870 | if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS)) | ||
871 | comp = try_compression ((const char*) &ent[i], | ||
872 | left, | ||
873 | &cdata, | ||
874 | &clen); | ||
875 | |||
876 | if ( (md->sbuf == NULL) && | ||
877 | (i == 0) ) | ||
878 | { | ||
879 | /* fill 'sbuf'; this "modifies" md, but since this is only | ||
880 | an internal cache we will cast away the 'const' instead | ||
881 | of making the API look strange. */ | ||
882 | vmd = (struct GNUNET_CONTAINER_MetaData*) md; | ||
883 | hdr = GNUNET_malloc (left + sizeof (struct MetaDataHeader)); | ||
884 | hdr->entries = htonl (md->item_count); | ||
885 | if (GNUNET_YES == comp) | ||
886 | { | ||
887 | hdr->size = htonl (clen); | ||
888 | hdr->version = htonl (2 | HEADER_COMPRESSED); | ||
889 | memcpy (&hdr[1], | ||
890 | cdata, | ||
891 | clen); | ||
892 | vmd->sbuf_size = clen + sizeof (struct MetaDataHeader); | ||
893 | } | ||
894 | else | ||
895 | { | ||
896 | hdr->size = htonl (left); | ||
897 | hdr->version = htonl (2); | ||
898 | memcpy (&hdr[1], | ||
899 | &ent[0], | ||
900 | left); | ||
901 | vmd->sbuf_size = left + sizeof (struct MetaDataHeader); | ||
902 | } | ||
903 | vmd->sbuf = (char*) hdr; | ||
904 | } | ||
905 | |||
906 | if ( ( (left + sizeof (struct MetaDataHeader)) <= max) || | ||
907 | ( (comp == GNUNET_YES) && | ||
908 | (clen <= max)) ) | ||
909 | { | ||
910 | /* success, this now fits! */ | ||
911 | if (GNUNET_YES == comp) | ||
912 | { | ||
913 | hdr = (struct MetaDataHeader*) *target; | ||
914 | if (hdr == NULL) | ||
915 | { | ||
916 | hdr = GNUNET_malloc (clen + sizeof (struct MetaDataHeader)); | ||
917 | *target = (char*) hdr; | ||
918 | } | ||
919 | hdr->version = htonl (2 | HEADER_COMPRESSED); | ||
920 | hdr->entries = htonl (md->item_count - i); | ||
921 | hdr->size = htonl (left); | ||
922 | memcpy (&target[sizeof(struct MetaDataHeader)], | ||
923 | cdata, | ||
924 | clen); | ||
925 | GNUNET_free (cdata); | ||
926 | GNUNET_free (ent); | ||
927 | return clen + sizeof (struct MetaDataHeader); | ||
928 | } | ||
929 | else | ||
930 | { | ||
931 | hdr = (struct MetaDataHeader*) target; | ||
932 | if (hdr == NULL) | ||
933 | { | ||
934 | hdr = GNUNET_malloc (left + sizeof (struct MetaDataHeader)); | ||
935 | *target = (char*) hdr; | ||
936 | } | ||
937 | hdr->version = htonl (2); | ||
938 | hdr->entries = htonl (md->item_count - i); | ||
939 | hdr->size = htonl (left); | ||
940 | memcpy (&target[sizeof(struct MetaDataHeader)], | ||
941 | &ent[i], | ||
942 | left); | ||
943 | GNUNET_free (ent); | ||
944 | return left + sizeof (struct MetaDataHeader); | ||
945 | } | ||
946 | } | ||
947 | |||
948 | if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART)) | ||
949 | { | ||
950 | /* does not fit! */ | ||
951 | GNUNET_free (ent); | ||
952 | return GNUNET_SYSERR; | ||
522 | } | 953 | } |
523 | /* partial serialization ok, try again with less meta-data */ | 954 | |
524 | if (size > 2 * max) | 955 | /* next iteration: ignore the corresponding meta data at the |
525 | ic = ic * 2 / 3; /* still far too big, make big reductions */ | 956 | end and try again without it */ |
526 | else | 957 | left -= sizeof (struct MetaDataEntry); |
527 | ic--; /* small steps, we're close */ | 958 | left -= pos->data_size; |
959 | if (pos->plugin_name != NULL) | ||
960 | left -= strlen (pos->plugin_name) + 1; | ||
961 | if (pos->mime_type != NULL) | ||
962 | left -= strlen (pos->mime_type) + 1; | ||
528 | } | 963 | } |
529 | GNUNET_assert (size <= max); | 964 | GNUNET_free (ent); |
530 | memcpy (target, hdr, size); | 965 | |
531 | GNUNET_free (hdr); | 966 | /* nothing fit, only write header! */ |
532 | /* extra check: deserialize! */ | 967 | hdr = (struct MetaDataHeader*) target; |
533 | #if EXTRA_CHECKS | 968 | if (hdr == NULL) |
534 | { | 969 | { |
535 | struct GNUNET_CONTAINER_MetaData *mdx; | 970 | hdr = GNUNET_malloc (sizeof (struct MetaDataHeader)); |
536 | mdx = GNUNET_CONTAINER_meta_data_deserialize (target, size); | 971 | *target = (char*) hdr; |
537 | GNUNET_assert (NULL != mdx); | 972 | } |
538 | GNUNET_CONTAINER_meta_data_destroy (mdx); | 973 | hdr->version = htonl (2); |
539 | } | 974 | hdr->entries = htonl (0); |
540 | #endif | 975 | hdr->size = htonl (0); |
541 | return size; | 976 | return sizeof (struct MetaDataHeader); |
542 | } | 977 | } |
543 | 978 | ||
979 | |||
544 | /** | 980 | /** |
545 | * Estimate (!) the size of the meta-data in | 981 | * Get the size of the full meta-data in serialized form. |
546 | * serialized form. The estimate MAY be higher | ||
547 | * than what is strictly needed. | ||
548 | * | 982 | * |
549 | * @param md metadata to inspect | 983 | * @param md metadata to inspect |
550 | * @param opt is it ok to just write SOME of the | ||
551 | * meta-data to match the size constraint, | ||
552 | * possibly discarding some data? | ||
553 | * @return number of bytes needed for serialization, -1 on error | 984 | * @return number of bytes needed for serialization, -1 on error |
554 | */ | 985 | */ |
555 | ssize_t | 986 | ssize_t |
556 | GNUNET_CONTAINER_meta_data_get_serialized_size (const struct | 987 | GNUNET_CONTAINER_meta_data_get_serialized_size (const struct GNUNET_CONTAINER_MetaData *md) |
557 | GNUNET_CONTAINER_MetaData * | ||
558 | md, | ||
559 | enum | ||
560 | GNUNET_CONTAINER_MetaDataSerializationOptions | ||
561 | opt) | ||
562 | { | 988 | { |
563 | struct MetaDataHeader *hdr; | 989 | ssize_t ret; |
564 | size_t size; | 990 | char *ptr; |
565 | size_t pos; | 991 | |
566 | uint32_t i; | 992 | if (md->sbuf != NULL) |
567 | size_t len; | 993 | return md->sbuf_size; |
568 | uint32_t ic; | 994 | ptr = NULL; |
995 | ret = GNUNET_CONTAINER_meta_data_serialize (md, | ||
996 | &ptr, | ||
997 | GNUNET_MAX_MALLOC_CHECKED, | ||
998 | GNUNET_CONTAINER_META_DATA_SERIALIZE_FULL); | ||
999 | if (ret != -1) | ||
1000 | GNUNET_free (ptr); | ||
1001 | return ret; | ||
1002 | } | ||
569 | 1003 | ||
570 | ic = md ? md->itemCount : 0; | 1004 | |
571 | size = sizeof (struct MetaDataHeader); | 1005 | /** |
572 | size += sizeof (uint32_t) * ic; | 1006 | * Decompress input, return the decompressed data |
573 | for (i = 0; i < ic; i++) | 1007 | * as output, set outputSize to the number of bytes |
574 | size += 1 + strlen (md->items[i].data); | 1008 | * that were found. |
575 | while (size % 8 != 0) | 1009 | * |
576 | size++; | 1010 | * @param input compressed data |
577 | hdr = GNUNET_malloc (size); | 1011 | * @param inputSize number of bytes in input |
578 | hdr->version = htonl (md == NULL ? 1 : 0); | 1012 | * @param outputSize expected size of the output |
579 | hdr->entries = htonl (ic); | 1013 | * @return NULL on error |
580 | for (i = 0; i < ic; i++) | 1014 | */ |
581 | ((uint32_t *) & hdr[1])[i] = htonl ((uint32_t) md->items[i].type); | 1015 | static char * |
582 | pos = sizeof (struct MetaDataHeader); | 1016 | decompress (const char *input, |
583 | pos += sizeof (uint32_t) * ic; | 1017 | size_t inputSize, |
584 | for (i = 0; i < ic; i++) | 1018 | size_t outputSize) |
585 | { | 1019 | { |
586 | len = strlen (md->items[i].data) + 1; | 1020 | char *output; |
587 | memcpy (&((char *) hdr)[pos], md->items[i].data, len); | 1021 | uLongf olen; |
588 | pos += len; | 1022 | |
589 | } | 1023 | olen = outputSize; |
590 | if ((opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS) == 0) | 1024 | output = GNUNET_malloc (olen); |
1025 | if (Z_OK == uncompress ((Bytef *) output, | ||
1026 | &olen, (const Bytef *) input, inputSize)) | ||
591 | { | 1027 | { |
592 | pos = | 1028 | return output; |
593 | tryCompression ((char *) &hdr[1], | ||
594 | size - sizeof (struct MetaDataHeader)); | ||
595 | } | 1029 | } |
596 | else | 1030 | else |
597 | { | 1031 | { |
598 | pos = size - sizeof (struct MetaDataHeader); | 1032 | GNUNET_free (output); |
1033 | return NULL; | ||
599 | } | 1034 | } |
600 | if (pos < size - sizeof (struct MetaDataHeader)) | ||
601 | size = pos + sizeof (struct MetaDataHeader); | ||
602 | GNUNET_free (hdr); | ||
603 | return size; | ||
604 | } | 1035 | } |
605 | 1036 | ||
606 | 1037 | ||
@@ -616,41 +1047,57 @@ struct GNUNET_CONTAINER_MetaData * | |||
616 | GNUNET_CONTAINER_meta_data_deserialize (const char *input, size_t size) | 1047 | GNUNET_CONTAINER_meta_data_deserialize (const char *input, size_t size) |
617 | { | 1048 | { |
618 | struct GNUNET_CONTAINER_MetaData *md; | 1049 | struct GNUNET_CONTAINER_MetaData *md; |
619 | const struct MetaDataHeader *hdr; | 1050 | struct MetaDataHeader hdr; |
1051 | struct MetaDataEntry ent; | ||
620 | uint32_t ic; | 1052 | uint32_t ic; |
1053 | uint32_t i; | ||
621 | char *data; | 1054 | char *data; |
622 | const char *cdata; | 1055 | const char *cdata; |
1056 | uint32_t version; | ||
623 | uint32_t dataSize; | 1057 | uint32_t dataSize; |
624 | int compressed; | 1058 | int compressed; |
625 | uint32_t i; | 1059 | size_t left; |
626 | size_t pos; | 1060 | uint32_t mlen; |
627 | size_t len; | 1061 | uint32_t plen; |
628 | uint32_t version; | 1062 | uint32_t dlen; |
1063 | const char *mdata; | ||
1064 | const char *meta_data; | ||
1065 | const char *plugin_name; | ||
1066 | const char *mime_type; | ||
1067 | enum EXTRACTOR_MetaFormat format; | ||
629 | 1068 | ||
630 | if (size < sizeof (struct MetaDataHeader)) | 1069 | if (size < sizeof (struct MetaDataHeader)) |
631 | return NULL; | 1070 | return NULL; |
632 | hdr = (const struct MetaDataHeader *) input; | 1071 | memcpy (&hdr, |
633 | version = ntohl (MAKE_UNALIGNED (hdr->version)) & HEADER_VERSION_MASK; | 1072 | input, |
1073 | sizeof (struct MetaDataHeader)); | ||
1074 | version = ntohl (hdr.version) & HEADER_VERSION_MASK; | ||
1075 | compressed = (ntohl (hdr.version) & HEADER_COMPRESSED) != 0; | ||
1076 | |||
634 | if (version == 1) | 1077 | if (version == 1) |
635 | return NULL; /* null pointer */ | 1078 | return NULL; /* null pointer */ |
636 | if (version != 0) | 1079 | if (version != 2) |
637 | { | 1080 | { |
638 | GNUNET_break_op (0); /* unsupported version */ | 1081 | GNUNET_break_op (0); /* unsupported version */ |
639 | return NULL; | 1082 | return NULL; |
640 | } | 1083 | } |
641 | ic = ntohl (MAKE_UNALIGNED (hdr->entries)); | 1084 | |
642 | compressed = | 1085 | ic = ntohl (hdr.entries); |
643 | (ntohl (MAKE_UNALIGNED (hdr->version)) & HEADER_COMPRESSED) != 0; | 1086 | dataSize = ntohl (hdr.size); |
1087 | if ((sizeof (struct MetaDataEntry) * ic) > dataSize) | ||
1088 | { | ||
1089 | GNUNET_break_op (0); | ||
1090 | return NULL; | ||
1091 | } | ||
1092 | |||
644 | if (compressed) | 1093 | if (compressed) |
645 | { | 1094 | { |
646 | dataSize = | 1095 | if (dataSize >= GNUNET_MAX_MALLOC_CHECKED) |
647 | ntohl (MAKE_UNALIGNED (hdr->size)) - sizeof (struct MetaDataHeader); | ||
648 | if (dataSize > 2 * 1042 * 1024) | ||
649 | { | 1096 | { |
650 | GNUNET_break (0); | 1097 | /* make sure we don't blow our memory limit because of a mal-formed |
651 | return NULL; /* only 2 MB allowed [to make sure we don't blow | 1098 | message... */ |
652 | our memory limit because of a mal-formed | 1099 | GNUNET_break_op (0); |
653 | message... ] */ | 1100 | return NULL; |
654 | } | 1101 | } |
655 | data = | 1102 | data = |
656 | decompress ((const char *) &input[sizeof (struct MetaDataHeader)], | 1103 | decompress ((const char *) &input[sizeof (struct MetaDataHeader)], |
@@ -665,84 +1112,93 @@ GNUNET_CONTAINER_meta_data_deserialize (const char *input, size_t size) | |||
665 | else | 1112 | else |
666 | { | 1113 | { |
667 | data = NULL; | 1114 | data = NULL; |
668 | cdata = (const char *) &hdr[1]; | 1115 | cdata = (const char *) &input[sizeof (struct MetaDataHeader)]; |
669 | dataSize = size - sizeof (struct MetaDataHeader); | 1116 | if (dataSize != size - sizeof (struct MetaDataHeader)) |
670 | if (size != ntohl (MAKE_UNALIGNED (hdr->size))) | ||
671 | { | 1117 | { |
672 | GNUNET_break (0); | 1118 | GNUNET_break_op (0); |
673 | return NULL; | 1119 | return NULL; |
674 | } | 1120 | } |
675 | } | 1121 | } |
676 | 1122 | ||
677 | if ((sizeof (uint32_t) * ic + ic) > dataSize) | ||
678 | { | ||
679 | GNUNET_break (0); | ||
680 | goto FAILURE; | ||
681 | } | ||
682 | if ((ic > 0) && (cdata[dataSize - 1] != '\0')) | ||
683 | { | ||
684 | GNUNET_break (0); | ||
685 | goto FAILURE; | ||
686 | } | ||
687 | |||
688 | md = GNUNET_CONTAINER_meta_data_create (); | 1123 | md = GNUNET_CONTAINER_meta_data_create (); |
689 | GNUNET_array_grow (md->items, md->itemCount, ic); | 1124 | left = dataSize - ic * sizeof (struct MetaDataEntry); |
690 | i = 0; | 1125 | mdata = &cdata[ic * sizeof (struct MetaDataEntry)]; |
691 | pos = sizeof (uint32_t) * ic; | 1126 | for (i=0;i<ic;i++) |
692 | while ((pos < dataSize) && (i < ic)) | ||
693 | { | 1127 | { |
694 | len = strlen (&cdata[pos]) + 1; | 1128 | memcpy (&ent, |
695 | md->items[i].type = (EXTRACTOR_KeywordType) | 1129 | &cdata[i * sizeof(struct MetaDataEntry)], |
696 | ntohl (MAKE_UNALIGNED (((const uint32_t *) cdata)[i])); | 1130 | sizeof (struct MetaDataEntry)); |
697 | md->items[i].data = GNUNET_strdup (&cdata[pos]); | 1131 | format = (enum EXTRACTOR_MetaFormat) ntohl (ent.format); |
698 | pos += len; | 1132 | if ( (format != EXTRACTOR_METAFORMAT_UTF8) && |
699 | i++; | 1133 | (format != EXTRACTOR_METAFORMAT_C_STRING) && |
700 | } | 1134 | (format != EXTRACTOR_METAFORMAT_BINARY) ) |
701 | if (i < ic) | 1135 | { |
702 | { /* oops */ | 1136 | GNUNET_break_op (0); |
703 | GNUNET_CONTAINER_meta_data_destroy (md); | 1137 | break; |
704 | goto FAILURE; | 1138 | } |
705 | } | 1139 | dlen = ntohl (ent.data_size); |
706 | GNUNET_free_non_null (data); | 1140 | plen = ntohl (ent.plugin_name_len); |
707 | return md; | 1141 | mlen = ntohl (ent.mime_type_len); |
708 | FAILURE: | 1142 | if (dlen > left) |
709 | GNUNET_free_non_null (data); | 1143 | { |
710 | return NULL; /* size too small */ | 1144 | GNUNET_break_op (0); |
711 | } | 1145 | break; |
712 | 1146 | } | |
713 | /** | 1147 | left -= dlen; |
714 | * Test if two MDs are equal. | 1148 | meta_data = &mdata[left]; |
715 | * | 1149 | if ( (format == EXTRACTOR_METAFORMAT_UTF8) || |
716 | * @param md1 first value to check | 1150 | (format == EXTRACTOR_METAFORMAT_C_STRING) ) |
717 | * @param md2 other value to check | 1151 | { |
718 | * @return GNUNET_YES if they are equal | 1152 | if ( (dlen == 0) || |
719 | */ | 1153 | (mdata[left + dlen - 1] != '\0') ) |
720 | int | 1154 | { |
721 | GNUNET_CONTAINER_meta_data_test_equal (const struct GNUNET_CONTAINER_MetaData | 1155 | GNUNET_break_op (0); |
722 | *md1, | 1156 | break; |
723 | const struct GNUNET_CONTAINER_MetaData | 1157 | } |
724 | *md2) | 1158 | } |
725 | { | 1159 | if (plen > left) |
726 | uint32_t i; | 1160 | { |
727 | uint32_t j; | 1161 | GNUNET_break_op (0); |
728 | int found; | 1162 | break; |
1163 | } | ||
1164 | left -= plen; | ||
1165 | if ( (plen > 0) && | ||
1166 | (mdata[left + plen - 1] != '\0') ) | ||
1167 | { | ||
1168 | GNUNET_break_op (0); | ||
1169 | break; | ||
1170 | } | ||
1171 | if (plen == 0) | ||
1172 | plugin_name = NULL; | ||
1173 | else | ||
1174 | plugin_name = &mdata[left]; | ||
729 | 1175 | ||
730 | if (md1->itemCount != md2->itemCount) | 1176 | if (mlen > left) |
731 | return GNUNET_NO; | 1177 | { |
732 | for (i = 0; i < md1->itemCount; i++) | 1178 | GNUNET_break_op (0); |
733 | { | 1179 | break; |
734 | found = GNUNET_NO; | 1180 | } |
735 | for (j = 0; j < md2->itemCount; j++) | 1181 | left -= mlen; |
736 | if ((md1->items[i].type == md2->items[j].type) && | 1182 | if ( (mlen > 0) && |
737 | (0 == strcmp (md1->items[i].data, md2->items[j].data))) | 1183 | (mdata[left + mlen - 1] != '\0') ) |
738 | { | 1184 | { |
739 | found = GNUNET_YES; | 1185 | GNUNET_break_op (0); |
740 | break; | 1186 | break; |
741 | } | 1187 | } |
742 | if (found == GNUNET_NO) | 1188 | if (mlen == 0) |
743 | return GNUNET_NO; | 1189 | mime_type = NULL; |
1190 | else | ||
1191 | mime_type = &mdata[left]; | ||
1192 | GNUNET_CONTAINER_meta_data_insert (md, | ||
1193 | plugin_name, | ||
1194 | (enum EXTRACTOR_MetaType) ntohl (ent.type), | ||
1195 | format, | ||
1196 | mime_type, | ||
1197 | meta_data, | ||
1198 | dlen); | ||
744 | } | 1199 | } |
745 | return GNUNET_YES; | 1200 | GNUNET_free_non_null (data); |
1201 | return md; | ||
746 | } | 1202 | } |
747 | 1203 | ||
748 | 1204 | ||