real_extractor.c (13429B)
1 /* 2 * This file is part of libextractor. 3 * Copyright (C) 2021 Christian Grothoff 4 * 5 * libextractor is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published 7 * by the Free Software Foundation; either version 3, or (at your 8 * option) any later version. 9 * 10 * libextractor is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with libextractor; see the file COPYING. If not, write to the 17 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 /** 22 * @file plugins/real_extractor.c 23 * @brief plugin to support REAL files 24 * @author Christian Grothoff 25 */ 26 #include "platform.h" 27 #include "extractor.h" 28 29 struct MediaProperties 30 { 31 uint32_t object_id; 32 uint32_t size; 33 uint16_t object_version; /* must be 0 */ 34 uint16_t stream_number; 35 uint32_t max_bit_rate; 36 uint32_t avg_bit_rate; 37 uint32_t max_packet_size; 38 uint32_t avg_packet_size; 39 uint32_t start_time; 40 uint32_t preroll; 41 uint32_t duration; 42 uint8_t stream_name_size; 43 uint8_t data[0]; /* variable length section */ 44 /* 45 uint8_t[stream_name_size] stream_name; 46 uint8_t mime_type_size; 47 uint8_t[mime_type_size] mime_type; 48 uint32_t type_specific_len; 49 uint8_t[type_specific_len] type_specific_data; 50 */ 51 }; 52 53 struct ContentDescription 54 { 55 uint32_t object_id; 56 uint32_t size; 57 uint16_t object_version; /* must be 0 */ 58 uint16_t title_len; 59 uint8_t data[0]; /* variable length section */ 60 /* 61 uint8_t[title_len] title; 62 uint16_t author_len; 63 uint8_t[author_len] author; 64 uint16_t copyright_len; 65 uint8_t[copyright_len] copyright; 66 uint16_t comment_len; 67 uint8_t[comment_len] comment; 68 */ 69 }; 70 /* author, copyright and comment are supposed to be ASCII */ 71 72 73 #define REAL_HEADER 0x2E524d46 74 #define MDPR_HEADER 0x4D445052 75 #define CONT_HEADER 0x434F4e54 76 #define RAFF4_HEADER 0x2E7261FD 77 78 79 /** 80 * Give meta data to LE. 81 * 82 * @param s utf-8 string meta data value 83 * @param t type of the meta data 84 */ 85 #define ADD(s,t) do { \ 86 if (0 != ec->proc (ec->cls, "real", t, \ 87 EXTRACTOR_METAFORMAT_C_STRING, \ 88 "text/plain", s, strlen (s) + 1)) \ 89 { return; } \ 90 } while (0) 91 92 93 static void 94 processMediaProperties (const struct MediaProperties *prop, 95 struct EXTRACTOR_ExtractContext *ec) 96 { 97 uint8_t mime_type_size; 98 uint32_t prop_size; 99 100 prop_size = ntohl (prop->size); 101 if (prop_size <= sizeof (struct MediaProperties)) 102 return; 103 if (0 != prop->object_version) 104 return; 105 if (prop_size <= prop->stream_name_size + sizeof (uint8_t) 106 + sizeof (struct MediaProperties)) 107 return; 108 mime_type_size = prop->data[prop->stream_name_size]; 109 if (prop_size > prop->stream_name_size + sizeof (uint8_t) 110 + mime_type_size + sizeof (struct MediaProperties)) 111 { 112 char data[mime_type_size + 1]; 113 114 memcpy (data, 115 &prop->data[prop->stream_name_size + 1], 116 mime_type_size); 117 data[mime_type_size] = '\0'; 118 ADD (data, 119 EXTRACTOR_METATYPE_MIMETYPE); 120 } 121 } 122 123 124 static void 125 processContentDescription (const struct ContentDescription *prop, 126 struct EXTRACTOR_ExtractContext *ec) 127 { 128 uint16_t author_len; 129 uint16_t copyright_len; 130 uint16_t comment_len; 131 uint16_t title_len; 132 uint32_t prop_size; 133 134 prop_size = ntohl (prop->size); 135 if (prop_size <= sizeof (struct ContentDescription)) 136 return; 137 if (0 != prop->object_version) 138 return; 139 title_len = ntohs (prop->title_len); 140 if (prop_size <= 141 title_len 142 + sizeof (struct ContentDescription)) 143 return; 144 if (title_len > 0) 145 { 146 char title[title_len + 1]; 147 148 memcpy (title, 149 &prop->data[0], 150 title_len); 151 title[title_len] = '\0'; 152 ADD (title, 153 EXTRACTOR_METATYPE_TITLE); 154 } 155 if (prop_size <= 156 title_len 157 + sizeof (uint16_t) 158 + sizeof (struct ContentDescription)) 159 return; 160 author_len = ntohs (*(uint16_t *) &prop->data[title_len]); 161 if (prop_size <= 162 title_len 163 + sizeof (uint16_t) 164 + author_len 165 + sizeof (struct ContentDescription)) 166 return; 167 if (author_len > 0) 168 { 169 char author[author_len + 1]; 170 171 memcpy (author, 172 &prop->data[title_len 173 + sizeof (uint16_t)], 174 author_len); 175 author[author_len] = '\0'; 176 ADD (author, 177 EXTRACTOR_METATYPE_AUTHOR_NAME); 178 } 179 if (prop_size <= 180 title_len 181 + sizeof (uint16_t) 182 + author_len 183 + sizeof (uint16_t) 184 + sizeof (struct ContentDescription)) 185 return; 186 copyright_len = ntohs (*(uint16_t *) &prop->data[title_len 187 + author_len 188 + sizeof (uint16_t)]); 189 if (prop_size <= 190 title_len 191 + sizeof (uint16_t) 192 + author_len 193 + sizeof (uint16_t) 194 + copyright_len 195 + sizeof (struct ContentDescription)) 196 return; 197 if (copyright_len > 0) 198 { 199 char copyright[copyright_len + 1]; 200 201 memcpy (copyright, 202 &prop->data[title_len 203 + sizeof (uint16_t) * 2 204 + author_len], 205 copyright_len); 206 copyright[copyright_len] = '\0'; 207 ADD (copyright, 208 EXTRACTOR_METATYPE_COPYRIGHT); 209 } 210 211 if (prop_size <= 212 title_len 213 + sizeof (uint16_t) 214 + author_len 215 + sizeof (uint16_t) 216 + copyright_len 217 + sizeof (uint16_t) 218 + sizeof (struct ContentDescription)) 219 return; 220 comment_len = ntohs (*(uint16_t *) &prop->data[title_len 221 + author_len 222 + copyright_len 223 + 2 * sizeof (uint16_t)]); 224 if (prop_size < 225 title_len 226 + sizeof (uint16_t) 227 + author_len 228 + sizeof (uint16_t) 229 + copyright_len 230 + sizeof (uint16_t) 231 + comment_len 232 + sizeof (struct ContentDescription)) 233 return; 234 235 if (comment_len > 0) 236 { 237 char comment[comment_len + 1]; 238 239 memcpy (comment, 240 &prop->data[title_len 241 + sizeof (uint16_t) * 3 242 + author_len 243 + copyright_len], 244 comment_len); 245 comment[comment_len] = '\0'; 246 ADD (comment, 247 EXTRACTOR_METATYPE_COMMENT); 248 } 249 } 250 251 252 struct RAFF_Header 253 { 254 uint16_t version; 255 }; 256 257 struct RAFF3_Header 258 { 259 uint8_t unknown[10]; 260 uint32_t data_size; 261 /* 262 uint8_t tlen; 263 uint8_t title[tlen]; 264 uint8_t alen; 265 uint8_t author[alen]; 266 uint8_t clen; 267 uint8_t copyright[clen]; 268 uint8_t aplen; 269 uint8_t app[aplen]; */ 270 }; 271 272 273 #define RAFF3_HDR_SIZE 14 274 275 276 struct RAFF4_Header 277 { 278 uint16_t version; 279 uint16_t revision; 280 uint16_t header_length; 281 uint16_t compression_type; 282 uint32_t granularity; 283 uint32_t total_bytes; 284 uint32_t bytes_per_minute; 285 uint32_t bytes_per_minute2; 286 uint16_t interleave_factor; 287 uint16_t interleave_block_size; 288 uint32_t user_data; 289 float sample_rate; 290 uint16_t sample_size; 291 uint16_t channels; 292 uint8_t interleave_code[5]; 293 uint8_t compression_code[5]; 294 uint8_t is_interleaved; 295 uint8_t copy_byte; 296 uint8_t stream_type; 297 /* 298 uint8_t tlen; 299 uint8_t title[tlen]; 300 uint8_t alen; 301 uint8_t author[alen]; 302 uint8_t clen; 303 uint8_t copyright[clen]; 304 uint8_t aplen; 305 uint8_t app[aplen]; */ 306 }; 307 308 #define RAFF4_HDR_SIZE 53 309 310 311 static void 312 extract_raff3 (struct EXTRACTOR_ExtractContext *ec, 313 const void *ptr, 314 size_t size) 315 { 316 const uint8_t *data = ptr; 317 uint8_t tlen; 318 uint8_t alen; 319 uint8_t clen; 320 uint8_t aplen; 321 322 if (size <= RAFF3_HDR_SIZE + 8) 323 return; 324 tlen = data[8 + RAFF3_HDR_SIZE]; 325 if (tlen + RAFF3_HDR_SIZE + 12 > size) 326 return; 327 if (tlen > 0) 328 { 329 char x[tlen + 1]; 330 331 memcpy (x, 332 &data[9 + RAFF3_HDR_SIZE], 333 tlen); 334 x[tlen] = '\0'; 335 ADD (x, 336 EXTRACTOR_METATYPE_TITLE); 337 } 338 alen = data[9 + tlen + RAFF3_HDR_SIZE]; 339 if (tlen + alen + RAFF3_HDR_SIZE + 12 > size) 340 return; 341 if (alen > 0) 342 { 343 char x[alen + 1]; 344 345 memcpy (x, 346 &data[10 + RAFF3_HDR_SIZE + tlen], 347 alen); 348 x[alen] = '\0'; 349 ADD (x, 350 EXTRACTOR_METATYPE_AUTHOR_NAME); 351 } 352 clen = data[10 + tlen + alen + RAFF3_HDR_SIZE]; 353 if (tlen + alen + clen + RAFF3_HDR_SIZE + 12 > size) 354 return; 355 if (clen > 0) 356 { 357 char x[clen + 1]; 358 359 memcpy (x, 360 &data[11 + RAFF4_HDR_SIZE + tlen + alen], 361 clen); 362 x[clen] = '\0'; 363 ADD (x, 364 EXTRACTOR_METATYPE_COPYRIGHT); 365 } 366 aplen = data[11 + tlen + clen + alen + RAFF3_HDR_SIZE]; 367 if (tlen + alen + clen + aplen + RAFF3_HDR_SIZE + 12 > size) 368 return; 369 if (aplen > 0) 370 { 371 char x[aplen + 1]; 372 373 memcpy (x, 374 &data[12 + RAFF4_HDR_SIZE + tlen + alen + clen], 375 aplen); 376 x[aplen] = '\0'; 377 ADD (x, 378 EXTRACTOR_METATYPE_UNKNOWN); 379 } 380 } 381 382 383 static void 384 extract_raff4 (struct EXTRACTOR_ExtractContext *ec, 385 const void *ptr, 386 size_t size) 387 { 388 const uint8_t *data = ptr; 389 uint8_t tlen; 390 uint8_t alen; 391 uint8_t clen; 392 uint8_t aplen; 393 394 if (size <= RAFF4_HDR_SIZE + 16 + 4) 395 return; 396 tlen = data[16 + RAFF4_HDR_SIZE]; 397 if (tlen + RAFF4_HDR_SIZE + 20 > size) 398 return; 399 alen = data[17 + tlen + RAFF4_HDR_SIZE]; 400 if (tlen + alen + RAFF4_HDR_SIZE + 20 > size) 401 return; 402 clen = data[18 + tlen + alen + RAFF4_HDR_SIZE]; 403 if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size) 404 return; 405 aplen = data[19 + tlen + clen + alen + RAFF4_HDR_SIZE]; 406 if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size) 407 return; 408 if (tlen > 0) 409 { 410 char x[tlen + 1]; 411 412 memcpy (x, 413 &data[17 + RAFF4_HDR_SIZE], 414 tlen); 415 x[tlen] = '\0'; 416 ADD (x, 417 EXTRACTOR_METATYPE_TITLE); 418 } 419 if (alen > 0) 420 { 421 char x[alen + 1]; 422 423 memcpy (x, 424 &data[18 + RAFF4_HDR_SIZE + tlen], 425 alen); 426 x[alen] = '\0'; 427 ADD (x, 428 EXTRACTOR_METATYPE_AUTHOR_NAME); 429 } 430 if (clen > 0) 431 { 432 char x[clen + 1]; 433 434 memcpy (x, 435 &data[19 + RAFF4_HDR_SIZE + tlen + alen], 436 clen); 437 x[clen] = '\0'; 438 ADD (x, 439 EXTRACTOR_METATYPE_COPYRIGHT); 440 } 441 if (aplen > 0) 442 { 443 char x[aplen + 1]; 444 445 memcpy (x, 446 &data[20 + RAFF4_HDR_SIZE + tlen + alen + clen], 447 aplen); 448 x[aplen] = '\0'; 449 ADD (x, 450 EXTRACTOR_METATYPE_UNKNOWN); 451 } 452 } 453 454 455 static void 456 extract_raff (struct EXTRACTOR_ExtractContext *ec, 457 const void *ptr, 458 size_t size) 459 { 460 const uint8_t *data = ptr; 461 const struct RAFF_Header *hdr; 462 463 /* HELIX */ 464 if (size <= sizeof (*hdr) + 4) 465 return; 466 ADD ("audio/vnd.rn-realaudio", 467 EXTRACTOR_METATYPE_MIMETYPE); 468 hdr = (const struct RAFF_Header *) &data[4]; 469 switch (ntohs (hdr->version)) 470 { 471 case 3: 472 extract_raff3 (ec, 473 ptr, 474 size); 475 break; 476 case 4: 477 extract_raff4 (ec, 478 ptr, 479 size); 480 break; 481 } 482 } 483 484 485 /* old real format */ 486 static void 487 extract_real (struct EXTRACTOR_ExtractContext *ec, 488 const void *data, 489 size_t size) 490 { 491 uint64_t off = 0; 492 size_t pos = 0; 493 494 while (1) 495 { 496 uint32_t length; 497 498 if ( (pos + 8 > size) || 499 (pos + 8 < pos) || 500 (pos + (length = ntohl (((uint32_t *) (data + pos))[1])) > size) ) 501 { 502 uint64_t noff; 503 void *in; 504 ssize_t isize; 505 506 noff = ec->seek (ec->cls, 507 off + pos, 508 SEEK_SET); 509 if (-1 == noff) 510 return; 511 isize = ec->read (ec->cls, 512 &in, 513 32 * 1024); 514 if (isize < 8) 515 return; 516 data = in; 517 size = isize; 518 off = noff; 519 pos = 0; 520 } 521 if (length <= 8) 522 return; 523 if ( (pos + length > size) || 524 (pos + length < pos) ) 525 return; 526 switch (ntohl (((uint32_t *) (data + pos))[0])) 527 { 528 case MDPR_HEADER: 529 processMediaProperties (data + pos, 530 ec); 531 pos += length; 532 break; 533 case CONT_HEADER: 534 processContentDescription (data + pos, 535 ec); 536 pos += length; 537 break; 538 case REAL_HEADER: /* treat like default */ 539 default: 540 pos += length; 541 break; 542 } 543 } 544 } 545 546 547 /** 548 * "extract" metadata from a REAL file 549 * 550 * @param ec extraction context 551 */ 552 void 553 EXTRACTOR_real_extract_method (struct EXTRACTOR_ExtractContext *ec) 554 { 555 void *data; 556 size_t n; 557 558 n = ec->read (ec->cls, 559 &data, 560 sizeof (struct RAFF4_Header) + 4 * 256); 561 if (n < sizeof (uint32_t)) 562 return; 563 switch (ntohl (*(uint32_t *) data)) 564 { 565 case RAFF4_HEADER: 566 extract_raff (ec, 567 data, 568 n); 569 break; 570 case REAL_HEADER: 571 extract_real (ec, 572 data, 573 n); 574 break; 575 } 576 } 577 578 579 /* end of real_extractor.c */