libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

real_extractor.c (13429B)


      1 /*
      2  * This file is part of libextractor.
      3  * Copyright (C) 2021 Christian Grothoff
      4  *
      5  * libextractor is free software; you can redistribute it and/or modify
      6  * it under the terms of the GNU General Public License as published
      7  * by the Free Software Foundation; either version 3, or (at your
      8  * option) any later version.
      9  *
     10  * libextractor is distributed in the hope that it will be useful, but
     11  * WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  * General Public License for more details.
     14  *
     15  * You should have received a copy of the GNU General Public License
     16  * along with libextractor; see the file COPYING.  If not, write to the
     17  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18  * Boston, MA 02110-1301, USA.
     19  *
     20  */
     21 /**
     22  * @file plugins/real_extractor.c
     23  * @brief plugin to support REAL files
     24  * @author Christian Grothoff
     25  */
     26 #include "platform.h"
     27 #include "extractor.h"
     28 
     29 struct MediaProperties
     30 {
     31   uint32_t object_id;
     32   uint32_t size;
     33   uint16_t object_version;        /* must be 0 */
     34   uint16_t stream_number;
     35   uint32_t max_bit_rate;
     36   uint32_t avg_bit_rate;
     37   uint32_t max_packet_size;
     38   uint32_t avg_packet_size;
     39   uint32_t start_time;
     40   uint32_t preroll;
     41   uint32_t duration;
     42   uint8_t stream_name_size;
     43   uint8_t data[0];                /* variable length section */
     44   /*
     45      uint8_t[stream_name_size]     stream_name;
     46      uint8_t                       mime_type_size;
     47      uint8_t[mime_type_size]       mime_type;
     48      uint32_t                      type_specific_len;
     49      uint8_t[type_specific_len]    type_specific_data;
     50    */
     51 };
     52 
     53 struct ContentDescription
     54 {
     55   uint32_t object_id;
     56   uint32_t size;
     57   uint16_t object_version;        /* must be 0 */
     58   uint16_t title_len;
     59   uint8_t data[0];                /* variable length section */
     60   /*
     61      uint8_t[title_len]  title;
     62      uint16_t    author_len;
     63      uint8_t[author_len]  author;
     64      uint16_t    copyright_len;
     65      uint8_t[copyright_len]  copyright;
     66      uint16_t    comment_len;
     67      uint8_t[comment_len]  comment;
     68    */
     69 };
     70 /* author, copyright and comment are supposed to be ASCII */
     71 
     72 
/* 32-bit tags, written as the value obtained after converting the
   first four bytes of a file or chunk from network byte order. */
#define REAL_HEADER 0x2E524d46  /* ASCII ".RMF": file magic, also a chunk tag */
#define MDPR_HEADER 0x4D445052  /* ASCII "MDPR": media properties chunk */
#define CONT_HEADER 0x434F4e54  /* ASCII "CONT": content description chunk */
#define RAFF4_HEADER 0x2E7261FD /* ASCII ".ra" followed by byte 0xFD: file magic;
                                   used for header versions 3 and 4 alike */
     77 
     78 
/**
 * Give meta data to LE.
 *
 * The macro relies on a variable named `ec` (the extraction context)
 * being in scope at the point of use.  If the meta data processor
 * `ec->proc` returns a non-zero value, the macro executes `return;`
 * and thereby leaves the enclosing function, so it may only be used
 * inside functions returning void.
 *
 * Note that @a s is evaluated twice (once as the value, once for
 * strlen()), so it must be free of side effects and 0-terminated.
 *
 * @param s utf-8 string meta data value
 * @param t type of the meta data
 */
#define ADD(s,t) do { \
    if (0 != ec->proc (ec->cls, "real", t, \
                       EXTRACTOR_METAFORMAT_C_STRING, \
                       "text/plain", s, strlen (s) + 1)) \
    { return; } \
} while (0)
     91 
     92 
     93 static void
     94 processMediaProperties (const struct MediaProperties *prop,
     95                         struct EXTRACTOR_ExtractContext *ec)
     96 {
     97   uint8_t mime_type_size;
     98   uint32_t prop_size;
     99 
    100   prop_size = ntohl (prop->size);
    101   if (prop_size <= sizeof (struct MediaProperties))
    102     return;
    103   if (0 != prop->object_version)
    104     return;
    105   if (prop_size <= prop->stream_name_size + sizeof (uint8_t)
    106       + sizeof (struct MediaProperties))
    107     return;
    108   mime_type_size = prop->data[prop->stream_name_size];
    109   if (prop_size > prop->stream_name_size + sizeof (uint8_t)
    110       + mime_type_size + sizeof (struct MediaProperties))
    111   {
    112     char data[mime_type_size + 1];
    113 
    114     memcpy (data,
    115             &prop->data[prop->stream_name_size + 1],
    116             mime_type_size);
    117     data[mime_type_size] = '\0';
    118     ADD (data,
    119          EXTRACTOR_METATYPE_MIMETYPE);
    120   }
    121 }
    122 
    123 
    124 static void
    125 processContentDescription (const struct ContentDescription *prop,
    126                            struct EXTRACTOR_ExtractContext *ec)
    127 {
    128   uint16_t author_len;
    129   uint16_t copyright_len;
    130   uint16_t comment_len;
    131   uint16_t title_len;
    132   uint32_t prop_size;
    133 
    134   prop_size = ntohl (prop->size);
    135   if (prop_size <= sizeof (struct ContentDescription))
    136     return;
    137   if (0 != prop->object_version)
    138     return;
    139   title_len = ntohs (prop->title_len);
    140   if (prop_size <=
    141       title_len
    142       + sizeof (struct ContentDescription))
    143     return;
    144   if (title_len > 0)
    145   {
    146     char title[title_len + 1];
    147 
    148     memcpy (title,
    149             &prop->data[0],
    150             title_len);
    151     title[title_len] = '\0';
    152     ADD (title,
    153          EXTRACTOR_METATYPE_TITLE);
    154   }
    155   if (prop_size <=
    156       title_len
    157       + sizeof (uint16_t)
    158       + sizeof (struct ContentDescription))
    159     return;
    160   author_len = ntohs (*(uint16_t *) &prop->data[title_len]);
    161   if (prop_size <=
    162       title_len
    163       + sizeof (uint16_t)
    164       + author_len
    165       + sizeof (struct ContentDescription))
    166     return;
    167   if (author_len > 0)
    168   {
    169     char author[author_len + 1];
    170 
    171     memcpy (author,
    172             &prop->data[title_len
    173                         + sizeof (uint16_t)],
    174             author_len);
    175     author[author_len] = '\0';
    176     ADD (author,
    177          EXTRACTOR_METATYPE_AUTHOR_NAME);
    178   }
    179   if (prop_size <=
    180       title_len
    181       + sizeof (uint16_t)
    182       + author_len
    183       + sizeof (uint16_t)
    184       + sizeof (struct ContentDescription))
    185     return;
    186   copyright_len = ntohs (*(uint16_t *) &prop->data[title_len
    187                                                    + author_len
    188                                                    + sizeof (uint16_t)]);
    189   if (prop_size <=
    190       title_len
    191       + sizeof (uint16_t)
    192       + author_len
    193       + sizeof (uint16_t)
    194       + copyright_len
    195       + sizeof (struct ContentDescription))
    196     return;
    197   if (copyright_len > 0)
    198   {
    199     char copyright[copyright_len + 1];
    200 
    201     memcpy (copyright,
    202             &prop->data[title_len
    203                         + sizeof (uint16_t) * 2
    204                         + author_len],
    205             copyright_len);
    206     copyright[copyright_len] = '\0';
    207     ADD (copyright,
    208          EXTRACTOR_METATYPE_COPYRIGHT);
    209   }
    210 
    211   if (prop_size <=
    212       title_len
    213       + sizeof (uint16_t)
    214       + author_len
    215       + sizeof (uint16_t)
    216       + copyright_len
    217       + sizeof (uint16_t)
    218       + sizeof (struct ContentDescription))
    219     return;
    220   comment_len = ntohs (*(uint16_t *) &prop->data[title_len
    221                                                  + author_len
    222                                                  + copyright_len
    223                                                  + 2 * sizeof (uint16_t)]);
    224   if (prop_size <
    225       title_len
    226       + sizeof (uint16_t)
    227       + author_len
    228       + sizeof (uint16_t)
    229       + copyright_len
    230       + sizeof (uint16_t)
    231       + comment_len
    232       + sizeof (struct ContentDescription))
    233     return;
    234 
    235   if (comment_len > 0)
    236   {
    237     char comment[comment_len + 1];
    238 
    239     memcpy (comment,
    240             &prop->data[title_len
    241                         + sizeof (uint16_t) * 3
    242                         + author_len
    243                         + copyright_len],
    244             comment_len);
    245     comment[comment_len] = '\0';
    246     ADD (comment,
    247          EXTRACTOR_METATYPE_COMMENT);
    248   }
    249 }
    250 
    251 
/**
 * Version field that extract_raff() reads at byte offset 4 of the
 * file, i.e. directly after the 4-byte magic.
 */
struct RAFF_Header
{
  uint16_t version;             /* network byte order; 3 and 4 are handled */
};
    256 
/**
 * Layout of the fixed part of a version 3 header; the commented-out
 * members describe the length-prefixed strings that follow it.
 */
struct RAFF3_Header
{
  uint8_t unknown[10];
  uint32_t data_size;
  /*
     uint8_t tlen;
     uint8_t title[tlen];
     uint8_t alen;
     uint8_t author[alen];
     uint8_t clen;
     uint8_t copyright[clen];
     uint8_t aplen;
     uint8_t app[aplen]; */
};


/* Sum of the member sizes above (10 + 4); the compiler may insert
   padding, so this need not equal sizeof (struct RAFF3_Header). */
#define RAFF3_HDR_SIZE 14
    274 
    275 
/**
 * Layout of the fixed part of a version 4 header; the commented-out
 * members describe the length-prefixed strings that follow it.
 */
struct RAFF4_Header
{
  uint16_t version;
  uint16_t revision;
  uint16_t header_length;
  uint16_t compression_type;
  uint32_t granularity;
  uint32_t total_bytes;
  uint32_t bytes_per_minute;
  uint32_t bytes_per_minute2;
  uint16_t interleave_factor;
  uint16_t interleave_block_size;
  uint32_t user_data;
  float sample_rate;
  uint16_t sample_size;
  uint16_t channels;
  uint8_t interleave_code[5];
  uint8_t compression_code[5];
  uint8_t is_interleaved;
  uint8_t copy_byte;
  uint8_t stream_type;
  /*
     uint8_t tlen;
     uint8_t title[tlen];
     uint8_t alen;
     uint8_t author[alen];
     uint8_t clen;
     uint8_t copyright[clen];
     uint8_t aplen;
     uint8_t app[aplen]; */
};

/* Sum of the member sizes above assuming a 4-byte float; the compiler
   may insert padding, so this need not equal
   sizeof (struct RAFF4_Header). */
#define RAFF4_HDR_SIZE 53
    309 
    310 
    311 static void
    312 extract_raff3 (struct EXTRACTOR_ExtractContext *ec,
    313                const void *ptr,
    314                size_t size)
    315 {
    316   const uint8_t *data = ptr;
    317   uint8_t tlen;
    318   uint8_t alen;
    319   uint8_t clen;
    320   uint8_t aplen;
    321 
    322   if (size <= RAFF3_HDR_SIZE + 8)
    323     return;
    324   tlen = data[8 + RAFF3_HDR_SIZE];
    325   if (tlen + RAFF3_HDR_SIZE + 12 > size)
    326     return;
    327   if (tlen > 0)
    328   {
    329     char x[tlen + 1];
    330 
    331     memcpy (x,
    332             &data[9 + RAFF3_HDR_SIZE],
    333             tlen);
    334     x[tlen] = '\0';
    335     ADD (x,
    336          EXTRACTOR_METATYPE_TITLE);
    337   }
    338   alen = data[9 + tlen + RAFF3_HDR_SIZE];
    339   if (tlen + alen + RAFF3_HDR_SIZE + 12 > size)
    340     return;
    341   if (alen > 0)
    342   {
    343     char x[alen + 1];
    344 
    345     memcpy (x,
    346             &data[10 + RAFF3_HDR_SIZE + tlen],
    347             alen);
    348     x[alen] = '\0';
    349     ADD (x,
    350          EXTRACTOR_METATYPE_AUTHOR_NAME);
    351   }
    352   clen = data[10 + tlen + alen + RAFF3_HDR_SIZE];
    353   if (tlen + alen + clen + RAFF3_HDR_SIZE + 12 > size)
    354     return;
    355   if (clen > 0)
    356   {
    357     char x[clen + 1];
    358 
    359     memcpy (x,
    360             &data[11 + RAFF4_HDR_SIZE + tlen + alen],
    361             clen);
    362     x[clen] = '\0';
    363     ADD (x,
    364          EXTRACTOR_METATYPE_COPYRIGHT);
    365   }
    366   aplen = data[11 + tlen + clen + alen + RAFF3_HDR_SIZE];
    367   if (tlen + alen + clen + aplen + RAFF3_HDR_SIZE + 12 > size)
    368     return;
    369   if (aplen > 0)
    370   {
    371     char x[aplen + 1];
    372 
    373     memcpy (x,
    374             &data[12 + RAFF4_HDR_SIZE + tlen + alen + clen],
    375             aplen);
    376     x[aplen] = '\0';
    377     ADD (x,
    378          EXTRACTOR_METATYPE_UNKNOWN);
    379   }
    380 }
    381 
    382 
    383 static void
    384 extract_raff4 (struct EXTRACTOR_ExtractContext *ec,
    385                const void *ptr,
    386                size_t size)
    387 {
    388   const uint8_t *data = ptr;
    389   uint8_t tlen;
    390   uint8_t alen;
    391   uint8_t clen;
    392   uint8_t aplen;
    393 
    394   if (size <= RAFF4_HDR_SIZE + 16 + 4)
    395     return;
    396   tlen = data[16 + RAFF4_HDR_SIZE];
    397   if (tlen + RAFF4_HDR_SIZE + 20 > size)
    398     return;
    399   alen = data[17 + tlen + RAFF4_HDR_SIZE];
    400   if (tlen + alen + RAFF4_HDR_SIZE + 20 > size)
    401     return;
    402   clen = data[18 + tlen + alen + RAFF4_HDR_SIZE];
    403   if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size)
    404     return;
    405   aplen = data[19 + tlen + clen + alen + RAFF4_HDR_SIZE];
    406   if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size)
    407     return;
    408   if (tlen > 0)
    409   {
    410     char x[tlen + 1];
    411 
    412     memcpy (x,
    413             &data[17 + RAFF4_HDR_SIZE],
    414             tlen);
    415     x[tlen] = '\0';
    416     ADD (x,
    417          EXTRACTOR_METATYPE_TITLE);
    418   }
    419   if (alen > 0)
    420   {
    421     char x[alen + 1];
    422 
    423     memcpy (x,
    424             &data[18 + RAFF4_HDR_SIZE + tlen],
    425             alen);
    426     x[alen] = '\0';
    427     ADD (x,
    428          EXTRACTOR_METATYPE_AUTHOR_NAME);
    429   }
    430   if (clen > 0)
    431   {
    432     char x[clen + 1];
    433 
    434     memcpy (x,
    435             &data[19 + RAFF4_HDR_SIZE + tlen + alen],
    436             clen);
    437     x[clen] = '\0';
    438     ADD (x,
    439          EXTRACTOR_METATYPE_COPYRIGHT);
    440   }
    441   if (aplen > 0)
    442   {
    443     char x[aplen + 1];
    444 
    445     memcpy (x,
    446             &data[20 + RAFF4_HDR_SIZE + tlen + alen + clen],
    447             aplen);
    448     x[aplen] = '\0';
    449     ADD (x,
    450          EXTRACTOR_METATYPE_UNKNOWN);
    451   }
    452 }
    453 
    454 
    455 static void
    456 extract_raff (struct EXTRACTOR_ExtractContext *ec,
    457               const void *ptr,
    458               size_t size)
    459 {
    460   const uint8_t *data = ptr;
    461   const struct RAFF_Header *hdr;
    462 
    463   /* HELIX */
    464   if (size <= sizeof (*hdr) + 4)
    465     return;
    466   ADD ("audio/vnd.rn-realaudio",
    467        EXTRACTOR_METATYPE_MIMETYPE);
    468   hdr = (const struct RAFF_Header *) &data[4];
    469   switch (ntohs (hdr->version))
    470   {
    471   case 3:
    472     extract_raff3 (ec,
    473                    ptr,
    474                    size);
    475     break;
    476   case 4:
    477     extract_raff4 (ec,
    478                    ptr,
    479                    size);
    480     break;
    481   }
    482 }
    483 
    484 
    485 /* old real format */
    486 static void
    487 extract_real (struct EXTRACTOR_ExtractContext *ec,
    488               const void *data,
    489               size_t size)
    490 {
    491   uint64_t off = 0;
    492   size_t pos = 0;
    493 
    494   while (1)
    495   {
    496     uint32_t length;
    497 
    498     if ( (pos + 8 > size) ||
    499          (pos + 8 < pos) ||
    500          (pos + (length = ntohl (((uint32_t *) (data + pos))[1])) > size) )
    501     {
    502       uint64_t noff;
    503       void *in;
    504       ssize_t isize;
    505 
    506       noff = ec->seek (ec->cls,
    507                        off + pos,
    508                        SEEK_SET);
    509       if (-1 == noff)
    510         return;
    511       isize = ec->read (ec->cls,
    512                         &in,
    513                         32 * 1024);
    514       if (isize < 8)
    515         return;
    516       data = in;
    517       size = isize;
    518       off = noff;
    519       pos = 0;
    520     }
    521     if (length <= 8)
    522       return;
    523     if ( (pos + length > size) ||
    524          (pos + length < pos) )
    525       return;
    526     switch (ntohl (((uint32_t *) (data + pos))[0]))
    527     {
    528     case MDPR_HEADER:
    529       processMediaProperties (data + pos,
    530                               ec);
    531       pos += length;
    532       break;
    533     case CONT_HEADER:
    534       processContentDescription (data + pos,
    535                                  ec);
    536       pos += length;
    537       break;
    538     case REAL_HEADER:          /* treat like default */
    539     default:
    540       pos += length;
    541       break;
    542     }
    543   }
    544 }
    545 
    546 
    547 /**
    548  * "extract" metadata from a REAL file
    549  *
    550  * @param ec extraction context
    551  */
    552 void
    553 EXTRACTOR_real_extract_method (struct EXTRACTOR_ExtractContext *ec)
    554 {
    555   void *data;
    556   size_t n;
    557 
    558   n = ec->read (ec->cls,
    559                 &data,
    560                 sizeof (struct RAFF4_Header) + 4 * 256);
    561   if (n < sizeof (uint32_t))
    562     return;
    563   switch (ntohl (*(uint32_t *) data))
    564   {
    565   case RAFF4_HEADER:
    566     extract_raff (ec,
    567                   data,
    568                   n);
    569     break;
    570   case REAL_HEADER:
    571     extract_real (ec,
    572                   data,
    573                   n);
    574     break;
    575   }
    576 }
    577 
    578 
    579 /* end of real_extractor.c */