libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

extractor_ipc.h (12596B)


      1 /*
      2      This file is part of libextractor.
      3      Copyright (C) 2012 Vidyut Samanta and Christian Grothoff
      4 
      5      libextractor is free software; you can redistribute it and/or modify
      6      it under the terms of the GNU General Public License as published
      7      by the Free Software Foundation; either version 3, or (at your
      8      option) any later version.
      9 
     10      libextractor is distributed in the hope that it will be useful, but
     11      WITHOUT ANY WARRANTY; without even the implied warranty of
     12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13      General Public License for more details.
     14 
     15      You should have received a copy of the GNU General Public License
     16      along with libextractor; see the file COPYING.  If not, write to the
     17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18      Boston, MA 02110-1301, USA.
     19  */
     20 /**
     21  * @file main/extractor_ipc.h
     22  * @brief IPC with plugin (OS-independent API)
     23  * @author Christian Grothoff
     24  *
     25  * @details
     26  * The IPC communication between plugins and the main library works
     27  * as follows.  Each message begins with a 1-character opcode which
     28  * specifies the message type.  The main library starts the plugins
     29  * by forking the helper process and establishes two pipes for
     30  * communication in both directions.
     31  * First, the main library sends a MESSAGE_INIT_STATE message
     32  * to the plugin.  The start message specifies the name (and size)
     33  * of a shared memory segment which will contain parts of the (uncompressed)
     34  * data of the file that is being processed.  The same shared memory
     35  * segment is used throughout the lifetime of the plugin.
     36  *
     37  * Then, the following messages are exchanged for each file.
     38  * First, an EXTRACT_START message is sent with the specific
     39  * size of the file (or -1 if unknown) and the number of bytes
     40  * ready in the shared memory segment.  The plugin then answers
     41  * with either:
     42  * 1) MESSAGE_DONE to indicate that no further processing is
     43  *    required for this file; the IPC continues with the
     44  *    EXTRACT_START message for the next file afterwards;
     45  * 2) MESSAGE_SEEK to indicate that the plugin would like to
     46  *    read data at a different offset; the main library can
     47  *    then either
     48  *    a) respond with a MESSAGE_DISCARD_STATE to
     49  *       tell the plugin to abort processing (the next message will
     50  *       then be another EXTRACT_START)
     51  *    b) respond with a MESSAGE_UPDATED_SHM which notifies the
     52  *       plugin that the shared memory segment was moved to a
     53  *       different location in the overall file; the target of the
     54  *       seek should now be within the new range (but does NOT have
     55  *       to be at the beginning of that range)
     56  * 3) MESSAGE_META to provide extracted meta data to the main
     57  *    library.  The main library can then either:
     58  *    a) respond with a MESSAGE_DISCARD_STATE to
     59  *       tell the plugin to abort processing (the next message will
     60  *       then be another EXTRACT_START)
     61  *    b) respond with a MESSAGE_CONTINUE_EXTRACTING to
     62  *       tell the plugin to continue extracting meta data; in this
     63  *       case, the plugin is then expected to produce another
     64  *       MESSAGE_DONE, MESSAGE_SEEK or MESSAGE_META round of messages.
     65  */
     66 #ifndef EXTRACTOR_IPC_H
     67 #define EXTRACTOR_IPC_H
     68 
     69 #include "extractor_datasource.h"
     70 
     71 
     72 /**
     73  * How long do we allow an individual meta data object to be?
     74  * Used to guard against (broken) plugins causing us to use
     75  * excessive amounts of memory.  Parenthesized for safe expansion.
     76  */
     77 #define MAX_META_DATA (32 * 1024 * 1024)
     78 
     79 /**
     80  * Maximum length of a shared memory object name.  NOTE(review): confirm whether this counts the 0-terminator.
     81  */
     82 #define MAX_SHM_NAME 255
     83 
     84 /**
     85  * Opcode sent from LE to a plugin to initialize it (opens shm); used in struct InitMessage.
     86  */
     87 #define MESSAGE_INIT_STATE 0x00
     88 
     89 /**
     90  * IPC message sent to a plugin to initialize the SHM.
     91  */
     92 struct InitMessage
     93 {
     94   /**
     95    * Set to #MESSAGE_INIT_STATE.
     96    */
     97   unsigned char opcode;
     98 
     99   /**
    100    * Always zero.
    101    */
    102   unsigned char reserved;
    103 
    104   /**
    105    * Always zero.
    106    */
    107   uint16_t reserved2;
    108 
    109   /**
    110    * Length of the shared-memory name that follows this struct.
    111    */
    112   uint32_t shm_name_length;
    113 
    114   /**
    115    * Maximum size of the shm map.
    116    */
    117   uint32_t shm_map_size;
    118 
    119   /* followed by name of the SHM */
    120 };
    121 
    122 
    123 /**
    124  * Opcode sent from LE to a plugin to tell it that extracting
    125  * can now start.  The SHM will point to offset 0
    126  * of the file.  Used in struct StartMessage.
    127  */
    128 #define MESSAGE_EXTRACT_START 0x01
    129 
    130 /**
    131  * IPC message sent to a plugin to start extracting.
    132  */
    133 struct StartMessage
    134 {
    135   /**
    136    * Set to #MESSAGE_EXTRACT_START.
    137    */
    138   unsigned char opcode;
    139 
    140   /**
    141    * Always zero.
    142    */
    143   unsigned char reserved;
    144 
    145   /**
    146    * Always zero.
    147    */
    148   uint16_t reserved2;
    149 
    150   /**
    151    * Number of bytes ready in SHM.
    152    */
    153   uint32_t shm_ready_bytes;
    154 
    155   /**
    156    * Overall size of the file.  NOTE(review): the file header says "-1 if unknown"; as uint64_t that is presumably UINT64_MAX -- confirm.
    157    */
    158   uint64_t file_size;
    159 
    160 };
    161 
    162 /**
    163  * Opcode sent from LE to a plugin to tell it that the shm contents
    164  * were updated (a possible answer to #MESSAGE_SEEK); used in struct UpdateMessage.
    165  */
    166 #define MESSAGE_UPDATED_SHM 0x02
    167 
    168 /**
    169  * IPC message sent to a plugin to notify it about a change in the SHM.
    170  */
    171 struct UpdateMessage
    172 {
    173   /**
    174    * Set to #MESSAGE_UPDATED_SHM.
    175    */
    176   unsigned char opcode;
    177 
    178   /**
    179    * Always zero.
    180    */
    181   unsigned char reserved;
    182 
    183   /**
    184    * Always zero.
    185    */
    186   uint16_t reserved2;
    187 
    188   /**
    189    * Number of bytes ready in SHM.
    190    */
    191   uint32_t shm_ready_bytes;
    192 
    193   /**
    194    * Offset of the start of the shm contents within the overall file.
    195    */
    196   uint64_t shm_off;
    197 
    198   /**
    199    * Overall size of the file.
    200    */
    201   uint64_t file_size;
    202 
    203 };
    204 
    205 /**
    206  * Opcode sent from plugin to LE to tell LE that the plugin is done
    207  * analyzing the current file and will send no more data.
    208  * There is no message struct as this message is only one byte.
    209  */
    210 #define MESSAGE_DONE 0x03
    211 
    212 /**
    213  * Opcode sent from plugin to LE to tell LE that the plugin needs
    214  * to read a different part of the source file; used in struct SeekRequestMessage.
    215  */
    216 #define MESSAGE_SEEK 0x04
    217 
    218 /**
    219  * IPC message sent by a plugin to LE to request a different part of the file.
    220  */
    221 struct SeekRequestMessage
    222 {
    223   /**
    224    * Set to #MESSAGE_SEEK.
    225    */
    226   unsigned char opcode;
    227 
    228   /**
    229    * Always zero.
    230    */
    231   unsigned char reserved;
    232 
    233   /**
    234    * 'whence' value for the seek operation;
    235    * 0 = SEEK_SET, 1 = SEEK_CUR, 2 = SEEK_END.
    236    * Note that 'SEEK_CUR' is never used here.
    237    */
    238   uint16_t whence;
    239 
    240   /**
    241    * Number of bytes requested for SHM.
    242    */
    243   uint32_t requested_bytes;
    244 
    245   /**
    246    * Requested offset; a positive value from the end of the
    247    * file is used if 'whence' is SEEK_END; a positive value
    248    * from the start is used if 'whence' is SEEK_SET.
    249    * 'SEEK_CUR' is never used.
    250    */
    251   uint64_t file_offset;
    252 
    253 };
    254 
    255 /**
    256  * Opcode sent from plugin to LE to tell LE about metadata discovered; used in struct MetaMessage.
    257  */
    258 #define MESSAGE_META 0x05
    259 
    260 /**
    261  * Plugin to parent (LE): metadata discovered.
    262  */
    263 struct MetaMessage
    264 {
    265   /**
    266    * Set to #MESSAGE_META.
    267    */
    268   unsigned char opcode;
    269 
    270   /**
    271    * Always zero.
    272    */
    273   unsigned char reserved;
    274 
    275   /**
    276    * An 'enum EXTRACTOR_MetaFormat' in 16 bits.
    277    */
    278   uint16_t meta_format;
    279 
    280   /**
    281    * An 'enum EXTRACTOR_MetaType' in 16 bits.
    282    */
    283   uint16_t meta_type;
    284 
    285   /**
    286    * Number of bytes of mime type string that follow (0 if there is none).
    287    */
    288   uint16_t mime_length;
    289 
    290   /**
    291    * Size of the value in bytes.
    292    */
    293   uint32_t value_size;
    294 
    295   /* followed by mime_length bytes of 0-terminated
    296      mime-type (unless mime_length is 0) */
    297 
    298   /* followed by value_size bytes of value */
    299 
    300 };
    301 
    302 /**
    303  * Opcode sent from LE to plugin to make the plugin discard its state
    304  * (extraction aborted by application).  Only one byte.
    305  * The plugin should get ready for the next 'StartMessage' after this.
    306  * (Sent in response to META data or SEEK requests.)
    307  */
    308 #define MESSAGE_DISCARD_STATE 0x06
    309 
    310 /**
    311  * Opcode sent from LE to plugin to make the plugin continue extraction.
    312  * (Sent in response to META data.)
    313  */
    314 #define MESSAGE_CONTINUE_EXTRACTING 0x07
    315 
    316 
    317 /**
    318  * Opaque handle for an IPC communication channel with
    319  * some plugin (only forward-declared in this header).
    320  */
    321 struct EXTRACTOR_Channel;
    322 
    323 /**
    324  * Opaque handle for a shared memory area (only forward-declared in this header).
    325  */
    326 struct EXTRACTOR_SharedMemory;
    327 
    328 
    329 /**
    330  * Create a shared memory area.
    331  *
    332  * @param size size of the shared area
    333  * @return handle for the new shared memory area, NULL on error
    334  */
    335 struct EXTRACTOR_SharedMemory *
    336 EXTRACTOR_IPC_shared_memory_create_ (size_t size);
    337 
    338 
    339 /**
    340  * Destroy shared memory area.  This function does not
    341  * return a value.
    342  *
    343  * @param shm memory area to destroy
    344  */
    345 void
    346 EXTRACTOR_IPC_shared_memory_destroy_ (struct EXTRACTOR_SharedMemory *shm);
    347 
    348 
    349 /**
    350  * Change the reference counter (RC) for this shm instance.
    351  *
    352  * @param shm instance to update
    353  * @param delta signed value to change the RC by
    354  * @return new value of the RC
    355  */
    356 unsigned int
    357 EXTRACTOR_IPC_shared_memory_change_rc_ (struct EXTRACTOR_SharedMemory *shm,
    358                                         int delta);
    359 
    360 
    361 /**
    362  * Initialize shared memory area with data from a data source.
    363  *
    364  * @param shm memory area to initialize
    365  * @param ds data source to use for initialization
    366  * @param off offset in the data source to use
    367  * @param size number of bytes to copy
    368  * @return -1 on error, otherwise number of bytes copied
    369  */
    370 ssize_t
    371 EXTRACTOR_IPC_shared_memory_set_ (struct EXTRACTOR_SharedMemory *shm,
    372                                   struct EXTRACTOR_Datasource *ds,
    373                                   uint64_t off,
    374                                   size_t size);
    375 
    376 
    377 /**
    378  * Query a datasource for its current position.
    379  *
    380  * @param ds data source to query
    381  * @return current position in the datasource or UINT_MAX on error (NOTE(review): return type is uint64_t; confirm whether UINT64_MAX is meant)
    382  */
    383 uint64_t
    384 EXTRACTOR_datasource_get_pos_ (struct EXTRACTOR_Datasource *ds);
    385 
    386 
    387 /**
    388  * Create a channel to communicate with a process wrapping
    389  * the given plugin.  Starts the process as well.
    390  *
    391  * @param plugin the plugin the process will wrap
    392  * @param shm memory to share with the process
    393  * @return NULL on error, otherwise IPC channel
    394  */
    395 struct EXTRACTOR_Channel *
    396 EXTRACTOR_IPC_channel_create_ (struct EXTRACTOR_PluginList *plugin,
    397                                struct EXTRACTOR_SharedMemory *shm);
    398 
    399 
    400 /**
    401  * Destroy communication channel with a plugin/process.  Also
    402  * destroys the process.
    403  *
    404  * @param channel channel to destroy
    405  */
    406 void
    407 EXTRACTOR_IPC_channel_destroy_ (struct EXTRACTOR_Channel *channel);
    408 
    409 
    410 /**
    411  * Send data via the given IPC channel (blocking).
    412  *
    413  * @param channel channel to communicate with the plugin
    414  * @param data data to send
    415  * @param size number of bytes in data to send
    416  * @return -1 on error, number of bytes sent on success
    417  *           (never does partial writes)
    418  */
    419 ssize_t
    420 EXTRACTOR_IPC_channel_send_ (struct EXTRACTOR_Channel *channel,
    421                              const void *data,
    422                              size_t size);
    423 
    424 
    425 /**
    426  * Handler for a meta data message from one of the plugins.
    427  *
    428  * @param cls closure
    429  * @param plugin plugin of the channel sending the message
    430  * @param meta_type type of the meta data
    431  * @param meta_format format of the meta data
    432  * @param mime mime string sent from the plugin
    433  * @param value 'data' sent from the plugin
    434  * @param value_len number of bytes in 'value'
    435  */
    436 typedef void (*EXTRACTOR_ChannelMessageProcessor) (void *cls,
    437                                                    struct EXTRACTOR_PluginList *
    438                                                    plugin,
    439                                                    enum EXTRACTOR_MetaType
    440                                                    meta_type,
    441                                                    enum EXTRACTOR_MetaFormat
    442                                                    meta_format,
    443                                                    const char *mime,
    444                                                    const void *value,
    445                                                    size_t value_len);
    446 
    447 
    448 /**
    449  * Process a reply from channel (seek request, metadata and done message)
    450  *
    451  * @param plugin plugin this communication is about
    452  * @param data buffer with data from IPC channel
    453  * @param size number of bytes in the 'data' buffer
    454  * @param proc metadata callback
    455  * @param proc_cls closure for 'proc'
    456  * @return number of bytes processed, -1 on error
    457  */
    458 ssize_t
    459 EXTRACTOR_IPC_process_reply_ (struct EXTRACTOR_PluginList *plugin,
    460                               const void *data,
    461                               size_t size,
    462                               EXTRACTOR_ChannelMessageProcessor proc,
    463                               void *proc_cls);
    464 
    465 
    466 /**
    467  * Receive data from any of the given IPC channels (blocking).
    468  * Waits for one of the plugins to reply.
    469  *
    470  * @param channels array of channels; channels that break may be set to NULL
    471  * @param num_channels length of the 'channels' array
    472  * @param proc function to call to process messages (may be called
    473  *             more than once)
    474  * @param proc_cls closure for 'proc'
    475  * @return -1 on error (i.e. no response in 10s), 1 on success
    476  */
    477 int
    478 EXTRACTOR_IPC_channel_recv_ (struct EXTRACTOR_Channel **channels,
    479                              unsigned int num_channels,
    480                              EXTRACTOR_ChannelMessageProcessor proc,
    481                              void *proc_cls);
    482 
    483 
    484 #endif