extractor_ipc.h (12596B)
1 /* 2 This file is part of libextractor. 3 Copyright (C) 2012 Vidyut Samanta and Christian Grothoff 4 5 libextractor is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 3, or (at your 8 option) any later version. 9 10 libextractor is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with libextractor; see the file COPYING. If not, write to the 17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 Boston, MA 02110-1301, USA. 19 */ 20 /** 21 * @file main/extractor_ipc.h 22 * @brief IPC with plugin (OS-independent API) 23 * @author Christian Grothoff 24 * 25 * @detail 26 * The IPC communication between plugins and the main library works 27 * as follows. Each message begins with a 1-character opcode which 28 * specifies the message type. The main library starts the plugins 29 * by forking the helper process and establishes two pipes for 30 * communication in both directions. 31 * First, the main library send an 'INIT_STATE' message 32 * to the plugin. The start message specifies the name (and size) 33 * of a shared memory segment which will contain parts of the (uncompressed) 34 * data of the file that is being processed. The same shared memory 35 * segment is used throughout the lifetime of the plugin. 36 * 37 * Then, the following messages are exchanged for each file. 38 * First, an EXTRACT_START message is sent with the specific 39 * size of the file (or -1 if unknown) and the number of bytes 40 * ready in the shared memory segment. The plugin then answers 41 * with either: 42 * 1) MESSAGE_DONE to indicate that no further processing is 43 * required for this file; the IPC continues with the 44 * EXTRACT_START message for the next file afterwards; 45 * 2) MESSAGE_SEEK to indicate that the plugin would like to 46 * read data at a different offset; the main library can 47 * then either 48 * a) respond with a MESSAGE_DISCARD_STATE to 49 * tell the plugin to abort processing (the next message will 50 * then be another EXTRACT_START) 51 * b) respond with a MESSAGE_UPDATED_SHM which notifies the 52 * plugin that the shared memory segment was moved to a 53 * different location in the overall file; the target of the 54 * seek should now be within the new range (but does NOT have 55 * to be at the beginning of the seek) 56 * 3) MESSAGE_META to provide extracted meta data to the main 57 * library. The main library can then either: 58 * a) respond with a MESSAGE_DISCARD_STATE to 59 * tell the plugin to abort processing (the next message will 60 * then be another EXTRACT_START) 61 * b) respond with a MESSAGE_CONTINUE_EXTRACTING to 62 * tell the plugin to continue extracting meta data; in this 63 * case, the plugin is then expected to produce another 64 * MESSAGE_DONE, MESSAGE_SEEK or MESSAGE_META round of messages. 65 */ 66 #ifndef EXTRACTOR_IPC_H 67 #define EXTRACTOR_IPC_H 68 69 #include "extractor_datasource.h" 70 71 72 /** 73 * How long do we allow an individual meta data object to be? 74 * Used to guard against (broken) plugns causing us to use 75 * excessive amounts of memory. 76 */ 77 #define MAX_META_DATA 32 * 1024 * 1024 78 79 /** 80 * Maximum length of a shared memory object name 81 */ 82 #define MAX_SHM_NAME 255 83 84 /** 85 * Sent from LE to a plugin to initialize it (opens shm). 86 */ 87 #define MESSAGE_INIT_STATE 0x00 88 89 /** 90 * IPC message send to plugin to initialize SHM. 91 */ 92 struct InitMessage 93 { 94 /** 95 * Set to #MESSAGE_INIT_STATE. 96 */ 97 unsigned char opcode; 98 99 /** 100 * Always zero. 101 */ 102 unsigned char reserved; 103 104 /** 105 * Always zero. 106 */ 107 uint16_t reserved2; 108 109 /** 110 * Name of the shared-memory name. 111 */ 112 uint32_t shm_name_length; 113 114 /** 115 * Maximum size of the shm map. 116 */ 117 uint32_t shm_map_size; 118 119 /* followed by name of the SHM */ 120 }; 121 122 123 /** 124 * Sent from LE to a plugin to tell it extracting 125 * can now start. The SHM will point to offset 0 126 * of the file. 127 */ 128 #define MESSAGE_EXTRACT_START 0x01 129 130 /** 131 * IPC message send to plugin to start extracting. 132 */ 133 struct StartMessage 134 { 135 /** 136 * Set to #MESSAGE_EXTRACT_START. 137 */ 138 unsigned char opcode; 139 140 /** 141 * Always zero. 142 */ 143 unsigned char reserved; 144 145 /** 146 * Always zero. 147 */ 148 uint16_t reserved2; 149 150 /** 151 * Number of bytes ready in SHM. 152 */ 153 uint32_t shm_ready_bytes; 154 155 /** 156 * Overall size of the file. 157 */ 158 uint64_t file_size; 159 160 }; 161 162 /** 163 * Sent from LE to a plugin to tell it that shm contents 164 * were updated. 165 */ 166 #define MESSAGE_UPDATED_SHM 0x02 167 168 /** 169 * IPC message send to plugin to notify it about a change in the SHM. 170 */ 171 struct UpdateMessage 172 { 173 /** 174 * Set to #MESSAGE_UPDATED_SHM. 175 */ 176 unsigned char opcode; 177 178 /** 179 * Always zero. 180 */ 181 unsigned char reserved; 182 183 /** 184 * Always zero. 185 */ 186 uint16_t reserved2; 187 188 /** 189 * Number of bytes ready in SHM. 190 */ 191 uint32_t shm_ready_bytes; 192 193 /** 194 * Offset of the shm in the overall file. 195 */ 196 uint64_t shm_off; 197 198 /** 199 * Overall size of the file. 200 */ 201 uint64_t file_size; 202 203 }; 204 205 /** 206 * Sent from plugin to LE to tell LE that plugin is done 207 * analyzing current file and will send no more data. 208 * No message format as this is only one byte. 209 */ 210 #define MESSAGE_DONE 0x03 211 212 /** 213 * Sent from plugin to LE to tell LE that plugin needs 214 * to read a different part of the source file. 215 */ 216 #define MESSAGE_SEEK 0x04 217 218 /** 219 * IPC message send to plugin to start extracting. 220 */ 221 struct SeekRequestMessage 222 { 223 /** 224 * Set to #MESSAGE_SEEK. 225 */ 226 unsigned char opcode; 227 228 /** 229 * Always zero. 230 */ 231 unsigned char reserved; 232 233 /** 234 * 'whence' value for the seek operation; 235 * 0 = SEEK_SET, 1 = SEEK_CUR, 2 = SEEK_END. 236 * Note that 'SEEK_CUR' is never used here. 237 */ 238 uint16_t whence; 239 240 /** 241 * Number of bytes requested for SHM. 242 */ 243 uint32_t requested_bytes; 244 245 /** 246 * Requested offset; a positive value from the end of the 247 * file is used of 'whence' is SEEK_END; a postive value 248 * from the start is used of 'whence' is SEEK_SET. 249 * 'SEEK_CUR' is never used. 250 */ 251 uint64_t file_offset; 252 253 }; 254 255 /** 256 * Sent from plugin to LE to tell LE about metadata discovered. 257 */ 258 #define MESSAGE_META 0x05 259 260 /** 261 * Plugin to parent: metadata discovered 262 */ 263 struct MetaMessage 264 { 265 /** 266 * Set to #MESSAGE_META. 267 */ 268 unsigned char opcode; 269 270 /** 271 * Always zero. 272 */ 273 unsigned char reserved; 274 275 /** 276 * An 'enum EXTRACTOR_MetaFormat' in 16 bits. 277 */ 278 uint16_t meta_format; 279 280 /** 281 * An 'enum EXTRACTOR_MetaType' in 16 bits. 282 */ 283 uint16_t meta_type; 284 285 /** 286 * Length of the mime type string. 287 */ 288 uint16_t mime_length; 289 290 /** 291 * Size of the value. 292 */ 293 uint32_t value_size; 294 295 /* followed by mime_length bytes of 0-terminated 296 mime-type (unless mime_length is 0) */ 297 298 /* followed by value_size bytes of value */ 299 300 }; 301 302 /** 303 * Sent from LE to plugin to make plugin discard its state 304 * (extraction aborted by application). Only one byte. 305 * Plugin should get ready for next 'StartMessage' after this. 306 * (sent in response to META data or SEEK requests). 307 */ 308 #define MESSAGE_DISCARD_STATE 0x06 309 310 /** 311 * Sent from LE to plugin to make plugin continue extraction. 312 * (sent in response to META data). 313 */ 314 #define MESSAGE_CONTINUE_EXTRACTING 0x07 315 316 317 /** 318 * Definition of an IPC communication channel with 319 * some plugin. 320 */ 321 struct EXTRACTOR_Channel; 322 323 /** 324 * Definition of a shared memory area. 325 */ 326 struct EXTRACTOR_SharedMemory; 327 328 329 /** 330 * Create a shared memory area. 331 * 332 * @param size size of the shared area 333 * @return NULL on error 334 */ 335 struct EXTRACTOR_SharedMemory * 336 EXTRACTOR_IPC_shared_memory_create_ (size_t size); 337 338 339 /** 340 * Destroy shared memory area. 341 * 342 * @param shm memory area to destroy 343 * @return NULL on error 344 */ 345 void 346 EXTRACTOR_IPC_shared_memory_destroy_ (struct EXTRACTOR_SharedMemory *shm); 347 348 349 /** 350 * Change the reference counter for this shm instance. 351 * 352 * @param shm instance to update 353 * @param delta value to change RC by 354 * @return new RC 355 */ 356 unsigned int 357 EXTRACTOR_IPC_shared_memory_change_rc_ (struct EXTRACTOR_SharedMemory *shm, 358 int delta); 359 360 361 /** 362 * Initialize shared memory area from data source. 363 * 364 * @param shm memory area to initialize 365 * @param ds data source to use for initialization 366 * @param off offset to use in data source 367 * @param size number of bytes to copy 368 * @return -1 on error, otherwise number of bytes copied 369 */ 370 ssize_t 371 EXTRACTOR_IPC_shared_memory_set_ (struct EXTRACTOR_SharedMemory *shm, 372 struct EXTRACTOR_Datasource *ds, 373 uint64_t off, 374 size_t size); 375 376 377 /** 378 * Query datasource for current position 379 * 380 * @param ds data source to query 381 * @return current position in the datasource or UINT_MAX on error 382 */ 383 uint64_t 384 EXTRACTOR_datasource_get_pos_ (struct EXTRACTOR_Datasource *ds); 385 386 387 /** 388 * Create a channel to communicate with a process wrapping 389 * the plugin of the given name. Starts the process as well. 390 * 391 * @param plugin the plugin 392 * @param shm memory to share with the process 393 * @return NULL on error, otherwise IPC channel 394 */ 395 struct EXTRACTOR_Channel * 396 EXTRACTOR_IPC_channel_create_ (struct EXTRACTOR_PluginList *plugin, 397 struct EXTRACTOR_SharedMemory *shm); 398 399 400 /** 401 * Destroy communication channel with a plugin/process. Also 402 * destroys the process. 403 * 404 * @param channel channel to communicate with the plugin 405 */ 406 void 407 EXTRACTOR_IPC_channel_destroy_ (struct EXTRACTOR_Channel *channel); 408 409 410 /** 411 * Send data via the given IPC channel (blocking). 412 * 413 * @param channel channel to communicate with the plugin 414 * @param buf data to send 415 * @param size number of bytes in buf to send 416 * @return -1 on error, number of bytes sent on success 417 * (never does partial writes) 418 */ 419 ssize_t 420 EXTRACTOR_IPC_channel_send_ (struct EXTRACTOR_Channel *channel, 421 const void *data, 422 size_t size); 423 424 425 /** 426 * Handler for a message from one of the plugins. 427 * 428 * @param cls closure 429 * @param plugin plugin of the channel sending the message 430 * @param meta_type type of the meta data 431 * @param meta_format format of the meta data 432 * @param mime mime string send from the plugin 433 * @param value 'data' send from the plugin 434 * @param value_len number of bytes in 'value' 435 */ 436 typedef void (*EXTRACTOR_ChannelMessageProcessor) (void *cls, 437 struct EXTRACTOR_PluginList * 438 plugin, 439 enum EXTRACTOR_MetaType 440 meta_type, 441 enum EXTRACTOR_MetaFormat 442 meta_format, 443 const char *mime, 444 const void *value, 445 size_t value_len); 446 447 448 /** 449 * Process a reply from channel (seek request, metadata and done message) 450 * 451 * @param plugin plugin this communication is about 452 * @param buf buffer with data from IPC channel 453 * @param size number of bytes in buffer 454 * @param proc metadata callback 455 * @param proc_cls callback cls 456 * @return number of bytes processed, -1 on error 457 */ 458 ssize_t 459 EXTRACTOR_IPC_process_reply_ (struct EXTRACTOR_PluginList *plugin, 460 const void *data, 461 size_t size, 462 EXTRACTOR_ChannelMessageProcessor proc, 463 void *proc_cls); 464 465 466 /** 467 * Receive data from any of the given IPC channels (blocking). 468 * Wait for one of the plugins to reply. 469 * 470 * @param channels array of channels, channels that break may be set to NULL 471 * @param num_channels length of the 'channels' array 472 * @param proc function to call to process messages (may be called 473 * more than once) 474 * @param proc_cls closure for 'proc' 475 * @return -1 on error (i.e. no response in 10s), 1 on success 476 */ 477 int 478 EXTRACTOR_IPC_channel_recv_ (struct EXTRACTOR_Channel **channels, 479 unsigned int num_channels, 480 EXTRACTOR_ChannelMessageProcessor proc, 481 void *proc_cls); 482 483 484 #endif