extractor_plugin_main.c (19054B)
1 /* 2 This file is part of libextractor. 3 Copyright (C) 2012 Vidyut Samanta and Christian Grothoff 4 5 libextractor is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 3, or (at your 8 option) any later version. 9 10 libextractor is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with libextractor; see the file COPYING. If not, write to the 17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 Boston, MA 02110-1301, USA. 19 */ 20 /** 21 * @file main/extractor_plugin_main.c 22 * @brief main loop for an out-of-process plugin 23 * @author Christian Grothoff 24 */ 25 #include "platform.h" 26 #include "extractor.h" 27 #include "extractor_common.h" 28 #include "extractor_datasource.h" 29 #include "extractor_plugins.h" 30 #include "extractor_ipc.h" 31 #include "extractor_logging.h" 32 #include "extractor_plugin_main.h" 33 #include <dirent.h> 34 #include <sys/types.h> 35 #if GNU_LINUX 36 #include <sys/wait.h> 37 #include <sys/shm.h> 38 #include <signal.h> 39 #endif 40 41 #if WINDOWS 42 #define SHM_ID HANDLE 43 #define INVALID_SHM_ID NULL 44 #else 45 #define SHM_ID int 46 #define INVALID_SHM_ID -1 47 #endif 48 49 /** 50 * Closure we use for processing requests inside the helper process. 51 */ 52 struct ProcessingContext 53 { 54 /** 55 * Our plugin handle. 56 */ 57 struct EXTRACTOR_PluginList *plugin; 58 59 /** 60 * Shared memory area. 61 */ 62 void *shm; 63 64 /** 65 * Overall size of the file. 66 */ 67 uint64_t file_size; 68 69 /** 70 * Current read offset when reading from the SHM. 71 */ 72 uint64_t read_position; 73 74 /** 75 * Current offset of the SHM in the file. 76 */ 77 uint64_t shm_off; 78 79 /** 80 * Handle to the shared memory. 81 */ 82 SHM_ID shm_id; 83 84 /** 85 * Size of the shared memory map. 86 */ 87 uint32_t shm_map_size; 88 89 /** 90 * Number of bytes ready in SHM. 91 */ 92 uint32_t shm_ready_bytes; 93 94 /** 95 * Input stream. 96 */ 97 int in; 98 99 /** 100 * Output stream. 101 */ 102 int out; 103 }; 104 105 106 /** 107 * Moves current absolute buffer position to 'pos' in 'whence' mode. 108 * Will move logical position without shifting the buffer, if possible. 109 * Will not move beyond the end of file. 110 * 111 * @param plugin plugin context 112 * @param pos position to move to 113 * @param whence seek mode (SEEK_CUR, SEEK_SET, SEEK_END) 114 * @return new absolute position, -1 on error 115 */ 116 static int64_t 117 plugin_env_seek (void *cls, 118 int64_t pos, 119 int whence) 120 { 121 struct ProcessingContext *pc = cls; 122 struct SeekRequestMessage srm = { 123 .opcode = MESSAGE_SEEK 124 }; 125 struct UpdateMessage um; 126 uint64_t npos; 127 unsigned char reply; 128 uint16_t wval; 129 130 switch (whence) 131 { 132 case SEEK_CUR: 133 if ( (pos < 0) && (pc->read_position < -pos) ) 134 { 135 LOG ("Invalid seek operation\n"); 136 return -1; 137 } 138 if ((pos > 0) && ((pc->read_position + pos < pc->read_position) || 139 (pc->read_position + pos > pc->file_size))) 140 { 141 LOG ("Invalid seek operation\n"); 142 return -1; 143 } 144 npos = (uint64_t) (pc->read_position + pos); 145 wval = 0; 146 break; 147 case SEEK_END: 148 if (pos > 0) 149 { 150 LOG ("Invalid seek operation\n"); 151 return -1; 152 } 153 if (UINT64_MAX == pc->file_size) 154 { 155 wval = 2; 156 npos = (uint64_t) -pos; 157 break; 158 } 159 pos = (int64_t) (pc->file_size + pos); 160 /* fall-through! */ 161 case SEEK_SET: 162 if ( (pos < 0) || (pc->file_size < pos) ) 163 { 164 LOG ("Invalid seek operation\n"); 165 return -1; 166 } 167 npos = (uint64_t) pos; 168 wval = 0; 169 break; 170 default: 171 LOG ("Invalid seek operation\n"); 172 return -1; 173 } 174 if ( (pc->shm_off <= npos) && 175 (pc->shm_off + pc->shm_ready_bytes > npos) && 176 (0 == wval) ) 177 { 178 pc->read_position = npos; 179 return (int64_t) npos; 180 } 181 /* need to seek */ 182 srm.opcode = MESSAGE_SEEK; 183 srm.reserved = 0; 184 srm.whence = wval; 185 srm.requested_bytes = pc->shm_map_size; 186 if (0 == wval) 187 { 188 if (srm.requested_bytes > pc->file_size - npos) 189 srm.requested_bytes = pc->file_size - npos; 190 } 191 else 192 { 193 srm.requested_bytes = npos; 194 } 195 srm.file_offset = npos; 196 if (-1 == EXTRACTOR_write_all_ (pc->out, &srm, sizeof (srm))) 197 { 198 LOG ("Failed to send MESSAGE_SEEK\n"); 199 return -1; 200 } 201 if (-1 == 202 EXTRACTOR_read_all_ (pc->in, 203 &reply, sizeof (reply))) 204 { 205 LOG ("Plugin `%s' failed to read response to MESSAGE_SEEK\n", 206 pc->plugin->short_libname); 207 return -1; 208 } 209 if (MESSAGE_UPDATED_SHM != reply) 210 { 211 LOG ("Unexpected reply %d to seek\n", reply); 212 return -1; /* was likely a MESSAGE_DISCARD_STATE */ 213 } 214 if (-1 == EXTRACTOR_read_all_ (pc->in, &um.reserved, sizeof (um) - 1)) 215 { 216 LOG ("Failed to read MESSAGE_UPDATED_SHM\n"); 217 return -1; 218 } 219 pc->shm_off = um.shm_off; 220 pc->shm_ready_bytes = um.shm_ready_bytes; 221 pc->file_size = um.file_size; 222 if (2 == wval) 223 { 224 /* convert offset to be absolute from beginning of the file */ 225 npos = pc->file_size - npos; 226 } 227 if ( (pc->shm_off <= npos) && 228 ((pc->shm_off + pc->shm_ready_bytes > npos) || 229 (pc->file_size == pc->shm_off)) ) 230 { 231 pc->read_position = npos; 232 return (int64_t) npos; 233 } 234 /* oops, serious missunderstanding, we asked to seek 235 and then were notified about a different position!? */ 236 LOG ( 237 "Plugin `%s' got invalid MESSAGE_UPDATED_SHM in response to my %d-seek (%llu not in %llu-%llu)\n", 238 pc->plugin->short_libname, 239 (int) wval, 240 (unsigned long long) npos, 241 (unsigned long long) pc->shm_off, 242 (unsigned long long) pc->shm_off + pc->shm_ready_bytes); 243 return -1; 244 } 245 246 247 /** 248 * Fills @a data with a pointer to the data buffer. 249 * 250 * @param plugin plugin context 251 * @param data location to store data pointer 252 * @param count number of bytes to read 253 * @return number of bytes (<= count) available in @a data, -1 on error 254 */ 255 static ssize_t 256 plugin_env_read (void *cls, 257 void **data, 258 size_t count) 259 { 260 struct ProcessingContext *pc = cls; 261 unsigned char *dp; 262 263 *data = NULL; 264 if ( (count + pc->read_position > pc->file_size) || 265 (count + pc->read_position < pc->read_position) ) 266 count = pc->file_size - pc->read_position; 267 if ( ( ( (pc->read_position >= pc->shm_off + pc->shm_ready_bytes) && 268 (pc->read_position < pc->file_size)) || 269 (pc->read_position < pc->shm_off) ) && 270 (-1 == plugin_env_seek (pc, pc->read_position, SEEK_SET) ) ) 271 { 272 LOG ("Failed to seek to satisfy read\n"); 273 return -1; 274 } 275 if (pc->read_position + count > pc->shm_off + pc->shm_ready_bytes) 276 count = pc->shm_off + pc->shm_ready_bytes - pc->read_position; 277 dp = pc->shm; 278 *data = &dp[pc->read_position - pc->shm_off]; 279 pc->read_position += count; 280 return count; 281 } 282 283 284 /** 285 * Provide the overall file size to plugins. 286 * 287 * @param cls the 'struct ProcessingContext' 288 * @return overall file size of the current file 289 */ 290 static uint64_t 291 plugin_env_get_size (void *cls) 292 { 293 struct ProcessingContext *pc = cls; 294 295 return pc->file_size; 296 } 297 298 299 /** 300 * Function called by a plugin in a child process. Transmits 301 * the meta data back to the parent process. 302 * 303 * @param cls closure, "struct ProcessingContext" with the FD for transmission 304 * @param plugin_name name of the plugin that produced this value; 305 * special values can be used (i.e. '<zlib>' for zlib being 306 * used in the main libextractor library and yielding 307 * meta data). 308 * @param type libextractor-type describing the meta data 309 * @param format basic format information about data 310 * @param data_mime_type mime-type of data (not of the original file); 311 * can be NULL (if mime-type is not known) 312 * @param data actual meta-data found 313 * @param data_len number of bytes in data 314 * @return 0 to continue extracting, 1 to abort (transmission error) 315 */ 316 static int 317 plugin_env_send_proc (void *cls, 318 const char *plugin_name, 319 enum EXTRACTOR_MetaType type, 320 enum EXTRACTOR_MetaFormat format, 321 const char *data_mime_type, 322 const char *data, 323 size_t data_len) 324 { 325 struct ProcessingContext *pc = cls; 326 struct MetaMessage mm; 327 size_t mime_len; 328 unsigned char reply; 329 330 if (data_len > MAX_META_DATA) 331 return 0; /* skip, too large */ 332 if (NULL == data_mime_type) 333 mime_len = 0; 334 else 335 mime_len = strlen (data_mime_type) + 1; 336 if (mime_len > UINT16_MAX) 337 mime_len = UINT16_MAX; 338 mm.opcode = MESSAGE_META; 339 mm.reserved = 0; 340 mm.meta_type = type; 341 mm.meta_format = (uint16_t) format; 342 mm.mime_length = (uint16_t) mime_len; 343 mm.value_size = (uint32_t) data_len; 344 if ( (sizeof (mm) != 345 EXTRACTOR_write_all_ (pc->out, 346 &mm, sizeof (mm))) || 347 (mime_len != 348 EXTRACTOR_write_all_ (pc->out, 349 data_mime_type, mime_len)) || 350 (data_len != 351 EXTRACTOR_write_all_ (pc->out, 352 data, data_len)) ) 353 { 354 LOG ("Failed to send meta message\n"); 355 return 1; 356 } 357 if (-1 == 358 EXTRACTOR_read_all_ (pc->in, 359 &reply, sizeof (reply))) 360 { 361 LOG ("Failed to read response to meta message\n"); 362 return 1; 363 } 364 if (MESSAGE_DISCARD_STATE == reply) 365 return 1; 366 if (MESSAGE_CONTINUE_EXTRACTING != reply) 367 { 368 LOG ("Received unexpected reply to meta data: %d\n", reply); 369 return 1; 370 } 371 return 0; 372 } 373 374 375 /** 376 * Handle an init message. The opcode itself has already been read. 377 * 378 * @param pc processing context 379 * @return 0 on success, -1 on error 380 */ 381 static int 382 handle_init_message (struct ProcessingContext *pc) 383 { 384 struct InitMessage init = { 385 .opcode = MESSAGE_INIT_STATE 386 }; 387 388 if (NULL != pc->shm) 389 { 390 LOG ("Cannot handle 'init' message, have already been initialized\n"); 391 return -1; 392 } 393 if (sizeof (struct InitMessage) - 1 394 != EXTRACTOR_read_all_ (pc->in, 395 &init.reserved, 396 sizeof (struct InitMessage) - 1)) 397 { 398 LOG ("Failed to read 'init' message\n"); 399 return -1; 400 } 401 if (init.shm_name_length > MAX_SHM_NAME) 402 { 403 LOG ("Invalid 'init' message\n"); 404 return -1; 405 } 406 { 407 char shm_name[init.shm_name_length + 1]; 408 409 if (init.shm_name_length 410 != EXTRACTOR_read_all_ (pc->in, 411 shm_name, 412 init.shm_name_length)) 413 { 414 LOG ("Failed to read 'init' message\n"); 415 return -1; 416 } 417 shm_name[init.shm_name_length] = '\0'; 418 419 pc->shm_map_size = init.shm_map_size; 420 #if WINDOWS 421 /* FIXME: storing pointer in an int */ 422 pc->shm_id = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name); 423 if (NULL == pc->shm_id) 424 return -1; 425 pc->shm = MapViewOfFile (pc->shm_id, FILE_MAP_READ, 0, 0, 0); 426 if (NULL == pc->shm) 427 { 428 CloseHandle (pc->shm_id); 429 return -1; 430 } 431 #else 432 pc->shm_id = shm_open (shm_name, O_RDONLY, 0); 433 if (-1 == pc->shm_id) 434 { 435 LOG_STRERROR_FILE ("open", shm_name); 436 return -1; 437 } 438 pc->shm = mmap (NULL, 439 pc->shm_map_size, 440 PROT_READ, 441 MAP_SHARED, 442 pc->shm_id, 0); 443 if ( ((void*) -1) == pc->shm) 444 { 445 LOG_STRERROR_FILE ("mmap", shm_name); 446 return -1; 447 } 448 #endif 449 } 450 return 0; 451 } 452 453 454 /** 455 * Handle a start message. The opcode itself has already been read. 456 * 457 * @param pc processing context 458 * @return 0 on success, -1 on error 459 */ 460 static int 461 handle_start_message (struct ProcessingContext *pc) 462 { 463 struct StartMessage start = { 464 .opcode = MESSAGE_EXTRACT_START 465 }; 466 struct EXTRACTOR_ExtractContext ec; 467 char done; 468 469 if (sizeof (struct StartMessage) - 1 470 != EXTRACTOR_read_all_ (pc->in, 471 &start.reserved, 472 sizeof (struct StartMessage) - 1)) 473 { 474 LOG ("Failed to read 'start' message\n"); 475 return -1; 476 } 477 pc->shm_ready_bytes = start.shm_ready_bytes; 478 pc->file_size = start.file_size; 479 pc->read_position = 0; 480 pc->shm_off = 0; 481 ec.cls = pc; 482 ec.config = pc->plugin->plugin_options; 483 ec.read = &plugin_env_read; 484 ec.seek = &plugin_env_seek; 485 ec.get_size = &plugin_env_get_size; 486 ec.proc = &plugin_env_send_proc; 487 pc->plugin->extract_method (&ec); 488 done = MESSAGE_DONE; 489 if (-1 == EXTRACTOR_write_all_ (pc->out, &done, sizeof (done))) 490 { 491 LOG ("Failed to write 'done' message\n"); 492 return -1; 493 } 494 if ( (NULL != pc->plugin->specials) && 495 (NULL != strstr (pc->plugin->specials, "force-kill")) ) 496 { 497 /* we're required to die after each file since this 498 plugin only supports a single file at a time */ 499 #if ! WINDOWS 500 fsync (pc->out); 501 #else 502 _commit (pc->out); 503 #endif 504 _exit (0); 505 } 506 return 0; 507 } 508 509 510 /** 511 * Main loop function for plugins. Reads a message from the plugin 512 * input pipe and acts on it. 513 * 514 * @param pc processing context 515 */ 516 static void 517 process_requests (struct ProcessingContext *pc) 518 { 519 while (1) 520 { 521 unsigned char code; 522 523 if (1 != EXTRACTOR_read_all_ (pc->in, &code, 1)) 524 { 525 LOG ("Failed to read next request\n"); 526 break; 527 } 528 switch (code) 529 { 530 case MESSAGE_INIT_STATE: 531 if (0 != handle_init_message (pc)) 532 { 533 LOG ("Failure to handle INIT\n"); 534 return; 535 } 536 break; 537 case MESSAGE_EXTRACT_START: 538 if (0 != handle_start_message (pc)) 539 { 540 LOG ("Failure to handle START\n"); 541 return; 542 } 543 break; 544 case MESSAGE_UPDATED_SHM: 545 LOG ("Illegal message\n"); 546 /* not allowed here, we're not waiting for SHM to move! */ 547 return; 548 case MESSAGE_DISCARD_STATE: 549 /* odd, we're already in the start state... */ 550 continue; 551 default: 552 LOG ("Received invalid message %d\n", (int) code); 553 /* error, unexpected message */ 554 return; 555 } 556 } 557 } 558 559 560 /** 561 * Open '/dev/null' and make the result the given 562 * file descriptor. 563 * 564 * @param target_fd desired FD to point to /dev/null 565 * @param flags open flags (O_RDONLY, O_WRONLY) 566 */ 567 static void 568 open_dev_null (int target_fd, 569 int flags) 570 { 571 int fd; 572 573 #ifndef WINDOWS 574 fd = open ("/dev/null", flags); 575 #else 576 fd = open ("\\\\?\\NUL", flags); 577 #endif 578 if (-1 == fd) 579 { 580 LOG_STRERROR_FILE ("open", "/dev/null"); 581 return; /* good luck */ 582 } 583 if (fd == target_fd) 584 return; /* already done */ 585 if (-1 == dup2 (fd, target_fd)) 586 { 587 LOG_STRERROR ("dup2"); 588 (void) close (fd); 589 return; /* good luck */ 590 } 591 /* close original result from 'open' */ 592 if (0 != close (fd)) 593 LOG_STRERROR ("close"); 594 } 595 596 597 /** 598 * 'main' function of the child process. Loads the plugin, 599 * sets up its in and out pipes, then runs the request serving function. 600 * 601 * @param plugin extractor plugin to use 602 * @param in stream to read from 603 * @param out stream to write to 604 */ 605 void 606 EXTRACTOR_plugin_main_ (struct EXTRACTOR_PluginList *plugin, 607 int in, int out) 608 { 609 struct ProcessingContext pc; 610 611 if (0 != EXTRACTOR_plugin_load_ (plugin)) 612 { 613 #if DEBUG 614 fprintf (stderr, "Plugin `%s' failed to load!\n", 615 plugin->short_libname); 616 #endif 617 return; 618 } 619 if ( (NULL != plugin->specials) && 620 (NULL != strstr (plugin->specials, "close-stderr"))) 621 { 622 if (0 != close (2)) 623 LOG_STRERROR ("close"); 624 open_dev_null (2, O_WRONLY); 625 } 626 if ( (NULL != plugin->specials) && 627 (NULL != strstr (plugin->specials, "close-stdout"))) 628 { 629 if (0 != close (1)) 630 LOG_STRERROR ("close"); 631 open_dev_null (1, O_WRONLY); 632 } 633 pc.plugin = plugin; 634 pc.in = in; 635 pc.out = out; 636 pc.shm_id = INVALID_SHM_ID; 637 pc.shm = NULL; 638 pc.shm_map_size = 0; 639 process_requests (&pc); 640 LOG ("IPC error; plugin `%s' terminates!\n", 641 plugin->short_libname); 642 #if WINDOWS 643 if (NULL != pc.shm) 644 UnmapViewOfFile (pc.shm); 645 if (NULL != pc.shm_id) 646 CloseHandle (pc.shm_id); 647 #else 648 if ( (NULL != pc.shm) && 649 (((void*) 1) != pc.shm) ) 650 munmap (pc.shm, pc.shm_map_size); 651 if (-1 != pc.shm_id) 652 { 653 if (0 != close (pc.shm_id)) 654 LOG_STRERROR ("close"); 655 } 656 #endif 657 } 658 659 660 #if WINDOWS 661 /** 662 * Reads plugin data from the LE server process. 663 * Also initializes allocation granularity (duh...). 664 * 665 * @param fd the pipe to read from 666 * @return newly allocated plugin context 667 */ 668 static struct EXTRACTOR_PluginList * 669 read_plugin_data (int fd) 670 { 671 struct EXTRACTOR_PluginList *ret; 672 SYSTEM_INFO si; 673 size_t i; 674 675 // FIXME: check for errors from 'EXTRACTOR_read_all_'! 676 if (NULL == (ret = malloc (sizeof (struct EXTRACTOR_PluginList)))) 677 { 678 LOG_STRERROR ("malloc"); 679 return NULL; 680 } 681 memset (ret, 0, sizeof (struct EXTRACTOR_PluginList)); 682 /*GetSystemInfo (&si); 683 ret->allocation_granularity = si.dwAllocationGranularity;*/ 684 EXTRACTOR_read_all_ (fd, &i, sizeof (size_t)); 685 if (NULL == (ret->libname = malloc (i))) 686 { 687 free (ret); 688 return NULL; 689 } 690 EXTRACTOR_read_all_ (fd, ret->libname, i); 691 ret->libname[i - 1] = '\0'; 692 EXTRACTOR_read_all_ (fd, &i, sizeof (size_t)); 693 if (NULL == (ret->short_libname = malloc (i))) 694 { 695 free (ret->libname); 696 free (ret); 697 return NULL; 698 } 699 EXTRACTOR_read_all_ (fd, ret->short_libname, i); 700 ret->short_libname[i - 1] = '\0'; 701 EXTRACTOR_read_all_ (fd, &i, sizeof (size_t)); 702 if (0 == i) 703 { 704 ret->plugin_options = NULL; 705 return ret; 706 } 707 if (NULL == (ret->plugin_options = malloc (i))) 708 { 709 free (ret->short_libname); 710 free (ret->libname); 711 free (ret); 712 return NULL; 713 } 714 EXTRACTOR_read_all_ (fd, ret->plugin_options, i); 715 ret->plugin_options[i - 1] = '\0'; 716 return ret; 717 } 718 719 720 /** 721 * FIXME: document. 722 */ 723 void CALLBACK 724 RundllEntryPoint (HWND hwnd, 725 HINSTANCE hinst, 726 LPSTR lpszCmdLine, 727 int nCmdShow) 728 { 729 struct EXTRACTOR_PluginList *plugin; 730 intptr_t in_h; 731 intptr_t out_h; 732 int in; 733 int out; 734 735 sscanf (lpszCmdLine, "%lu %lu", &in_h, &out_h); 736 in = _open_osfhandle (in_h, _O_RDONLY); 737 out = _open_osfhandle (out_h, 0); 738 setmode (in, _O_BINARY); 739 setmode (out, _O_BINARY); 740 if (NULL == (plugin = read_plugin_data (in))) 741 { 742 close (in); 743 close (out); 744 return; 745 } 746 EXTRACTOR_plugin_main_ (plugin, in, out); 747 close (in); 748 close (out); 749 /* libgobject may crash us hard if we LoadLibrary() it directly or 750 * indirectly, and then exit normally (causing FreeLibrary() to be 751 * called by the OS) or call FreeLibrary() on it directly or 752 * indirectly. 753 * By terminating here we alleviate that problem. 754 */TerminateProcess (GetCurrentProcess (), 0); 755 } 756 757 758 /** 759 * FIXME: document. 760 */ 761 void CALLBACK 762 RundllEntryPointA (HWND hwnd, 763 HINSTANCE hinst, 764 LPSTR lpszCmdLine, 765 int nCmdShow) 766 { 767 return RundllEntryPoint (hwnd, hinst, lpszCmdLine, nCmdShow); 768 } 769 770 771 #endif