aboutsummaryrefslogtreecommitdiff
path: root/src/main/extractor_plugins.h
blob: 84deb22b789e9829080c3535feadd68c39fc9968 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
/*
     This file is part of libextractor.
     (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff

     libextractor is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published
     by the Free Software Foundation; either version 2, or (at your
     option) any later version.

     libextractor is distributed in the hope that it will be useful, but
     WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with libextractor; see the file COPYING.  If not, write to the
     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     Boston, MA 02111-1307, USA.
 */

#ifndef EXTRACTOR_PLUGINS_H
#define EXTRACTOR_PLUGINS_H

#include "platform.h"
#include "plibc.h"
#include "extractor.h"
#ifndef WINDOWS
#include <sys/wait.h>
#include <sys/shm.h>
#endif
#include <signal.h>
#include <ltdl.h>

/**
 * Linked list of extractor plugins.  An application builds this list
 * by telling libextractor to load various keyword-extraction
 * plugins. Libraries can also be unloaded (removed from this list,
 * see EXTRACTOR_plugin_remove).
 *
 * Each list entry carries the per-plugin state: the loaded library,
 * the plugin's options and flags, the handles used to talk to the
 * plugin's child process (process, pipes, shared memory) and the
 * plugin's current position in the data stream.  Several members
 * have a different type depending on whether WINDOWS is set.
 */
struct EXTRACTOR_PluginList
{
  /**
   * Next entry; this is a singly linked list.
   */
  struct EXTRACTOR_PluginList *next;

  /**
   * Pointer to the plugin (as returned by lt_dlopen).
   */
  void * libraryHandle;

  /**
   * Name of the library (e.g., 'libextractor_foo.so')
   */
  char *libname;

  /**
   * Short name of the plugin (e.g., 'foo')
   */
  char *short_libname;
  
  /**
   * Pointer to the function used for meta data extraction.
   */
  EXTRACTOR_extract_method extract_method;

  /**
   * Options for the plugin.
   */
  char *plugin_options;

  /**
   * Special options for the plugin
   * (as returned by the plugin's "options" method;
   * typically NULL).
   */
  const char *specials;

  /**
   * Flags to control how the plugin is executed.
   */
  enum EXTRACTOR_Options flags;

  /**
   * Child process for this plugin: a process ID ('cpid') on
   * non-Windows systems, a process handle ('hProcess') on Windows.
   * The process ID is 0 if there is no child process.
   * NOTE(review): the "no child" value of the Windows handle is not
   * visible in this header -- confirm against the implementation.
   */
#if !WINDOWS
  int cpid;
#else
  HANDLE hProcess;
#endif

  /**
   * Pipe used to communicate information to the plugin child process
   * (a stdio stream on non-Windows systems, a handle on Windows).
   * NULL if not initialized.
   */
#if !WINDOWS
  FILE *cpipe_in;
#else
  HANDLE cpipe_in;
#endif

  /**
   * Pipe used by plugin to read from its parent.
   * NOTE(review): presumably a file descriptor -- confirm.
   */
  int pipe_in;

  /**
   * A position this plugin wants us to seek to.  Starts at 0;
   * -1 once the plugin is finished.
   */
  int64_t seek_request;

#if !WINDOWS
  /**
   * ID of the shm object
   */
  int shm_id;
#else
  /**
   * Handle of the shm object
   */
  HANDLE map_handle;
#endif

  /**
   * Used to pass cfs pointer to in-process plugin in OPMODE_DECOMPRESS
   */
  void *pass_cfs;

  /**
   * Uncompressed stream size. Initially -1, until file is fully decompressed
   * (for sources that are not compressed it is set from the start).
   */
  int64_t fsize;

  /**
   * Absolute position within the stream
   */
  int64_t fpos;

  /**
   * Pointer to the shared memory segment
   */
  unsigned char *shm_ptr;

  /**
   * Number of bytes in the segment
   */
  int64_t map_size;

  /**
   * Position within the segment
   */
  int64_t shm_pos;

#if !WINDOWS
  /**
   * Pipe used to read information about extracted meta data from
   * the plugin child process.  -1 if not initialized.
   */
  int cpipe_out;
#else
  /**
   * Pipe used to read information about extracted meta data from
   * the plugin child process.
   * NOTE(review): the original comment said "-1 if not initialized",
   * but this member is a HANDLE here; presumably the sentinel is
   * INVALID_HANDLE_VALUE -- confirm against the implementation.
   */
  HANDLE cpipe_out;
#endif

#if !WINDOWS
  /**
   * Page size. Mmap offset is a multiple of this number.
   */
  long allocation_granularity;
#else
  /**
   * Page size. Mmap offset is a multiple of this number.
   */
  DWORD allocation_granularity;
#endif

#if WINDOWS
  /**
   * A structure for overlapped reads on W32.
   */
  OVERLAPPED ov_read;

  /**
   * A structure for overlapped writes on W32.
   */
  OVERLAPPED ov_write;

  /**
   * A write buffer for overlapped writes on W32
   */
  unsigned char *ov_write_buffer;
#endif

  /**
   * Mode of operation. One of the OPMODE_* constants
   * (defined outside this header).
   */
  uint8_t operation_mode;

  /**
   * 1 if plugin is currently in a recursive process_requests() call,
   * 0 otherwise
   */
  int waiting_for_update;
};


/**
 * Load a plugin.
 *
 * Only declared here; the definition lives in another file.
 *
 * @param plugin list entry describing the plugin to load
 *        (NOTE(review): presumably 'libraryHandle' and
 *        'extract_method' are filled in on success -- confirm
 *        against the implementation)
 * @return 0 on success, -1 on error
 */
int
EXTRACTOR_plugin_load_ (struct EXTRACTOR_PluginList *plugin);

#endif /* EXTRACTOR_PLUGINS_H */