libextractor-mono

GNU libextractor
Log | Files | Refs | LICENSE

Extractor.cs (11565B)


      1 // Extractor.cs
      2 // 
      3 // Copyright (C) 2008, 2009 Patrick Ulbrich, zulu99@gmx.net
      4 //
      5 // This program is free software: you can redistribute it and/or modify
      6 // it under the terms of the GNU General Public License as published by
      7 // the Free Software Foundation, either version 3 of the License, or
      8 // (at your option) any later version.
      9 //
     10 // This program is distributed in the hope that it will be useful,
     11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 // GNU General Public License for more details.
     14 //
     15 // You should have received a copy of the GNU General Public License
     16 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
     17 //
     18 
     19 // NOTE:
     20 //
     21 // The following functions have been implemented directly (based on the libextractor original code)
     22 // as a pinvoke call into the native library would involve a complicated conversion
     23 // of the managed Keyword[] array into an unmanaged linked list.
     24 // On top of that the native library would also try to free that list :-(.
     25 // The code of those functions is so simple that it isn't worth it anyway...
     26 //
     27 // EXTRACTOR_KeywordList * EXTRACTOR_removeDuplicateKeywords(EXTRACTOR_KeywordList * list, unsigned int options);
     28 // EXTRACTOR_KeywordList * EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * list);
     29 // EXTRACTOR_KeywordList * EXTRACTOR_removeKeywordsOfType(EXTRACTOR_KeywordList * list, EXTRACTOR_KeywordType type);
     30 // const char * EXTRACTOR_extractLast(EXTRACTOR_KeywordType type, EXTRACTOR_KeywordList * keywords);
     31 // const char * EXTRACTOR_extractLastByString(const char * type, EXTRACTOR_KeywordList * keywords);
     32 
     33 using System;
     34 using System.Collections.Generic;
     35 using System.Runtime.InteropServices;
     36 
     37 namespace LibExtractor
     38 {
     39 	public class Extractor : IDisposable
     40 	{
     41 		private IntPtr pExtractors;
     42 		private bool disposed;
     43 		
     44 		public Extractor() {
     45 			disposed = false;
     46 			pExtractors = IntPtr.Zero;
     47 		}
     48 		
     49 		~Extractor() {
     50 			Dispose(false);
     51 		}
     52 		
     53 		/// 
     54 		/// Instance members
     55 		///
     56 		public void LoadDefaultLibraries() {
     57 			EnsureNotDisposed();
     58 			if (pExtractors != IntPtr.Zero)
     59 				RemoveAllLibraries();
     60 				
     61 			pExtractors = EXTRACTOR_loadDefaultLibraries();
     62 		}
     63 
     64 		public void LoadConfigLibraries(string config) {
     65 			EnsureNotDisposed();
     66 			EnsureValidStringParam(config, "config");
     67 			// prev parameter may be null, so don't test for loaded extractors.
     68 			pExtractors = EXTRACTOR_loadConfigLibraries(pExtractors, config);
     69 		}
     70 		
     71 		public void AddLibrary(string library) {
     72 			EnsureNotDisposed();
     73 			EnsureValidStringParam(library, "library");
     74 			// prev parameter may be null, so don't test for loaded extractors.
     75 			pExtractors = EXTRACTOR_addLibrary(pExtractors, library);
     76 		}
     77 		
     78 		public void AddLibraryLast(string library) {
     79 			EnsureNotDisposed();
     80 			EnsureValidStringParam(library, "library");
     81 			// prev parameter may be null, so don't test for loaded extractors.
     82 			pExtractors = EXTRACTOR_addLibraryLast(pExtractors, library);
     83 		}
     84 		
     85 		public void RemoveLibrary(string library) {
     86 			EnsureNotDisposed();
     87 			EnsureValidStringParam(library, "library");
     88 			// prev parameter may be null, so don't test for loaded extractors.
     89 			pExtractors = EXTRACTOR_removeLibrary(pExtractors, library);
     90 		}
     91 		
     92 		public void RemoveAllLibraries() {
     93 			EnsureNotDisposed();
     94 			EXTRACTOR_removeAll(pExtractors);
     95 			pExtractors = IntPtr.Zero;
     96 		}
     97 		
     98 		public Keyword[] GetKeywords(string filename) {
     99 			EnsureNotDisposed();
    100 			EnsureValidStringParam(filename, "filename");
    101 			EnsureExtractors();
    102 			
    103 			List<Keyword> list = GetKeywordsInternal(EXTRACTOR_getKeywords(pExtractors, filename));
    104 			return list.ToArray();
    105 		}
    106 		
    107 		public Keyword[] GetKeywords(IntPtr data, int size) {
    108 			EnsureNotDisposed();
    109 			EnsureExtractors();
    110 			
    111 			if (data == IntPtr.Zero)
    112 				throw new ArgumentException("Data must not be a null pointer", "data");
    113 			if (size <= 0)
    114 				throw new ArgumentException("Size must be greater than 0", "size");
    115 			
    116 			List<Keyword> list = GetKeywordsInternal(EXTRACTOR_getKeywords2(pExtractors, data, size));
    117 			return list.ToArray();
    118 		}
    119 		
    120 		public Keyword[] GetKeywords(byte[] data) {
    121 			EnsureNotDisposed();
    122 			EnsureExtractors();
    123 			
    124 			if (data == null)
    125 				throw new ArgumentNullException("data");
    126 			if (data.Length == 0)
    127 				throw new ArgumentException("Length of data must be greater than 0", "data");
    128 				
    129 			IntPtr pMem = IntPtr.Zero;
    130 			try {
    131 				pMem = Marshal.AllocHGlobal(data.Length);
    132 				Marshal.Copy(data, 0, pMem, data.Length);
    133 				return GetKeywords(pMem, data.Length);
    134 			} finally {
    135 				if (pMem != IntPtr.Zero)
    136 					Marshal.FreeHGlobal(pMem);
    137 			}
    138 		}
    139 		
    140 		private List<Keyword> GetKeywordsInternal(IntPtr pKeywords) {
    141 			try {
    142 				List<Keyword> list = new List<Keyword>();
    143 				while(pKeywords != IntPtr.Zero) {
    144 					Keyword k = (Keyword)Marshal.PtrToStructure(pKeywords, typeof(Keyword));
    145 					list.Add(k);
    146 					pKeywords = k.next;
    147 				}
    148 				return list;
    149 			} finally {
    150 				if (pKeywords != IntPtr.Zero)
    151 					EXTRACTOR_freeKeywords(pKeywords);
    152 			}
    153 		}
    154 		
    155 		/// 
    156 		/// Static members
    157 		///
    158 		
    159 		// Returns an Extractor instance with the default library set loaded.
    160 		public static Extractor GetDefault() {
    161 			Extractor e = new Extractor();
    162 			e.LoadDefaultLibraries();
    163 			return e;
    164 		}
    165 		
    166 		public static string GetKeywordTypeAsString(KeywordType type) {
    167 			// NOTE : string does NOT need to be freed.
    168 			IntPtr pStr = EXTRACTOR_getKeywordTypeAsString(type);
    169 			string str = Marshal.PtrToStringAnsi(pStr);
    170 			return str;
    171 		}
    172 		
    173 		public static KeywordType GetHighestKeywordTypeNumber() {
    174 			return EXTRACTOR_getHighestKeywordTypeNumber();
    175 		}
    176 		
    177 		public static Keyword[] RemoveDuplicateKeywords(Keyword[] keywords, DuplicateOptions options) {
    178 			int removed = 0;
    179 			
    180 			for (int i = 0; i < keywords.Length; i++) {
    181 				Keyword current = keywords[i];
    182 				
    183 				if (current == null)
    184 					continue;
    185 				
    186 				KeywordType type = current.keywordType;
    187 				string keyword = current.keyword;
    188 				
    189 				for (int j = 0; j < keywords.Length; j++) {
    190 					Keyword pos = keywords[j];
    191 					
    192 					if ((i == j) || (pos == null))
    193 						continue;
    194 					
    195 					if ( (pos.keyword == keyword) &&
    196 					 ( (pos.keywordType == type) ||
    197 					   ( ((options & DuplicateOptions.DUPLICATES_TYPELESS) > 0) &&
    198 					     ( (pos.keywordType == KeywordType.EXTRACTOR_SPLIT) ||
    199 					       (type != KeywordType.EXTRACTOR_SPLIT)) ) ||
    200 					   ( ((options & DuplicateOptions.DUPLICATES_REMOVE_UNKNOWN) > 0) &&
    201 					     (pos.keywordType == KeywordType.EXTRACTOR_UNKNOWN)) ) ) {
    202 						
    203 						if (removed == 0) {
    204 							// do not modify the original array.
    205 							// lazy copy - copy only if the array
    206 							// will be modified.
    207 							keywords = CopyKeywords(keywords);
    208 						}
    209 						
    210 						// mark keyword as removed.
    211 						keywords[j] = null;
    212 						removed++;
    213 					}
    214 				}
    215 			}
    216 			
    217 			return RemoveNullKeywords(keywords, removed);
    218 		}
    219 		
    220 		public static Keyword[] RemoveEmptyKeywords(Keyword[] keywords) {
    221 			List<Keyword> lst = null;
    222 			
    223 			for (int i = 0; i < keywords.Length; i++) {
    224 				Keyword pos = keywords[i];
    225 				string keyword = pos.keyword;
    226 				bool allWhite = true;
    227 				
    228 				for (int j = 0; j < keyword.Length; j++) {
    229 					if (!char.IsWhiteSpace(keyword[j])) {
    230 						allWhite = false;
    231 						break;
    232 					}
    233 				}
    234 				
    235 				if (allWhite) {
    236 					if (lst == null) {
    237 						// lazy copy -
    238 						// copy only if keywords will actually be removed.
    239 						lst = new List<Keyword>(keywords.Length);
    240 						lst.AddRange(keywords);
    241 					}
    242 					lst.Remove(pos);
    243 				}
    244 			}
    245 			
    246 			if (lst == null)
    247 				return keywords;
    248 			else
    249 				return lst.ToArray();
    250 		}
    251 		
    252 		public static Keyword[] RemoveKeywordsOfType(Keyword[] keywords, KeywordType type) {
    253 			List<Keyword> lst = null;
    254 			
    255 			for (int i = 0; i < keywords.Length; i++) {
    256 				Keyword pos = keywords[i];
    257 				
    258 				if (pos.keywordType == type) {
    259 					if (lst == null) {
    260 						// lazy copy -
    261 						// copy only if keywords will actually be removed.
    262 						lst = new List<Keyword>(keywords.Length);
    263 						lst.AddRange(keywords);
    264 					}
    265 					lst.Remove(pos);
    266 				}
    267 			}
    268 			
    269 			if (lst == null)
    270 				return keywords;
    271 			else
    272 				return lst.ToArray();
    273 		}
    274 		
    275 		public static string ExtractLast(KeywordType type, Keyword[] keywords) {
    276 			string result = null;
    277 			for (int i = 0; i < keywords.Length; i++) {
    278 				Keyword pos = keywords[i];
    279 				if (pos.keywordType == type) {
    280 					result = pos.keyword;
    281 				}
    282 			}
    283 			return result;
    284 		}
    285 		
    286 		// NOTE : does not work with translated strings.
    287 		public static string ExtractLastByString(string type, Keyword[] keywords) {
    288 			string result = null;
    289 			for (int i = 0; i < keywords.Length; i++) {
    290 				Keyword pos = keywords[i];
    291 				if (GetKeywordTypeAsString(pos.keywordType) == type) {
    292 					result = pos.keyword;
    293 				}
    294 			}
    295 			return result;
    296 		}
    297 		
    298 		private static Keyword[] CopyKeywords(Keyword[] original) {
    299 			Keyword[] copy = new Keyword[original.Length];
    300 			Array.Copy(original, copy, original.Length);
    301 			return copy;
    302 		}
    303 		
    304 		private static Keyword[] RemoveNullKeywords(Keyword[] keywords, int nullCount) {
    305 			if (nullCount < 1)
    306 				return keywords;
    307 			
    308 			Keyword[] copy = new Keyword[keywords.Length - nullCount];
    309 			int n = 0;
    310 			
    311 			for (int i = 0; i < keywords.Length; i++) {
    312 				if (keywords[i] != null)
    313 					copy[n++] = keywords[i];
    314 			}
    315 			
    316 			return copy;
    317 		}
    318 		
    319 		/// 
    320 		/// Cleanup stuff
    321 		///
    322 		
    323 		#region IDisposable members
    324 		public void Dispose() {
    325 			Dispose(true);
    326 		}
    327 		
    328 		private void Dispose(bool disposing) {
    329 			if (!disposed) {
    330 				if (pExtractors != IntPtr.Zero)
    331 						RemoveAllLibraries();
    332 						
    333 				if (disposing) 						
    334 					GC.SuppressFinalize(this);
    335 			
    336 				disposed = true;
    337 			}
    338 		}
    339 		#endregion
    340 		
    341 		/// 
    342 		/// Helper methods
    343 		///
    344 		
    345 		private void EnsureNotDisposed() {
    346 			if (disposed)
    347 				throw new ObjectDisposedException("Extractor");
    348 		}
    349 		
    350 		private void EnsureExtractors() {
    351 			if (pExtractors == IntPtr.Zero)
    352 				throw new InvalidOperationException("No extractor libraries loaded");
    353 		}
    354 		
    355 		private void EnsureValidStringParam(string param, string paramName) {
    356 			if (param == null)
    357 				throw new ArgumentNullException(paramName);
    358 			if (param.Length == 0)
    359 				throw new ArgumentException("Parameter must not be null", paramName);
    360 		}
    361 		
    362 		/// 
    363 		/// Native libextractor imports
    364 		///
    365 		
    366 		#region Native imports
    367 		[DllImport("libextractor")]
    368 		private static extern IntPtr EXTRACTOR_loadDefaultLibraries();
    369 		
    370 		[DllImport("libextractor")]
    371 		private static extern IntPtr EXTRACTOR_loadConfigLibraries(IntPtr prev, string config);
    372 		
    373 		[DllImport("libextractor")]
    374 		private static extern IntPtr EXTRACTOR_addLibrary(IntPtr prev, string library);
    375 		
    376 		[DllImport("libextractor")]
    377 		private static extern IntPtr EXTRACTOR_addLibraryLast(IntPtr prev, string library);
    378 		
    379 		[DllImport("libextractor")]
    380 		private static extern IntPtr EXTRACTOR_removeLibrary(IntPtr prev, string library);
    381 			
    382 		[DllImport("libextractor")]
    383 		private static extern void EXTRACTOR_removeAll(IntPtr libraries);
    384 		
    385 		[DllImport("libextractor")]
    386 		private static extern IntPtr EXTRACTOR_getKeywords(IntPtr extractors, string filename);
    387 		
    388 		[DllImport("libextractor")]
    389 		private static extern IntPtr EXTRACTOR_getKeywords2(IntPtr extractors, IntPtr data, int size);
    390 		
    391 		[DllImport("libextractor")]
    392 		private static extern void EXTRACTOR_freeKeywords(IntPtr keywords);		
    393 		
    394 		[DllImport("libextractor")]
    395 		private static extern IntPtr EXTRACTOR_getKeywordTypeAsString(KeywordType type);
    396 		
    397 		[DllImport("libextractor")]
    398 		private static extern KeywordType EXTRACTOR_getHighestKeywordTypeNumber();
    399 		#endregion
    400 	}
    401 }