libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 4e19b7b871238d11d114711251fa60d12f0ec0b9
parent 2b909c750f3cab80335d6fe54573020488bbd6b5
Author: Christian Grothoff <christian@grothoff.org>
Date:   Sat,  7 May 2005 19:40:22 +0000

use options for split

Diffstat:
M src/include/extractor.h | 39 +++++++++++++++++++++------------------
M src/plugins/splitextractor.c | 36 ++++++++++++++++++++++--------------
2 files changed, 43 insertions(+), 32 deletions(-)

diff --git a/src/include/extractor.h b/src/include/extractor.h @@ -146,11 +146,12 @@ typedef struct EXTRACTOR_Keywords { * Signature of the extract method that each plugin * must provide. */ -typedef EXTRACTOR_KeywordList * (*ExtractMethod)(const char * filename, - char * data, - size_t filesize, - EXTRACTOR_KeywordList * next, - const char * options); +typedef EXTRACTOR_KeywordList * +(*ExtractMethod)(const char * filename, + char * data, + size_t filesize, + EXTRACTOR_KeywordList * next, + const char * options); /** * Linked list of extractor helper-libraries. An application @@ -179,12 +180,14 @@ EXTRACTOR_ExtractorList * EXTRACTOR_loadDefaultLibraries(); * Get the textual name of the keyword. * @return NULL if the type is not known */ -const char * EXTRACTOR_getKeywordTypeAsString(const EXTRACTOR_KeywordType type); +const char * +EXTRACTOR_getKeywordTypeAsString(const EXTRACTOR_KeywordType type); /** * Return the highest type number, exclusive as in [0,highest). */ -EXTRACTOR_KeywordType EXTRACTOR_getHighestKeywordTypeNumber(); +EXTRACTOR_KeywordType +EXTRACTOR_getHighestKeywordTypeNumber(); /** * Load multiple libraries as specified by the user. @@ -202,7 +205,7 @@ EXTRACTOR_KeywordType EXTRACTOR_getHighestKeywordTypeNumber(); */ EXTRACTOR_ExtractorList * EXTRACTOR_loadConfigLibraries(EXTRACTOR_ExtractorList * prev, - const char * config); + const char * config); /** * Add a library for keyword extraction. @@ -212,7 +215,7 @@ EXTRACTOR_loadConfigLibraries(EXTRACTOR_ExtractorList * prev, */ EXTRACTOR_ExtractorList * EXTRACTOR_addLibrary(EXTRACTOR_ExtractorList * prev, - const char * library); + const char * library); /** * Add a library for keyword extraction at the END of the list. @@ -223,8 +226,8 @@ EXTRACTOR_addLibrary(EXTRACTOR_ExtractorList * prev, */ EXTRACTOR_ExtractorList * EXTRACTOR_addLibraryLast(EXTRACTOR_ExtractorList * prev, - const char * library); - + const char * library); + /** * Remove a library for keyword extraction. 
* @param prev the current list of libraries @@ -233,7 +236,7 @@ EXTRACTOR_addLibraryLast(EXTRACTOR_ExtractorList * prev, */ EXTRACTOR_ExtractorList * EXTRACTOR_removeLibrary(EXTRACTOR_ExtractorList * prev, - const char * library); + const char * library); /** * Remove all extractors. @@ -250,7 +253,7 @@ void EXTRACTOR_removeAll(EXTRACTOR_ExtractorList * libraries); */ EXTRACTOR_KeywordList * EXTRACTOR_getKeywords(EXTRACTOR_ExtractorList * extractor, - const char * filename); + const char * filename); /** @@ -261,7 +264,7 @@ EXTRACTOR_getKeywords(EXTRACTOR_ExtractorList * extractor, */ EXTRACTOR_KeywordList * EXTRACTOR_removeDuplicateKeywords(EXTRACTOR_KeywordList * list, - const unsigned int options); + const unsigned int options); /** @@ -279,7 +282,7 @@ EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * list); * @param keywords the list of keywords to print, may be NULL */ void EXTRACTOR_printKeywords(FILE * handle, - EXTRACTOR_KeywordList * keywords); + EXTRACTOR_KeywordList * keywords); /** * Free the memory occupied by the keyword list (and the @@ -298,7 +301,7 @@ void EXTRACTOR_freeKeywords(EXTRACTOR_KeywordList * keywords); * invalid once the keyword list is freed. */ const char * EXTRACTOR_extractLast(const EXTRACTOR_KeywordType type, - EXTRACTOR_KeywordList * keywords); + EXTRACTOR_KeywordList * keywords); /** * Extract the last keyword of the given string from the keyword list. @@ -309,8 +312,8 @@ const char * EXTRACTOR_extractLast(const EXTRACTOR_KeywordType type, * not be freed or manipulated by the client. It will become * invalid once the keyword list is freed. */ -const char * EXTRACTOR_extractLastByString (const char * type, - EXTRACTOR_KeywordList * keywords); +const char * EXTRACTOR_extractLastByString(const char * type, + EXTRACTOR_KeywordList * keywords); /** * Count the number of keywords in the keyword list. diff --git a/src/plugins/splitextractor.c b/src/plugins/splitextractor.c @@ -1,6 +1,6 @@ /* This file is part of libextractor. 
- (C) 2002, 2003 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2005 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -25,8 +25,8 @@ static char * TOKENIZERS = "._ ,%@-\n_[](){}"; static int MINIMUM_KEYWORD_LENGTH = 4; static void addKeyword(struct EXTRACTOR_Keywords ** list, - char * keyword, - EXTRACTOR_KeywordType type) { + char * keyword, + EXTRACTOR_KeywordType type) { EXTRACTOR_KeywordList * next; next = malloc(sizeof(EXTRACTOR_KeywordList)); next->next = *list; @@ -35,8 +35,12 @@ static void addKeyword(struct EXTRACTOR_Keywords ** list, *list = next; } -static int token(char letter) { +static int token(char letter, + const char * options) { int i; + + if (options == NULL) + options = TOKENIZERS; for (i=0;i<strlen(TOKENIZERS);i++) if (letter == TOKENIZERS[i]) return 1; @@ -44,8 +48,9 @@ static int token(char letter) { } static void splitKeywords(char * keyword, - EXTRACTOR_KeywordType type, - struct EXTRACTOR_Keywords ** list) { + EXTRACTOR_KeywordType type, + struct EXTRACTOR_Keywords ** list, + const char * options) { char * dp; int pos; int last; @@ -56,7 +61,8 @@ static void splitKeywords(char * keyword, pos = 0; last = 0; while (pos < len) { - while ((!token(dp[pos])) && (pos < len)) + while ((!token(dp[pos], + options)) && (pos < len)) pos++; dp[pos++] = 0; if (strlen(&dp[last]) >= MINIMUM_KEYWORD_LENGTH) { @@ -68,19 +74,21 @@ static void splitKeywords(char * keyword, } /* split other keywords into multiple keywords */ -struct EXTRACTOR_Keywords * libextractor_split_extract(char * filename, - char * data, - size_t size, - struct EXTRACTOR_Keywords * prev) { +struct EXTRACTOR_Keywords * +libextractor_split_extract(char * filename, + char * data, + size_t size, + struct EXTRACTOR_Keywords * prev, + const char * options) { struct EXTRACTOR_Keywords * pos; pos = prev; while (pos != NULL) { splitKeywords(pos->keyword, - 
EXTRACTOR_UNKNOWN, - &prev); + EXTRACTOR_UNKNOWN, + &prev, + options); pos = pos->next; } - return prev; }