commit 4e19b7b871238d11d114711251fa60d12f0ec0b9
parent 2b909c750f3cab80335d6fe54573020488bbd6b5
Author: Christian Grothoff <christian@grothoff.org>
Date: Sat, 7 May 2005 19:40:22 +0000
use options for split
Diffstat:
2 files changed, 43 insertions(+), 32 deletions(-)
diff --git a/src/include/extractor.h b/src/include/extractor.h
@@ -146,11 +146,12 @@ typedef struct EXTRACTOR_Keywords {
* Signature of the extract method that each plugin
* must provide.
*/
-typedef EXTRACTOR_KeywordList * (*ExtractMethod)(const char * filename,
- char * data,
- size_t filesize,
- EXTRACTOR_KeywordList * next,
- const char * options);
+typedef EXTRACTOR_KeywordList *
+(*ExtractMethod)(const char * filename,
+ char * data,
+ size_t filesize,
+ EXTRACTOR_KeywordList * next,
+ const char * options);
/**
* Linked list of extractor helper-libraries. An application
@@ -179,12 +180,14 @@ EXTRACTOR_ExtractorList * EXTRACTOR_loadDefaultLibraries();
* Get the textual name of the keyword.
* @return NULL if the type is not known
*/
-const char * EXTRACTOR_getKeywordTypeAsString(const EXTRACTOR_KeywordType type);
+const char *
+EXTRACTOR_getKeywordTypeAsString(const EXTRACTOR_KeywordType type);
/**
* Return the highest type number, exclusive as in [0,highest).
*/
-EXTRACTOR_KeywordType EXTRACTOR_getHighestKeywordTypeNumber();
+EXTRACTOR_KeywordType
+EXTRACTOR_getHighestKeywordTypeNumber();
/**
* Load multiple libraries as specified by the user.
@@ -202,7 +205,7 @@ EXTRACTOR_KeywordType EXTRACTOR_getHighestKeywordTypeNumber();
*/
EXTRACTOR_ExtractorList *
EXTRACTOR_loadConfigLibraries(EXTRACTOR_ExtractorList * prev,
- const char * config);
+ const char * config);
/**
* Add a library for keyword extraction.
@@ -212,7 +215,7 @@ EXTRACTOR_loadConfigLibraries(EXTRACTOR_ExtractorList * prev,
*/
EXTRACTOR_ExtractorList *
EXTRACTOR_addLibrary(EXTRACTOR_ExtractorList * prev,
- const char * library);
+ const char * library);
/**
* Add a library for keyword extraction at the END of the list.
@@ -223,8 +226,8 @@ EXTRACTOR_addLibrary(EXTRACTOR_ExtractorList * prev,
*/
EXTRACTOR_ExtractorList *
EXTRACTOR_addLibraryLast(EXTRACTOR_ExtractorList * prev,
- const char * library);
-
+ const char * library);
+
/**
* Remove a library for keyword extraction.
* @param prev the current list of libraries
@@ -233,7 +236,7 @@ EXTRACTOR_addLibraryLast(EXTRACTOR_ExtractorList * prev,
*/
EXTRACTOR_ExtractorList *
EXTRACTOR_removeLibrary(EXTRACTOR_ExtractorList * prev,
- const char * library);
+ const char * library);
/**
* Remove all extractors.
@@ -250,7 +253,7 @@ void EXTRACTOR_removeAll(EXTRACTOR_ExtractorList * libraries);
*/
EXTRACTOR_KeywordList *
EXTRACTOR_getKeywords(EXTRACTOR_ExtractorList * extractor,
- const char * filename);
+ const char * filename);
/**
@@ -261,7 +264,7 @@ EXTRACTOR_getKeywords(EXTRACTOR_ExtractorList * extractor,
*/
EXTRACTOR_KeywordList *
EXTRACTOR_removeDuplicateKeywords(EXTRACTOR_KeywordList * list,
- const unsigned int options);
+ const unsigned int options);
/**
@@ -279,7 +282,7 @@ EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * list);
* @param keywords the list of keywords to print, may be NULL
*/
void EXTRACTOR_printKeywords(FILE * handle,
- EXTRACTOR_KeywordList * keywords);
+ EXTRACTOR_KeywordList * keywords);
/**
* Free the memory occupied by the keyword list (and the
@@ -298,7 +301,7 @@ void EXTRACTOR_freeKeywords(EXTRACTOR_KeywordList * keywords);
* invalid once the keyword list is freed.
*/
const char * EXTRACTOR_extractLast(const EXTRACTOR_KeywordType type,
- EXTRACTOR_KeywordList * keywords);
+ EXTRACTOR_KeywordList * keywords);
/**
* Extract the last keyword of the given string from the keyword list.
@@ -309,8 +312,8 @@ const char * EXTRACTOR_extractLast(const EXTRACTOR_KeywordType type,
* not be freed or manipulated by the client. It will become
* invalid once the keyword list is freed.
*/
-const char * EXTRACTOR_extractLastByString (const char * type,
- EXTRACTOR_KeywordList * keywords);
+const char * EXTRACTOR_extractLastByString(const char * type,
+ EXTRACTOR_KeywordList * keywords);
/**
* Count the number of keywords in the keyword list.
diff --git a/src/plugins/splitextractor.c b/src/plugins/splitextractor.c
@@ -1,6 +1,6 @@
/*
This file is part of libextractor.
- (C) 2002, 2003 Vidyut Samanta and Christian Grothoff
+ (C) 2002, 2003, 2005 Vidyut Samanta and Christian Grothoff
libextractor is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -25,8 +25,8 @@ static char * TOKENIZERS = "._ ,%@-\n_[](){}";
static int MINIMUM_KEYWORD_LENGTH = 4;
static void addKeyword(struct EXTRACTOR_Keywords ** list,
- char * keyword,
- EXTRACTOR_KeywordType type) {
+ char * keyword,
+ EXTRACTOR_KeywordType type) {
EXTRACTOR_KeywordList * next;
next = malloc(sizeof(EXTRACTOR_KeywordList));
next->next = *list;
@@ -35,8 +35,12 @@ static void addKeyword(struct EXTRACTOR_Keywords ** list,
*list = next;
}
-static int token(char letter) {
+static int token(char letter,
+ const char * options) {
int i;
+
+ if (options == NULL)
+ options = TOKENIZERS;
- for (i=0;i<strlen(TOKENIZERS);i++)
- if (letter == TOKENIZERS[i])
+ for (i=0;options[i] != '\0';i++)
+ if (letter == options[i])
return 1;
@@ -44,8 +48,9 @@ static int token(char letter) {
}
static void splitKeywords(char * keyword,
- EXTRACTOR_KeywordType type,
- struct EXTRACTOR_Keywords ** list) {
+ EXTRACTOR_KeywordType type,
+ struct EXTRACTOR_Keywords ** list,
+ const char * options) {
char * dp;
int pos;
int last;
@@ -56,7 +61,8 @@ static void splitKeywords(char * keyword,
pos = 0;
last = 0;
while (pos < len) {
- while ((!token(dp[pos])) && (pos < len))
+ while ((!token(dp[pos],
+ options)) && (pos < len))
pos++;
dp[pos++] = 0;
if (strlen(&dp[last]) >= MINIMUM_KEYWORD_LENGTH) {
@@ -68,19 +74,21 @@ static void splitKeywords(char * keyword,
}
/* split other keywords into multiple keywords */
-struct EXTRACTOR_Keywords * libextractor_split_extract(char * filename,
- char * data,
- size_t size,
- struct EXTRACTOR_Keywords * prev) {
+struct EXTRACTOR_Keywords *
+libextractor_split_extract(const char * filename,
+ char * data,
+ size_t size,
+ struct EXTRACTOR_Keywords * prev,
+ const char * options) {
struct EXTRACTOR_Keywords * pos;
pos = prev;
while (pos != NULL) {
splitKeywords(pos->keyword,
- EXTRACTOR_UNKNOWN,
- &prev);
+ EXTRACTOR_UNKNOWN,
+ &prev,
+ options);
pos = pos->next;
}
-
return prev;
}