[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r763 - in Extractor/src: include plugins
From: grothoff
Subject: [GNUnet-SVN] r763 - in Extractor/src: include plugins
Date: Sat, 7 May 2005 12:40:27 -0700 (PDT)
Author: grothoff
Date: 2005-05-07 12:40:22 -0700 (Sat, 07 May 2005)
New Revision: 763
Modified:
Extractor/src/include/extractor.h
Extractor/src/plugins/splitextractor.c
Log:
use options for split
Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h 2005-05-07 19:27:45 UTC (rev 762)
+++ Extractor/src/include/extractor.h 2005-05-07 19:40:22 UTC (rev 763)
@@ -146,11 +146,12 @@
* Signature of the extract method that each plugin
* must provide.
*/
-typedef EXTRACTOR_KeywordList * (*ExtractMethod)(const char * filename,
- char * data,
- size_t filesize,
- EXTRACTOR_KeywordList * next,
- const char * options);
+typedef EXTRACTOR_KeywordList *
+(*ExtractMethod)(const char * filename,
+
char * data,
+
size_t filesize,
+
EXTRACTOR_KeywordList *
next,
+
const char * options);
/**
* Linked list of extractor helper-libraries. An application
@@ -179,12 +180,14 @@
* Get the textual name of the keyword.
* @return NULL if the type is not known
*/
-const char * EXTRACTOR_getKeywordTypeAsString(const EXTRACTOR_KeywordType
type);
+const char *
+EXTRACTOR_getKeywordTypeAsString(const EXTRACTOR_KeywordType type);
/**
* Return the highest type number, exclusive as in [0,highest).
*/
-EXTRACTOR_KeywordType EXTRACTOR_getHighestKeywordTypeNumber();
+EXTRACTOR_KeywordType
+EXTRACTOR_getHighestKeywordTypeNumber();
/**
* Load multiple libraries as specified by the user.
@@ -202,7 +205,7 @@
*/
EXTRACTOR_ExtractorList *
EXTRACTOR_loadConfigLibraries(EXTRACTOR_ExtractorList * prev,
- const char * config);
+
const char * config);
/**
* Add a library for keyword extraction.
@@ -212,7 +215,7 @@
*/
EXTRACTOR_ExtractorList *
EXTRACTOR_addLibrary(EXTRACTOR_ExtractorList * prev,
- const char * library);
+
const char * library);
/**
* Add a library for keyword extraction at the END of the list.
@@ -223,8 +226,8 @@
*/
EXTRACTOR_ExtractorList *
EXTRACTOR_addLibraryLast(EXTRACTOR_ExtractorList * prev,
- const char * library);
-
+
const char * library);
+
/**
* Remove a library for keyword extraction.
* @param prev the current list of libraries
@@ -233,7 +236,7 @@
*/
EXTRACTOR_ExtractorList *
EXTRACTOR_removeLibrary(EXTRACTOR_ExtractorList * prev,
- const char * library);
+
const char * library);
/**
* Remove all extractors.
@@ -250,7 +253,7 @@
*/
EXTRACTOR_KeywordList *
EXTRACTOR_getKeywords(EXTRACTOR_ExtractorList * extractor,
- const char * filename);
+
const char * filename);
/**
@@ -261,7 +264,7 @@
*/
EXTRACTOR_KeywordList *
EXTRACTOR_removeDuplicateKeywords(EXTRACTOR_KeywordList * list,
- const unsigned int options);
+
const unsigned int options);
/**
@@ -279,7 +282,7 @@
* @param keywords the list of keywords to print, may be NULL
*/
void EXTRACTOR_printKeywords(FILE * handle,
- EXTRACTOR_KeywordList * keywords);
+
EXTRACTOR_KeywordList * keywords);
/**
* Free the memory occupied by the keyword list (and the
@@ -298,7 +301,7 @@
* invalid once the keyword list is freed.
*/
const char * EXTRACTOR_extractLast(const EXTRACTOR_KeywordType type,
- EXTRACTOR_KeywordList * keywords);
+
EXTRACTOR_KeywordList * keywords);
/**
* Extract the last keyword of the given string from the keyword list.
@@ -309,8 +312,8 @@
* not be freed or manipulated by the client. It will become
* invalid once the keyword list is freed.
*/
-const char * EXTRACTOR_extractLastByString (const char * type,
- EXTRACTOR_KeywordList * keywords);
+const char * EXTRACTOR_extractLastByString(const char * type,
+
EXTRACTOR_KeywordList * keywords);
/**
* Count the number of keywords in the keyword list.
Modified: Extractor/src/plugins/splitextractor.c
===================================================================
--- Extractor/src/plugins/splitextractor.c 2005-05-07 19:27:45 UTC (rev 762)
+++ Extractor/src/plugins/splitextractor.c 2005-05-07 19:40:22 UTC (rev 763)
@@ -1,6 +1,6 @@
/*
This file is part of libextractor.
- (C) 2002, 2003 Vidyut Samanta and Christian Grothoff
+ (C) 2002, 2003, 2005 Vidyut Samanta and Christian Grothoff
libextractor is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -25,8 +25,8 @@
static int MINIMUM_KEYWORD_LENGTH = 4;
static void addKeyword(struct EXTRACTOR_Keywords ** list,
- char * keyword,
- EXTRACTOR_KeywordType type) {
+
char * keyword,
+
EXTRACTOR_KeywordType type) {
EXTRACTOR_KeywordList * next;
next = malloc(sizeof(EXTRACTOR_KeywordList));
next->next = *list;
@@ -35,8 +35,12 @@
*list = next;
}
-static int token(char letter) {
+static int token(char letter,
+
const char * options) {
int i;
+
+ if (options == NULL)
+ options = TOKENIZERS;
for (i=0;i<strlen(TOKENIZERS);i++)
if (letter == TOKENIZERS[i])
return 1;
@@ -44,8 +48,9 @@
}
static void splitKeywords(char * keyword,
- EXTRACTOR_KeywordType type,
- struct EXTRACTOR_Keywords ** list) {
+
EXTRACTOR_KeywordType type,
+
struct EXTRACTOR_Keywords **
list,
+
const char * options) {
char * dp;
int pos;
int last;
@@ -56,7 +61,8 @@
pos = 0;
last = 0;
while (pos < len) {
- while ((!token(dp[pos])) && (pos < len))
+ while ((!token(dp[pos],
+
options)) && (pos < len))
pos++;
dp[pos++] = 0;
if (strlen(&dp[last]) >= MINIMUM_KEYWORD_LENGTH) {
@@ -68,19 +74,21 @@
}
/* split other keywords into multiple keywords */
-struct EXTRACTOR_Keywords * libextractor_split_extract(char * filename,
- char * data,
- size_t size,
- struct
EXTRACTOR_Keywords * prev) {
+struct EXTRACTOR_Keywords *
+libextractor_split_extract(char * filename,
+
char * data,
+
size_t size,
+
struct
EXTRACTOR_Keywords * prev,
+
const char * options) {
struct EXTRACTOR_Keywords * pos;
pos = prev;
while (pos != NULL) {
splitKeywords(pos->keyword,
- EXTRACTOR_UNKNOWN,
- &prev);
+
EXTRACTOR_UNKNOWN,
+
&prev,
+
options);
pos = pos->next;
}
-
return prev;
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r763 - in Extractor/src: include plugins,
grothoff <=