|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
  de.folt.util.WordHandling
public class WordHandling
This class implements several methods dealing with word handling, e.g. splitting up segments into words. The default split string is defined as:
defaultSplitString = "\\s" + "|[" + Pattern.quote(".*()[]:;,'#+=?!$%&\"") + "]";
| Constructor Summary | |
|---|---|
WordHandling()
|
|
| Method Summary | |
|---|---|
java.lang.String |
getDefaultSplitString()
|
java.lang.String |
getDefaultSplitStringDE()
|
java.lang.String |
getDefaultSplitStringEN()
|
java.lang.String |
getDefaultSplitStringES()
|
static java.lang.String |
getDefaultWordSplitChars()
|
java.util.Hashtable<java.lang.String,java.lang.String> |
getLanguageDefaultWordSplitChars()
|
java.lang.String |
getSplitChars(java.lang.String language)
|
void |
init()
Initialise the word split chars |
boolean |
isbXmlMode()
|
static void |
main(java.lang.String[] args)
|
java.lang.String[] |
segmentToWordArray(java.lang.String string)
segmentToWordArray segments a string into an array of words using defaultSplitString (Pattern.quote(".*()[]:;,'#+=?!$%&\"")). |
java.lang.String[] |
segmentToWordArray(java.lang.String string,
java.lang.String language)
segmentToWordArray segments a string into an array of words based on a language |
void |
setbXmlMode(boolean bXmlMode)
|
void |
setDefaultSplitString(java.lang.String defaultSplitString)
|
void |
setDefaultSplitStringDE(java.lang.String defaultSplitStringDE)
|
void |
setDefaultSplitStringEN(java.lang.String defaultSplitStringEN)
|
void |
setDefaultSplitStringES(java.lang.String defaultSplitStringES)
|
static void |
setDefaultWordSplitChars(java.lang.String defaultWordSplitChars)
|
void |
setLanguageDefaultWordSplitChars(java.util.Hashtable<java.lang.String,java.lang.String> languageDefaultWordSplitChars)
|
void |
setlanguageWordSplitChars(java.lang.String language,
java.lang.String splitChars)
Add or replace a split character for a language |
static java.lang.String |
stem(java.lang.String text,
java.lang.String language)
|
| Methods inherited from class java.lang.Object |
|---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public WordHandling()
| Method Detail |
|---|
public static java.lang.String getDefaultWordSplitChars()
public static void main(java.lang.String[] args)
public static void setDefaultWordSplitChars(java.lang.String defaultWordSplitChars)
public java.lang.String getDefaultSplitString()
public java.lang.String getDefaultSplitStringDE()
public java.lang.String getDefaultSplitStringEN()
public java.lang.String getDefaultSplitStringES()
public java.util.Hashtable<java.lang.String,java.lang.String> getLanguageDefaultWordSplitChars()
public java.lang.String getSplitChars(java.lang.String language)
language -
public void init()
public boolean isbXmlMode()
public java.lang.String[] segmentToWordArray(java.lang.String string)
string - the string to segment
public java.lang.String[] segmentToWordArray(java.lang.String string,
java.lang.String language)
string - the string to segment
language - the language to use (e.g. de-de); the lookup searches first for de-de and
then for de; if nothing is found, defaultSplitString will be used
(Pattern.quote(".*()[]:;,'#+=?!$%&\"{}"))
public void setbXmlMode(boolean bXmlMode)
public void setDefaultSplitString(java.lang.String defaultSplitString)
public void setDefaultSplitStringDE(java.lang.String defaultSplitStringDE)
public void setDefaultSplitStringEN(java.lang.String defaultSplitStringEN)
public void setDefaultSplitStringES(java.lang.String defaultSplitStringES)
public void setLanguageDefaultWordSplitChars(java.util.Hashtable<java.lang.String,java.lang.String> languageDefaultWordSplitChars)
public void setlanguageWordSplitChars(java.lang.String language,
java.lang.String splitChars)
language - the language code
splitChars - the characters to use (they will be quoted for use in regular expressions, and
the regular-expression whitespace class \s will be added)
public static java.lang.String stem(java.lang.String text,
java.lang.String language)
text -
language -
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||