net.docliff.segmenter
Class SegmentRule

java.lang.Object
  extended by net.docliff.segmenter.SegmentRule

public class SegmentRule
extends java.lang.Object


Constructor Summary
SegmentRule()
           
SegmentRule(java.lang.String initFileName)
           
 
Method Summary
 int getAbbrevCount()
           
 java.util.ArrayList getAbbrevList()
           
 java.lang.String[] getBreakChars()
           
 java.util.Hashtable getFollowCharRule()
           
 java.io.File getInitFile()
           
 org.jdom.Element getSegSection(java.lang.String lang)
           
 java.lang.String[] getWordChars()
           
 boolean isAbbreviation(char[] chararr, int iStart)
           
 boolean isAbbreviation(java.lang.String string)
           
 boolean isBreakChar(char ch)
           
 boolean isWordBreakChar(char ch)
           
 void loadRule(java.lang.String lang)
           
 java.lang.String matchAbbreviation(char[] chararr, int iStart)
           
 void setAbbrevCount(int abbrevCount)
           
 void setAbbrevList(java.util.ArrayList abbrevList)
           
 void setBreakChars(java.lang.String[] breakChars)
           
 void setFollowCharRule(java.util.Hashtable followCharRule)
           
 void setInitFile(java.io.File initFile)
           
 void setWordChars(java.lang.String[] wordChars)
           
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

SegmentRule

public SegmentRule()
            throws OpenTMSException
Throws:
OpenTMSException

SegmentRule

public SegmentRule(java.lang.String initFileName)
            throws OpenTMSException
Throws:
OpenTMSException
Method Detail

getAbbrevCount

public int getAbbrevCount()
Returns:
the abbrevCount

getAbbrevList

public java.util.ArrayList getAbbrevList()
Returns:
the abbrevList

getBreakChars

public java.lang.String[] getBreakChars()
Returns:
the breakChars

getFollowCharRule

public java.util.Hashtable getFollowCharRule()
Returns:
the followCharRule

getInitFile

public java.io.File getInitFile()
Returns:
the initFile

getSegSection

public org.jdom.Element getSegSection(java.lang.String lang)

getWordChars

public java.lang.String[] getWordChars()
Returns:
the wordChars

isAbbreviation

public boolean isAbbreviation(char[] chararr,
                              int iStart)
Parameters:
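chararr - the character array to check
iStart - the start position within chararr
Returns:
true if an abbreviation is recognised at the given position, false otherwise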

isAbbreviation

public boolean isAbbreviation(java.lang.String string)
Parameters:
string - the string to check
Returns:
true if string is an abbreviation, false otherwise

isBreakChar

public boolean isBreakChar(char ch)
Parameters:
ch - the character to check
Returns:
true if ch is a break character, false otherwise

isWordBreakChar

public boolean isWordBreakChar(char ch)
Parameters:
ch - the character to check
Returns:
true if ch is a word break character, false otherwise

loadRule

public void loadRule(java.lang.String lang)
              throws OpenTMSException
Throws:
OpenTMSException

matchAbbreviation

public java.lang.String matchAbbreviation(char[] chararr,
                                          int iStart)
Parameters:
chararr - the character array to check
iStart - the start position within chararr
Returns:
the matched abbreviation

setAbbrevCount

public void setAbbrevCount(int abbrevCount)
Parameters:
abbrevCount - the abbrevCount to set

setAbbrevList

public void setAbbrevList(java.util.ArrayList abbrevList)
Parameters:
abbrevList - the abbrevList to set

setBreakChars

public void setBreakChars(java.lang.String[] breakChars)
Parameters:
breakChars - the breakChars to set

setFollowCharRule

public void setFollowCharRule(java.util.Hashtable followCharRule)
Parameters:
followCharRule - the followCharRule to set

setInitFile

public void setInitFile(java.io.File initFile)
Parameters:
initFile - the initFile to set

setWordChars

public void setWordChars(java.lang.String[] wordChars)
Parameters:
wordChars - the wordChars to set