public final class RareWordTermMapCreator
extends java.lang.Object
Given a collection of RareWordTermMapCreator.CuiTerm objects,
this factory can create a Map of RareWordTerm collections
indexed by rare word.
This map can be used to create a MemRareWordDictionary.
Author: SPF
Affiliation: CHIP-NLP
Date: 1/9/14

Modifier and Type | Class and Description |
---|---|
static class | RareWordTermMapCreator.CuiTerm |
Modifier and Type | Field and Description |
---|---|
private static java.lang.String[] | BAD_POS_TERMS |
private static Logger | LOGGER |
private static java.lang.String[] | PREFIXES |
private static java.lang.String[] | SUFFIXES |
Modifier | Constructor and Description |
---|---|
private | RareWordTermMapCreator() |
Modifier and Type | Method and Description |
---|---|
static CollectionMap<java.lang.String,RareWordTerm,java.util.List<RareWordTerm>> | createRareWordTermMap(java.lang.Iterable<RareWordTermMapCreator.CuiTerm> cuiTerms) |
private static java.util.Map<java.lang.String,java.lang.Integer> | createTokenCountMap(java.lang.Iterable<RareWordTermMapCreator.CuiTerm> cuiTerms) |
private static java.lang.String | getNextCharTerm(java.lang.String word) |
private static java.lang.String | getRareWord(java.lang.String tokenizedTerm, java.util.Map<java.lang.String,java.lang.Integer> tokenCountMap) |
private static int | getTokenCount(java.lang.String tokenizedTerm) |
private static java.lang.String | getTokenizedTerm(java.lang.String term) |
private static java.util.List<java.lang.String> | getTokens(java.lang.String word) |
private static int | getWordIndex(java.lang.String tokenizedTerm, java.lang.String word) |
private static boolean | isPrefix(java.lang.String word) |
private static boolean | isRarableToken(java.lang.String token) |
private static boolean | isSuffix(java.lang.String word, int startIndex) |
private static final Logger LOGGER
private static final java.lang.String[] PREFIXES
private static final java.lang.String[] SUFFIXES
private static final java.lang.String[] BAD_POS_TERMS
public static CollectionMap<java.lang.String,RareWordTerm,java.util.List<RareWordTerm>> createRareWordTermMap(java.lang.Iterable<RareWordTermMapCreator.CuiTerm> cuiTerms)
private static java.util.Map<java.lang.String,java.lang.Integer> createTokenCountMap(java.lang.Iterable<RareWordTermMapCreator.CuiTerm> cuiTerms)
private static java.lang.String getRareWord(java.lang.String tokenizedTerm, java.util.Map<java.lang.String,java.lang.Integer> tokenCountMap)
private static boolean isRarableToken(java.lang.String token)
private static int getWordIndex(java.lang.String tokenizedTerm, java.lang.String word)
private static int getTokenCount(java.lang.String tokenizedTerm)
private static java.lang.String getTokenizedTerm(java.lang.String term)
private static java.util.List<java.lang.String> getTokens(java.lang.String word)
private static boolean isPrefix(java.lang.String word)
private static boolean isSuffix(java.lang.String word, int startIndex)
private static java.lang.String getNextCharTerm(java.lang.String word)