public class LvgAnnotator
extends JCasAnnotator_ImplBase
Modifier and Type | Class and Description |
---|---|
(package private) class |
LvgAnnotator.LemmaLocalClass
Basic class to group a lemma word with its various parts of speech.
|
Modifier and Type | Field and Description |
---|---|
private java.lang.String |
cmdCacheFileLocation |
private int |
cmdCacheFreqCutoff |
static java.lang.String[] |
defaultExclusionWords |
static java.lang.String[] |
defaultTreebankMap |
private java.util.Set<java.lang.String> |
exclusionSet |
private java.lang.String |
lemmaCacheFileLocation |
private int |
lemmaCacheFreqCutoff |
private java.util.Map<java.lang.String,java.util.Set<LvgAnnotator.LemmaLocalClass>> |
lemmaCacheMap |
private Logger |
logger |
private LvgCmdApi |
lvgCmd |
private LvgLexItemApi |
lvgLexItem |
private LvgCmdApiResource |
lvgResource |
private java.util.Map<java.lang.String,java.lang.String> |
normCacheMap |
static java.lang.String |
PARAM_CMD_CACHE_FILE |
static java.lang.String |
PARAM_EXCLUSION_WORDS |
static java.lang.String |
PARAM_LEMMA_CACHE_FILE_LOCATION
Value is "LemmaCacheFileLocation".
|
static java.lang.String |
PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF
Value is "LemmaCacheFrequencyCutoff".
|
static java.lang.String |
PARAM_LEMMA_FREQ_CUTOFF |
static java.lang.String |
PARAM_LVGCMDAPI_RESRC_KEY |
static java.lang.String |
PARAM_POST_LEMMAS
Value is "PostLemmas".
|
static java.lang.String |
PARAM_SKIP_SEGMENTS |
static java.lang.String |
PARAM_USE_CMD_CACHE |
static java.lang.String |
PARAM_USE_LEMMA_CACHE
Value is "UseLemmaCache".
|
static java.lang.String |
PARAM_USE_SEGMENTS |
static java.lang.String |
PARAM_XT_MAP |
private boolean |
postLemmas |
private java.lang.String[] |
skipSegmentIDs |
private java.util.Set<java.lang.String> |
skipSegmentsSet |
private boolean |
useCmdCache |
private boolean |
useLemmaCache |
private boolean |
useSegments |
private java.lang.String[] |
wordsToExclude |
private java.util.Map<java.lang.String,java.lang.String> |
xeroxTreebankMap |
private java.lang.String[] |
xtMaps |
Constructor and Description |
---|
LvgAnnotator() |
Modifier and Type | Method and Description |
---|---|
protected void |
annotateRange(JCas jcas,
java.lang.String text,
int rangeBegin,
int rangeEnd)
A utility method that annotates a given range.
|
private void |
configInit()
Sets configuration parameters with values from the descriptor.
|
static AnalysisEngineDescription |
createAnnotatorDescription() |
void |
initialize(UimaContext aContext)
Performs initialization logic.
|
private void |
loadCmdCacheFile(java.lang.String cpLocation)
Helper method that loads a Norm cache file.
|
private void |
loadLemmaCacheFile(java.lang.String cpLocation)
Helper method that loads a Lemma cache file.
|
void |
process(JCas jcas)
Invokes this annotator's analysis logic.
|
private void |
setCanonicalForm(WordToken wordAnnotation,
java.lang.String word) |
private void |
setLemma(WordToken wordAnnotation,
java.lang.String word,
JCas jcas) |
public static final java.lang.String[] defaultExclusionWords
public static final java.lang.String[] defaultTreebankMap
public static final java.lang.String PARAM_POST_LEMMAS
private boolean postLemmas
public static final java.lang.String PARAM_USE_LEMMA_CACHE
private boolean useLemmaCache
public static final java.lang.String PARAM_LEMMA_CACHE_FILE_LOCATION
private java.lang.String lemmaCacheFileLocation
public static final java.lang.String PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF
private int cmdCacheFreqCutoff
public static final java.lang.String PARAM_USE_SEGMENTS
private boolean useSegments
public static final java.lang.String PARAM_SKIP_SEGMENTS
private java.lang.String[] skipSegmentIDs
private java.util.Set<java.lang.String> skipSegmentsSet
public static final java.lang.String PARAM_XT_MAP
private java.lang.String[] xtMaps
private java.util.Map<java.lang.String,java.lang.String> xeroxTreebankMap
public static final java.lang.String PARAM_USE_CMD_CACHE
private boolean useCmdCache
public static final java.lang.String PARAM_CMD_CACHE_FILE
private java.lang.String cmdCacheFileLocation
public static final java.lang.String PARAM_LEMMA_FREQ_CUTOFF
private int lemmaCacheFreqCutoff
public static final java.lang.String PARAM_EXCLUSION_WORDS
private java.lang.String[] wordsToExclude
private java.util.Set<java.lang.String> exclusionSet
private Logger logger
public static final java.lang.String PARAM_LVGCMDAPI_RESRC_KEY
private LvgCmdApiResource lvgResource
private LvgCmdApi lvgCmd
private LvgLexItemApi lvgLexItem
private java.util.Map<java.lang.String,java.lang.String> normCacheMap
private java.util.Map<java.lang.String,java.util.Set<LvgAnnotator.LemmaLocalClass>> lemmaCacheMap
public void initialize(UimaContext aContext) throws ResourceInitializationException
Throws: ResourceInitializationException
See Also: org.apache.uima.analysis_engine.annotator.BaseAnnotator#initialize(AnnotatorContext)
private void configInit()
public void process(JCas jcas) throws AnalysisEngineProcessException
Throws: AnalysisEngineProcessException
protected void annotateRange(JCas jcas, java.lang.String text, int rangeBegin, int rangeEnd) throws AnalysisEngineProcessException
Throws: AnalysisEngineProcessException
private void setCanonicalForm(WordToken wordAnnotation, java.lang.String word) throws AnalysisEngineProcessException
Throws: AnalysisEngineProcessException
private void setLemma(WordToken wordAnnotation, java.lang.String word, JCas jcas) throws AnalysisEngineProcessException
Throws: AnalysisEngineProcessException
private void loadCmdCacheFile(java.lang.String cpLocation) throws java.io.FileNotFoundException, java.io.IOException
Parameters: cpLocation - location of the cache file to load
Throws: java.io.FileNotFoundException
Throws: java.io.IOException
private void loadLemmaCacheFile(java.lang.String cpLocation) throws java.io.FileNotFoundException, java.io.IOException
Parameters: cpLocation - location of the cache file to load
Throws: java.io.FileNotFoundException
Throws: java.io.IOException
public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException
Throws: ResourceInitializationException