public abstract class AbstractJCasTermAnnotator extends JCasAnnotator_ImplBase implements JCasTermAnnotator, WindowProcessor
Modifier and Type | Field and Description |
---|---|
private DictionarySpec |
_dictionarySpec |
private java.util.Set<java.lang.String> |
_exclusionPartsOfSpeech |
private int |
_lookupWindowType |
protected int |
_minimumLookupSpan |
private static java.lang.String |
DEFAULT_EXCLUSION_TAGS |
private static java.lang.String |
DEFAULT_LOOKUP_WINDOW |
private static Logger |
LOGGER |
static java.lang.String |
PARAM_EXC_TAGS_PRP
optional part of speech tags for tokens that should not be used for lookup
|
static java.lang.String |
PARAM_MIN_SPAN_PRP
optional minimum span for tokens used for lookup; tokens with a smaller span should not be used for lookup
|
static java.lang.String |
PARAM_WINDOW_ANNOT_PRP
specifies the type of window to use for lookup
|
DICTIONARY_DESCRIPTOR_KEY
Constructor and Description |
---|
AbstractJCasTermAnnotator() |
Modifier and Type | Method and Description |
---|---|
private void |
findTerms(java.lang.Iterable<RareWordDictionary> dictionaries,
java.util.List<FastLookupToken> allTokens,
java.util.List<java.lang.Integer> lookupTokenIndices,
java.util.Map<RareWordDictionary,CollectionMap<TextSpan,java.lang.Long,? extends java.util.Collection<java.lang.Long>>> dictionaryTermsMap)
Given a set of dictionaries, tokens, and lookup token indices, populate a terms map with discovered terms
|
protected void |
getAnnotationsInWindow(JCas jcas,
AnnotationFS window,
java.util.List<FastLookupToken> allTokens,
java.util.Collection<java.lang.Integer> lookupTokenIndices)
For the given lookup window, fills two collections with 1) all tokens in the window,
and 2) indices of tokens in the window to be used for lookup
|
java.util.Collection<RareWordDictionary> |
getDictionaries() |
void |
initialize(UimaContext uimaContext) |
boolean |
isWindowOk(Annotation window)
Skip windows that are section headers/footers.
|
protected static int |
parseInt(java.lang.Object value,
java.lang.String name,
int defaultValue) |
void |
process(JCas jcas) |
void |
processWindow(JCas jcas,
Annotation window,
java.util.Map<RareWordDictionary,CollectionMap<TextSpan,java.lang.Long,? extends java.util.Collection<java.lang.Long>>> dictionaryTerms)
Processes a window of annotations for dictionary terms
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
findTerms
private static final Logger LOGGER
public static final java.lang.String PARAM_WINDOW_ANNOT_PRP
public static final java.lang.String PARAM_EXC_TAGS_PRP
public static final java.lang.String PARAM_MIN_SPAN_PRP
private static final java.lang.String DEFAULT_LOOKUP_WINDOW
private static final java.lang.String DEFAULT_EXCLUSION_TAGS
private DictionarySpec _dictionarySpec
private int _lookupWindowType
private final java.util.Set<java.lang.String> _exclusionPartsOfSpeech
protected int _minimumLookupSpan
public void initialize(UimaContext uimaContext) throws ResourceInitializationException
ResourceInitializationException
public void process(JCas jcas) throws AnalysisEngineProcessException
AnalysisEngineProcessException
public java.util.Collection<RareWordDictionary> getDictionaries()
getDictionaries
in interface JCasTermAnnotator
public boolean isWindowOk(Annotation window)
isWindowOk
in interface WindowProcessor
window
- annotation in which to search for terms
public void processWindow(JCas jcas, Annotation window, java.util.Map<RareWordDictionary,CollectionMap<TextSpan,java.lang.Long,? extends java.util.Collection<java.lang.Long>>> dictionaryTerms)
processWindow
in interface WindowProcessor
jcas
- -
window
- annotation in which to search for terms
dictionaryTerms
- map of entity types and terms for those types in the window
private void findTerms(java.lang.Iterable<RareWordDictionary> dictionaries, java.util.List<FastLookupToken> allTokens, java.util.List<java.lang.Integer> lookupTokenIndices, java.util.Map<RareWordDictionary,CollectionMap<TextSpan,java.lang.Long,? extends java.util.Collection<java.lang.Long>>> dictionaryTermsMap)
dictionaries
- -
allTokens
- -
lookupTokenIndices
- -
dictionaryTermsMap
- -
protected void getAnnotationsInWindow(JCas jcas, AnnotationFS window, java.util.List<FastLookupToken> allTokens, java.util.Collection<java.lang.Integer> lookupTokenIndices)
jcas
- -
window
- annotation lookup window
allTokens
- filled with all tokens, including punctuation, etc.
lookupTokenIndices
- filled with indices of tokens to use for lookup
protected static int parseInt(java.lang.Object value, java.lang.String name, int defaultValue)