@PipeBitInfo(name="CCDA Sectionizer", description="Annotates Document Sections by detecting Section Headers using Regular Expressions provided in a File.", dependencies=DOCUMENT_ID, products=SECTION) public class CDASegmentAnnotator extends org.apache.uima.fit.component.JCasAnnotator_ImplBase
Modifier and Type | Field and Description |
---|---|
protected static String |
DEFAULT_SECTION_FILE_NAME |
static String |
PARAM_COMMENT |
static String |
PARAM_FIELD_SEPERATOR |
static String |
PARAM_SECTIONS_FILE |
protected static HashMap<String,Pattern> |
patterns |
protected static HashMap<String,String> |
section_names |
protected String |
sections_path |
static String |
SIMPLE_SEGMENT |
Constructor and Description |
---|
CDASegmentAnnotator() |
Modifier and Type | Method and Description |
---|---|
void |
initialize(org.apache.uima.UimaContext aContext)
Initializes the annotator: loads the sections mapping file and precompiles
its regular expressions into a hash map.
|
void |
process(org.apache.uima.jcas.JCas jCas) |
getRequiredCasInterface, process
getCasInstancesRequired, hasNext, next
protected static final String DEFAULT_SECTION_FILE_NAME
public static final String PARAM_FIELD_SEPERATOR
public static final String PARAM_COMMENT
public static final String SIMPLE_SEGMENT
public static final String PARAM_SECTIONS_FILE
protected String sections_path
public void initialize(org.apache.uima.UimaContext aContext) throws org.apache.uima.resource.ResourceInitializationException
Specified by: initialize in interface org.apache.uima.analysis_component.AnalysisComponent
Overrides: initialize in class org.apache.uima.fit.component.JCasAnnotator_ImplBase
Throws: org.apache.uima.resource.ResourceInitializationException
public void process(org.apache.uima.jcas.JCas jCas) throws org.apache.uima.analysis_engine.AnalysisEngineProcessException
Specified by: process in class org.apache.uima.analysis_component.JCasAnnotator_ImplBase
Throws: org.apache.uima.analysis_engine.AnalysisEngineProcessException
Copyright © 2012-2017 The Apache Software Foundation. All Rights Reserved.