public class DrugMentionAnnotator
extends JCasAnnotator_ImplBase
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
BOUNDARY_ANN_TYPE
Annotation type that defines the boundary within which the dictionary hits should be present.
|
static java.lang.String |
DISTANCE |
static java.lang.String |
DISTANCE_ANN_TYPE
Annotation type that is used to count the distance.
|
private static boolean |
handledRanges |
private int |
iAnnotationType |
private int |
iBoundaryAnnType |
private int[] |
intermediateTypesToRemove |
private DecimalStrengthFSM |
iv_decimalFSM |
private DosagesFSM |
iv_dosagesFSM |
private DurationFSM |
iv_durationFSM |
private java.util.Set<java.lang.String> |
iv_exclusionTagSet |
private FormFSM |
iv_formFSM |
private FractionStrengthFSM |
iv_fractionFSM |
private FrequencyFSM |
iv_frequencyFSM |
private FrequencyUnitFSM |
iv_frequencyUnitFSM |
static Logger |
iv_logger |
private java.util.Set<java.lang.String> |
iv_medicationRelatedSections |
private RangeStrengthFSM |
iv_rangeFSM |
private RouteFSM |
iv_routeFSM |
private DrugChangeStatusFSM |
iv_statusFSM |
private StrengthFSM |
iv_strengthFSM |
private StrengthUnitFSM |
iv_strengthUnitFSM |
private SubSectionIndicatorFSM |
iv_subMedSectionFSM |
private SuffixStrengthFSM |
iv_suffixFSM |
private int |
iWindowSize |
private static int |
NERTypeIdentifier |
static int |
NO_ANNOTATION_TYPE_SPECIFIED |
static int |
NO_WINDOW_SIZE_SPECIFIED |
static java.lang.String |
PARAM_SEGMENTS_MEDICATION_RELATED
This identifies the section ids that will be considered in generating DrugMentionAnnotations.
|
Constructor and Description |
---|
DrugMentionAnnotator() |
Modifier and Type | Method and Description |
---|---|
private BaseToken |
adaptToFSMBaseToken(BaseToken obj) |
private void |
addAnnotations(JCas jcas,
java.util.Set annotations,
int type)
used by executeFSMs to add annotations for features for DrugMention
|
private void |
addMedicationSpecificAttributes(JCas jcas,
DrugMentionAnnotation fromAnnotation,
MedicationMention medicationMention) |
private float |
alignDrugMentionAttributes(java.lang.String strengthText,
DrugMention dm,
DrugMentionAnnotation drugTokenAnt,
DrugMentionAnnotation recurseNER,
java.lang.String relatedStatus,
boolean statusFound,
boolean overrideStatus,
boolean maxExists,
java.lang.String doseText,
java.lang.String frequencyText,
java.lang.String frequencyUnitText) |
private int |
anchorEndofline(int[][] elementSpan,
int elementSpanLength,
int endPhrase,
MedicationMention nea) |
private void |
executeFSMs(JCas jcas,
java.util.List baseTokenList,
java.util.List namedE,
java.util.List wordTokenList)
The namedE consists of a list of the Named Entities (NE) found in the prior
method.
|
private boolean |
findCoveredTextInSpan(JCas jcas,
int annotationType,
int beginOffset,
int endOffset,
java.lang.String[] searchStrs) |
private void |
findDrugAttributesInRange(JCas jcas,
int begin,
int end)
finds drug attributes using the given range, this method uses FSM
|
private int[] |
findElementRelativeToNE(int[][] elementSpan,
int[][] parenthesisSpan,
int elementSpanLength,
int parenthesisSpanLength,
int priorDrugEnd,
int startWithParenNum,
int endPhrase,
MedicationMention nea) |
private void |
findFSMInRange(JCas jcas,
int begin,
int end) |
private int |
findInPattern(JCas jcas,
int begin,
int end,
int elementType,
int[][] location) |
private boolean |
findNextClosestRightParenRelativeToElement(int spanLength,
int[][] elementSpan,
Annotation nea,
int beginSpan,
int startOffset) |
private int[] |
findNextDrugEntityPost(int spanLength,
int[][] elementSpan,
MedicationMention nea,
int endPhrase) |
private int[] |
findNextDrugEntityPre(int spanLength,
int[][] elementSpan,
MedicationMention nea,
int priorDrugEnd) |
private boolean |
findNextParenRelativeToElement(int spanLength,
int[][] elementSpan,
Annotation nea,
int parenEnd,
int startOffset) |
private boolean |
findNextParenRelativeToNE(int spanLength,
int[][] elementSpan,
MedicationMention nea,
int priorDrugEnd) |
private boolean |
findNextParenRelativeToNE(int spanLength,
int[][] elementSpan,
MedicationMention nea,
int priorDrugEnd,
int startOffset) |
private int[] |
findOffsetsInPattern(JCas jcas,
int begin,
int end,
int elementType,
int[][] location,
boolean highest) |
private int |
findTextualStringOffset(java.lang.String text)
Finds the offset of the string that represents the end of the numeric portion.
|
private java.util.List |
findUniqueMentions(java.lang.Object[] holdOutSet)
finds unique annotations by their begin offsets
|
private int[] |
generateAdditionalNER(JCas jcas,
DrugMentionAnnotation tokenDrugNER,
DrugChangeStatusAnnotation drugChangeStatus,
int beginSpan,
int endSpan,
int count,
java.util.List globalNER) |
private void |
generateDrugMentions(JCas jcas,
Segment seg,
boolean narrativeType) |
private void |
generateDrugMentionsAndAnnotations(JCas jcas,
java.util.List<MedicationMention> nerTokenList,
int begin,
int end,
DrugMentionAnnotation recurseNER,
java.lang.String[] relatedStatus,
int countNER,
java.util.List<DrugMentionAnnotation> globalDrugNER) |
private void |
generateUidValues(JCas jcas)
Generates UID values for all MedicationMention objects.
|
private int |
generateUidValues(JCas jcas,
int type,
int firstId)
Generates UID values for all MedicationMention
objects.
|
private int[] |
getAdjustedWindowSpan(JCas jcas,
int begin,
int end,
boolean highestRange)
Returns the window span in which to find reasons for the given drug.
|
private java.util.List<Annotation> |
getAnnotationsInSpan(JCas jcas,
int type,
int begin,
int end) |
private java.util.List |
getAnnotationsInSpanWithAdaptToBaseTokenFSM(JCas jcas,
int type,
int begin,
int end) |
private int[] |
getNarrativeSpansContainingGivenSpanType(JCas jcas,
int begin,
int annotType) |
private Segment |
getSegmentContainingOffsets(JCas jcas,
int start,
int end) |
private int[] |
getSentenceSpanContainingGivenSpan(JCas jcas,
int begin,
int end) |
private int |
getSubSectionAnnotationBodyEnd(Segment currSeg,
SubSectionIndicator nextSsi)
Decides where the SubSectionAnnotation should end.
|
private int[][] |
getWindowSpan(JCas jcas,
java.lang.String sectionType,
int typeAnnotation,
int begin,
int end,
boolean useBegin,
int sizeArrays) |
private boolean |
hasMultipleDrugsInSpan(JCas jcas,
int begin,
int end)
Returns true if more than one drug and reason exist within the span;
otherwise returns false.
|
private boolean |
hasMultipleElementsInSpan(JCas jcas,
int begin,
int end)
Returns true if more than one drug and reason exist within the span;
otherwise returns false.
|
void |
initialize(UimaContext annotCtx) |
private boolean |
isDuplicate(java.lang.Object[] neArray,
int curIdx,
int checkIdx) |
private int |
lastInPattern(JCas jcas,
int begin,
int end,
int elementType,
int[][] location) |
private boolean |
multipleDrugsInSpan(JCas jcas,
int begin,
int end)
Returns true if more than one drug and reason exist within the span; otherwise returns false.
|
private boolean |
multipleElementsInSpan(JCas jcas,
int begin,
int end)
Returns true if more than one drug and reason exist within the span;
otherwise returns false.
|
private void |
prepareSubSection(JCas jcas,
JFSIndexRepository indexes,
java.util.Set subSectionIndSet)
Given the set of subSectionInds to parse (via SubSectionIndicatorFSM),
creates SubSectionAnnotations. This method creates each SubSectionAnnotation and
sets the header begin and end as well as the body begin and end.
|
void |
process(JCas jcas) |
private void |
removeAnnotations(JCas jcas,
int type) |
private void |
removeDrugNerTypes(JCas jcas,
int[] intermediateTypesToRemove)
Removes most of the extra annotation types that we created here, but not all,
as downstream annotators might want to use some, such as SubSectionAnnotation.
|
private java.util.List |
sortAnnotations(java.lang.Object[] holdOutSet)
Sort annotations by begin offset
|
private int[][] |
sortSignatureElements(JCas jcas,
int begin,
int end,
int typeAnnotation,
int[] senSpan,
int sizeArray)
The range defined by the begin and end is searched for all discovered drugs, newline locations and related drug
signature elements.
|
private java.util.List |
sortSubSectionInd(java.lang.Object[] holdOutSet) |
private int[] |
statusChangePhraseGenerator(JCas jcas,
int begin,
int end,
boolean maxExists,
java.util.Iterator uniqueNER,
java.util.Iterator orderedStatusChanges,
java.util.List holdStatusChanges,
java.lang.String[] relatedStatus,
DrugMentionAnnotation drugTokenAnt,
java.util.List globalDrugNER,
int countNER) |
public static Logger iv_logger
public static final java.lang.String PARAM_SEGMENTS_MEDICATION_RELATED
public static java.lang.String DISTANCE
public static java.lang.String DISTANCE_ANN_TYPE
public static java.lang.String BOUNDARY_ANN_TYPE
public static int NO_WINDOW_SIZE_SPECIFIED
public static int NO_ANNOTATION_TYPE_SPECIFIED
private int iWindowSize
private int iAnnotationType
private int iBoundaryAnnType
private FractionStrengthFSM iv_fractionFSM
private RangeStrengthFSM iv_rangeFSM
private SubSectionIndicatorFSM iv_subMedSectionFSM
private DosagesFSM iv_dosagesFSM
private SuffixStrengthFSM iv_suffixFSM
private DurationFSM iv_durationFSM
private RouteFSM iv_routeFSM
private FrequencyFSM iv_frequencyFSM
private DrugChangeStatusFSM iv_statusFSM
private DecimalStrengthFSM iv_decimalFSM
private StrengthFSM iv_strengthFSM
private StrengthUnitFSM iv_strengthUnitFSM
private FrequencyUnitFSM iv_frequencyUnitFSM
private FormFSM iv_formFSM
private static final int NERTypeIdentifier
private static boolean handledRanges
private java.util.Set<java.lang.String> iv_exclusionTagSet
private java.util.Set<java.lang.String> iv_medicationRelatedSections
private int[] intermediateTypesToRemove
public void initialize(UimaContext annotCtx) throws ResourceInitializationException
ResourceInitializationException
public void process(JCas jcas) throws AnalysisEngineProcessException
AnalysisEngineProcessException
private void removeDrugNerTypes(JCas jcas, int[] intermediateTypesToRemove)
jcas
- private void removeAnnotations(JCas jcas, int type)
private void addMedicationSpecificAttributes(JCas jcas, DrugMentionAnnotation fromAnnotation, MedicationMention medicationMention)
private void generateUidValues(JCas jcas)
private int generateUidValues(JCas jcas, int type, int firstId)
private java.util.List sortSubSectionInd(java.lang.Object[] holdOutSet)
private java.util.List sortAnnotations(java.lang.Object[] holdOutSet)
holdOutSet
- private java.util.List findUniqueMentions(java.lang.Object[] holdOutSet)
holdOutSet
- private boolean isDuplicate(java.lang.Object[] neArray, int curIdx, int checkIdx)
private void prepareSubSection(JCas jcas, JFSIndexRepository indexes, java.util.Set subSectionIndSet) throws java.lang.Exception
jcas
- indexes
- subSectionIndSet
- java.lang.Exception
private int getSubSectionAnnotationBodyEnd(Segment currSeg, SubSectionIndicator nextSsi)
private Segment getSegmentContainingOffsets(JCas jcas, int start, int end)
private void addAnnotations(JCas jcas, java.util.Set annotations, int type)
jcas
- annotations
- type
- private int findTextualStringOffset(java.lang.String text)
private void executeFSMs(JCas jcas, java.util.List baseTokenList, java.util.List namedE, java.util.List wordTokenList) throws AnnotatorProcessException
jcas
- baseTokenList
- namedE
- wordTokenList
- AnnotatorProcessException
private void generateDrugMentions(JCas jcas, Segment seg, boolean narrativeType) throws java.lang.Exception
java.lang.Exception
private boolean findCoveredTextInSpan(JCas jcas, int annotationType, int beginOffset, int endOffset, java.lang.String[] searchStrs)
private java.util.List getAnnotationsInSpanWithAdaptToBaseTokenFSM(JCas jcas, int type, int begin, int end) throws java.lang.Exception
java.lang.Exception
private java.util.List<Annotation> getAnnotationsInSpan(JCas jcas, int type, int begin, int end)
private void findDrugAttributesInRange(JCas jcas, int begin, int end) throws java.lang.Exception
jcas
- begin
- end
- java.lang.Exception
private void generateDrugMentionsAndAnnotations(JCas jcas, java.util.List<MedicationMention> nerTokenList, int begin, int end, DrugMentionAnnotation recurseNER, java.lang.String[] relatedStatus, int countNER, java.util.List<DrugMentionAnnotation> globalDrugNER) throws java.lang.Exception
java.lang.Exception
private float alignDrugMentionAttributes(java.lang.String strengthText, DrugMention dm, DrugMentionAnnotation drugTokenAnt, DrugMentionAnnotation recurseNER, java.lang.String relatedStatus, boolean statusFound, boolean overrideStatus, boolean maxExists, java.lang.String doseText, java.lang.String frequencyText, java.lang.String frequencyUnitText)
private int[] statusChangePhraseGenerator(JCas jcas, int begin, int end, boolean maxExists, java.util.Iterator uniqueNER, java.util.Iterator orderedStatusChanges, java.util.List holdStatusChanges, java.lang.String[] relatedStatus, DrugMentionAnnotation drugTokenAnt, java.util.List globalDrugNER, int countNER) throws java.lang.Exception
java.lang.Exception
private int[] generateAdditionalNER(JCas jcas, DrugMentionAnnotation tokenDrugNER, DrugChangeStatusAnnotation drugChangeStatus, int beginSpan, int endSpan, int count, java.util.List globalNER) throws java.lang.Exception
java.lang.Exception
private BaseToken adaptToFSMBaseToken(BaseToken obj) throws java.lang.Exception
java.lang.Exception
private int[] findNextDrugEntityPost(int spanLength, int[][] elementSpan, MedicationMention nea, int endPhrase)
private int[] findNextDrugEntityPre(int spanLength, int[][] elementSpan, MedicationMention nea, int priorDrugEnd)
private boolean findNextParenRelativeToNE(int spanLength, int[][] elementSpan, MedicationMention nea, int priorDrugEnd, int startOffset)
private boolean findNextParenRelativeToElement(int spanLength, int[][] elementSpan, Annotation nea, int parenEnd, int startOffset)
private boolean hasMultipleDrugsInSpan(JCas jcas, int begin, int end)
jcas
- begin
- end
- private boolean hasMultipleElementsInSpan(JCas jcas, int begin, int end)
jcas
- begin
- end
- private int[] getSentenceSpanContainingGivenSpan(JCas jcas, int begin, int end)
jcas
- begin
- end
- private int[] getNarrativeSpansContainingGivenSpanType(JCas jcas, int begin, int annotType)
jcas
- begin
- end
- private void findFSMInRange(JCas jcas, int begin, int end) throws java.lang.Exception
java.lang.Exception
private int[] findOffsetsInPattern(JCas jcas, int begin, int end, int elementType, int[][] location, boolean highest)
jcas
- begin
- end
- private int[] getAdjustedWindowSpan(JCas jcas, int begin, int end, boolean highestRange) throws java.lang.Exception
jcas
- java.lang.Exception
private int findInPattern(JCas jcas, int begin, int end, int elementType, int[][] location)
jcas
- begin
- end
- private int[][] getWindowSpan(JCas jcas, java.lang.String sectionType, int typeAnnotation, int begin, int end, boolean useBegin, int sizeArrays) throws java.lang.Exception
java.lang.Exception
private boolean multipleDrugsInSpan(JCas jcas, int begin, int end)
jcas
- begin
- end
- private boolean multipleElementsInSpan(JCas jcas, int begin, int end)
jcas
- begin
- end
- private int[][] sortSignatureElements(JCas jcas, int begin, int end, int typeAnnotation, int[] senSpan, int sizeArray)
jcas
- begin
- end
- typeAnnotation
- senSpan
- private boolean findNextParenRelativeToNE(int spanLength, int[][] elementSpan, MedicationMention nea, int priorDrugEnd)
private int lastInPattern(JCas jcas, int begin, int end, int elementType, int[][] location)
jcas
- begin
- end
- private int[] findElementRelativeToNE(int[][] elementSpan, int[][] parenthesisSpan, int elementSpanLength, int parenthesisSpanLength, int priorDrugEnd, int startWithParenNum, int endPhrase, MedicationMention nea)
private int anchorEndofline(int[][] elementSpan, int elementSpanLength, int endPhrase, MedicationMention nea)
private boolean findNextClosestRightParenRelativeToElement(int spanLength, int[][] elementSpan, Annotation nea, int beginSpan, int startOffset)