public class Utils
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
static class |
Utils.Callback
Read event duration distributions from file.
|
Modifier and Type | Field and Description |
---|---|
static java.lang.String[] |
bins |
static java.lang.String |
durationDistributionPath |
Constructor and Description |
---|
Utils() |
Modifier and Type | Method and Description |
---|---|
static java.util.Map<java.lang.String,java.lang.Float> |
convertToDistribution(java.lang.String timeUnit)
Take a time unit and return a probability distribution
in which p(this time unit) = 1 and all others are zero.
|
static float |
expectedDuration(java.util.Map<java.lang.String,java.lang.Float> distribution)
Compute expected duration in seconds.
|
static java.lang.String |
formatDistribution(java.lang.String mentionText,
durationDistribution,
java.lang.String separator,
boolean normalize)
Convert duration distribution multiset to a format that's easy to parse automatically.
|
static CollectionReader |
getCollectionReader(java.util.List<java.io.File> inputFiles)
Instantiate an XMI collection reader.
|
static java.util.List<java.io.File> |
getFilesFor(java.util.List<java.lang.Integer> patientSets,
java.io.File inputDirectory)
Get files for specific sets of patients.
|
static java.lang.String |
getPosTag(JCas systemView,
Annotation annotation)
Return system generated POS tag or null if none available.
|
static java.lang.String |
getTextBetweenAnnotations(JCas jCas,
Annotation arg1,
Annotation arg2)
Get relation context.
|
static java.util.HashSet<java.lang.String> |
getTimeUnits(java.lang.String timex)
Extract time unit(s) from a temporal expression
and put each in one of the eight bins above.
|
static java.lang.String |
lemmatize(java.lang.String word,
java.lang.String pos)
Lemmatize word using ClearNLP lemmatizer.
|
static void |
main(java.lang.String[] args) |
static java.lang.String |
normalizeEventText(JCas jCas,
Annotation annotation)
Keep UMLS concepts and non-verbs intact.
|
static java.lang.String |
putInBin(java.lang.String timeUnit)
Take the time unit from the Bethard normalizer
and return a coarser time unit.
|
static |
runTimexParser(java.lang.String timex)
Use Bethard normalizer to map a temporal expression to a time unit.
|
static void |
writeInstance(java.lang.String label,
java.util.List<Feature> features,
java.lang.String fileName)
Output label and list of cleartk features to a file for debugging.
|
public static final java.lang.String durationDistributionPath
public static final java.lang.String[] bins
public static java.util.HashSet<java.lang.String> getTimeUnits(java.lang.String timex)
public static runTimexParser(java.lang.String timex)
public static java.lang.String putInBin(java.lang.String timeUnit)
public static float expectedDuration(java.util.Map<java.lang.String,java.lang.Float> distribution)
public static java.util.Map<java.lang.String,java.lang.Float> convertToDistribution(java.lang.String timeUnit)
public static java.lang.String formatDistribution(java.lang.String mentionText, durationDistribution, java.lang.String separator, boolean normalize)
public static java.lang.String getTextBetweenAnnotations(JCas jCas, Annotation arg1, Annotation arg2)
public static java.lang.String lemmatize(java.lang.String word, java.lang.String pos) throws java.io.IOException
Throws: java.io.IOException
public static java.lang.String getPosTag(JCas systemView, Annotation annotation)
public static java.lang.String normalizeEventText(JCas jCas, Annotation annotation) throws AnalysisEngineProcessException
Throws: AnalysisEngineProcessException
public static CollectionReader getCollectionReader(java.util.List<java.io.File> inputFiles) throws java.lang.Exception
Throws: java.lang.Exception
public static java.util.List<java.io.File> getFilesFor(java.util.List<java.lang.Integer> patientSets, java.io.File inputDirectory)
public static void writeInstance(java.lang.String label, java.util.List<Feature> features, java.lang.String fileName)
public static void main(java.lang.String[] args)