public class ImputedFeatureEvaluatorImpl extends java.lang.Object implements ImputedFeatureEvaluator
The mutual information of each concept is stored in the feature_rank table. The related records in the feature_eval table have the following values:
evaluateCorpus(Parameters)
load instances, iterate through
labels
evaluateCorpusLabel(Parameters, ConceptGraph, InstanceData, String)
load concept - set[document] map for the specified label, iterate through
folds
evaluateCorpusFold(Parameters, Map, ConceptGraph, InstanceData, String, Map, int)
create raw joint distribution of each concept, compute parent joint
distributions, assign children mutual info of parents
completeJointDistroForFold(Map, Map, Set, Set, String)
computes
raw joint distribution of each concept
propagateJointDistribution(Map, Parameters, String, int, ConceptGraph, Map)
recursively compute parent joint distribution by merging joint distro of
children.
storeChildConcepts(List, Parameters, String, int, ConceptGraph, boolean)
take
top ranked parent concepts, assign concepts in subtrees the mutual info of
parents. Only concepts that exist in the corpus are added (depends on
computing the infocontent of concepts with CorpusEvaluator)
Modifier and Type | Class and Description |
---|---|
class |
ImputedFeatureEvaluatorImpl.ConceptInstanceMapExtractor
fill in map of Concept Id - bin - instance ids
|
static class |
ImputedFeatureEvaluatorImpl.JointDistribution
joint distribution of concept (x) and class (y).
|
static class |
ImputedFeatureEvaluatorImpl.Parameters
We are passing around quite a few parameters.
|
ImputedFeatureEvaluator.MeasureType
Modifier and Type | Field and Description |
---|---|
protected ClassifierEvaluationDao |
classifierEvaluationDao |
protected ConceptDao |
conceptDao |
private InfoContentEvaluator |
infoContentEvaluator |
protected JdbcTemplate |
jdbcTemplate |
protected KernelUtil |
kernelUtil |
private static Log |
log |
protected NamedParameterJdbcTemplate |
namedParamJdbcTemplate |
protected PlatformTransactionManager |
transactionManager |
private java.util.Properties |
ytexProperties |
SUFFIX_IMPUTED, SUFFIX_IMPUTED_FILTERED, SUFFIX_PROP
Constructor and Description |
---|
ImputedFeatureEvaluatorImpl() |
Modifier and Type | Method and Description |
---|---|
private void |
addSubtree(java.util.Set<java.lang.String> childConcepts,
ConcRel cr)
recursively add children of cr to childConcepts
|
private ImputedFeatureEvaluatorImpl.JointDistribution |
calcMergedJointDistribution(java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> conceptJointDistroMap,
java.util.Map<java.lang.String,java.lang.Integer> conceptDistMap,
ConcRel cr,
java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> rawJointDistroMap,
java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> yMargin,
java.lang.String xMerge,
double minInfo,
java.util.List<java.lang.String> path) |
private double |
calculateFoldEntropy(java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> classCountMap) |
private java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> |
completeJointDistroForFold(java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.Set<java.lang.Long>>> conceptInstanceMap,
java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> yMargin,
java.util.Set<java.lang.String> xVals,
java.util.Set<java.lang.String> yVals,
java.lang.String xLeftover)
finalize the joint distribution tables wrt a fold.
|
private void |
deleteFeatureEval(ImputedFeatureEvaluatorImpl.Parameters params,
java.lang.String label,
int foldId)
delete the feature evaluations before we insert them
|
protected static double |
entropy(double[] classProbs) |
protected static double |
entropy(java.lang.Iterable<java.lang.Double> classProbs)
calculate entropy from a list/array of probabilities
|
boolean |
evaluateCorpus(ImputedFeatureEvaluatorImpl.Parameters params) |
boolean |
evaluateCorpus(java.lang.String propFile) |
private void |
evaluateCorpusFold(ImputedFeatureEvaluatorImpl.Parameters params,
java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> yMargin,
ConceptGraph cg,
InstanceData instanceData,
java.lang.String label,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.Set<java.lang.Long>>> conceptInstanceMap,
int foldId) |
private void |
evaluateCorpusLabel(ImputedFeatureEvaluatorImpl.Parameters params,
ConceptGraph cg,
InstanceData instanceData,
java.lang.String label)
evaluate corpus on label
|
ClassifierEvaluationDao |
getClassifierEvaluationDao() |
ConceptDao |
getConceptDao() |
javax.sql.DataSource |
getDataSource(javax.sql.DataSource ds) |
private int |
getFoldId(ImputedFeatureEvaluatorImpl.Parameters params,
java.lang.String label,
int run,
int fold) |
private java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> |
getFoldYMargin(InstanceData instanceData,
java.lang.String label,
int run,
int fold) |
InfoContentEvaluator |
getInfoContentEvaluator() |
KernelUtil |
getKernelUtil() |
java.util.Properties |
getYtexProperties() |
private FeatureEvaluation |
initFeatureEval(ImputedFeatureEvaluatorImpl.Parameters params,
java.lang.String label,
int foldId,
java.lang.String type) |
private java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.Set<java.lang.Long>>> |
loadConceptInstanceMap(java.lang.String classFeatureQuery,
ConceptGraph cg,
java.lang.String label)
load the map of concept - instances
|
static void |
main(java.lang.String[] args) |
private static void |
printHelp(Options options) |
private FeatureEvaluation |
propagateJointDistribution(java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> rawJointDistroMap,
ImputedFeatureEvaluatorImpl.Parameters params,
java.lang.String label,
int foldId,
ConceptGraph cg,
java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> yMargin)
'complete' the joint distribution tables wrt a fold (yMargin).
|
private java.util.List<FeatureRank> |
rank(ImputedFeatureEvaluator.MeasureType measureType,
FeatureEvaluation fe,
java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> rawJointDistro,
double yEntropy,
java.util.List<FeatureRank> featureRankList) |
private FeatureEvaluation |
saveFeatureEvaluation(java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> rawJointDistro,
ImputedFeatureEvaluatorImpl.Parameters params,
java.lang.String label,
int foldId,
double yEntropy,
java.lang.String suffix,
java.util.List<FeatureRank> listRawRanks) |
void |
setClassifierEvaluationDao(ClassifierEvaluationDao classifierEvaluationDao) |
void |
setConceptDao(ConceptDao conceptDao) |
void |
setDataSource(javax.sql.DataSource ds) |
void |
setInfoContentEvaluator(InfoContentEvaluator infoContentEvaluator) |
void |
setKernelUtil(KernelUtil kernelUtil) |
void |
setYtexProperties(java.util.Properties ytexProperties) |
void |
storeChildConcepts(java.util.List<FeatureRank> listRawRanks,
ImputedFeatureEvaluatorImpl.Parameters params,
java.lang.String label,
int foldId,
ConceptGraph cg,
boolean bAll)
save the children of the 'top' parent concepts.
|
private void |
updateChildren(FeatureRank parentConcept,
java.util.Map<java.lang.String,FeatureRank> mapChildConcept,
FeatureEvaluation fe,
ConceptGraph cg,
java.util.Map<java.lang.String,java.lang.Double> conceptICMap,
java.util.Map<java.lang.String,java.lang.Double> conceptRawEvalMap,
java.util.Map<FeatureRank,java.util.Set<FeatureRank>> childParentMap,
double imputeWeight,
double minInfo)
add the children of parentConcept to mapChildConcept.
|
private static final Log log
protected ClassifierEvaluationDao classifierEvaluationDao
protected ConceptDao conceptDao
private InfoContentEvaluator infoContentEvaluator
protected JdbcTemplate jdbcTemplate
protected KernelUtil kernelUtil
protected NamedParameterJdbcTemplate namedParamJdbcTemplate
protected PlatformTransactionManager transactionManager
private java.util.Properties ytexProperties
protected static double entropy(double[] classProbs)
protected static double entropy(java.lang.Iterable<java.lang.Double> classProbs)
classProbs
- public static void main(java.lang.String[] args) throws ParseException, java.io.IOException
ParseException
java.io.IOException
private static void printHelp(Options options)
private void addSubtree(java.util.Set<java.lang.String> childConcepts, ConcRel cr)
childConcepts
- cr
- private ImputedFeatureEvaluatorImpl.JointDistribution calcMergedJointDistribution(java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> conceptJointDistroMap, java.util.Map<java.lang.String,java.lang.Integer> conceptDistMap, ConcRel cr, java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> rawJointDistroMap, java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> yMargin, java.lang.String xMerge, double minInfo, java.util.List<java.lang.String> path)
private double calculateFoldEntropy(java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> classCountMap)
private java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> completeJointDistroForFold(java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.Set<java.lang.Long>>> conceptInstanceMap, java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> yMargin, java.util.Set<java.lang.String> xVals, java.util.Set<java.lang.String> yVals, java.lang.String xLeftover)
jointDistroMap
- yMargin
- yVals
- xVals
- xLeftover
- private void deleteFeatureEval(ImputedFeatureEvaluatorImpl.Parameters params, java.lang.String label, int foldId)
params
- label
- foldId
- public boolean evaluateCorpus(ImputedFeatureEvaluatorImpl.Parameters params)
evaluateCorpus
in interface ImputedFeatureEvaluator
public boolean evaluateCorpus(java.lang.String propFile) throws java.io.IOException
evaluateCorpus
in interface ImputedFeatureEvaluator
java.io.IOException
private void evaluateCorpusFold(ImputedFeatureEvaluatorImpl.Parameters params, java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> yMargin, ConceptGraph cg, InstanceData instanceData, java.lang.String label, java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.Set<java.lang.Long>>> conceptInstanceMap, int foldId)
private void evaluateCorpusLabel(ImputedFeatureEvaluatorImpl.Parameters params, ConceptGraph cg, InstanceData instanceData, java.lang.String label)
classFeatureQuery
- minInfo
- xVals
- xLeftover
- xMerge
- eval
- cg
- instanceData
- label
- parentConceptTopThreshold
- parentConceptEvalThreshold
- public ClassifierEvaluationDao getClassifierEvaluationDao()
public ConceptDao getConceptDao()
public javax.sql.DataSource getDataSource(javax.sql.DataSource ds)
private int getFoldId(ImputedFeatureEvaluatorImpl.Parameters params, java.lang.String label, int run, int fold)
private java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> getFoldYMargin(InstanceData instanceData, java.lang.String label, int run, int fold)
public InfoContentEvaluator getInfoContentEvaluator()
public KernelUtil getKernelUtil()
public java.util.Properties getYtexProperties()
private FeatureEvaluation initFeatureEval(ImputedFeatureEvaluatorImpl.Parameters params, java.lang.String label, int foldId, java.lang.String type)
private java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.Set<java.lang.Long>>> loadConceptInstanceMap(java.lang.String classFeatureQuery, ConceptGraph cg, java.lang.String label)
classFeatureQuery
- cg
- label
- private FeatureEvaluation propagateJointDistribution(java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> rawJointDistroMap, ImputedFeatureEvaluatorImpl.Parameters params, java.lang.String label, int foldId, ConceptGraph cg, java.util.Map<java.lang.String,java.util.Set<java.lang.Long>> yMargin)
rawJointDistroMap
- labelEval
- cg
- yMargin
- xMerge
- minInfo
- private java.util.List<FeatureRank> rank(ImputedFeatureEvaluator.MeasureType measureType, FeatureEvaluation fe, java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> rawJointDistro, double yEntropy, java.util.List<FeatureRank> featureRankList)
private FeatureEvaluation saveFeatureEvaluation(java.util.Map<java.lang.String,ImputedFeatureEvaluatorImpl.JointDistribution> rawJointDistro, ImputedFeatureEvaluatorImpl.Parameters params, java.lang.String label, int foldId, double yEntropy, java.lang.String suffix, java.util.List<FeatureRank> listRawRanks)
public void setClassifierEvaluationDao(ClassifierEvaluationDao classifierEvaluationDao)
public void setConceptDao(ConceptDao conceptDao)
public void setDataSource(javax.sql.DataSource ds)
public void setInfoContentEvaluator(InfoContentEvaluator infoContentEvaluator)
public void setKernelUtil(KernelUtil kernelUtil)
public void setYtexProperties(java.util.Properties ytexProperties)
public void storeChildConcepts(java.util.List<FeatureRank> listRawRanks, ImputedFeatureEvaluatorImpl.Parameters params, java.lang.String label, int foldId, ConceptGraph cg, boolean bAll)
labelEval
- parentConceptTopThreshold
- parentConceptEvalThreshold
- cg
- bAll
- impute to all concepts, or only to concepts actually in the corpus. If we are
imputing to all concepts, filter by infocontent (this includes
hypernyms of concepts in the corpus). Otherwise, only impute to
concrete concepts in the corpus.
private void updateChildren(FeatureRank parentConcept, java.util.Map<java.lang.String,FeatureRank> mapChildConcept, FeatureEvaluation fe, ConceptGraph cg, java.util.Map<java.lang.String,java.lang.Double> conceptICMap, java.util.Map<java.lang.String,java.lang.Double> conceptRawEvalMap, java.util.Map<FeatureRank,java.util.Set<FeatureRank>> childParentMap, double imputeWeight, double minInfo)
parentConcept
- mapChildConcept
- labelEval
- cg
- parentChildMap
- conceptRawEvalMap
-