Usage of edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve in the CoreNLP project (stanfordnlp).
From class SieveCoreferenceSystem, method printTopK:
/**
 * Prints detailed per-mention logs for coreference error analysis: for every
 * mention it logs its system/gold cluster ids, its cluster contents, the
 * ordered candidate antecedents in each preceding (and current) sentence, and
 * the first precision error (wrongly chosen link) and first recall error
 * (missed gold link) encountered.
 *
 * @param logger    logger receiving the analysis output (uses FINE/FINER/FINEST levels)
 * @param document  document providing ordered mentions, coref clusters,
 *                  mention positions, and gold mentions
 * @param semantics passed through to printLinkWithContext for context printing
 */
public void printTopK(Logger logger, Document document, Semantics semantics) {
List<List<Mention>> orderedMentionsBySentence = document.getOrderedMentions();
Map<Integer, CorefCluster> corefClusters = document.corefClusters;
Map<Mention, IntTuple> positions = document.allPositions;
Map<Integer, Mention> golds = document.allGoldMentions;
logger.fine("=======ERROR ANALYSIS=========================================================");
// Temporary sieve for getting ordered antecedents
DeterministicCorefSieve tmpSieve = new ExactStringMatch();
// i = sentence index, j = mention index within the sentence
for (int i = 0; i < orderedMentionsBySentence.size(); i++) {
List<Mention> orderedMentions = orderedMentionsBySentence.get(i);
for (int j = 0; j < orderedMentions.size(); j++) {
Mention m = orderedMentions.get(j);
logger.fine("=========Line: " + i + "\tmention: " + j + "=======================================================");
logger.fine(m.spanToString() + "\tmentionID: " + m.mentionID + "\tcorefClusterID: " + m.corefClusterID + "\tgoldCorefClusterID: " + m.goldCorefClusterID);
CorefCluster corefCluster = corefClusters.get(m.corefClusterID);
if (corefCluster != null) {
corefCluster.printCorefCluster(logger);
} else {
logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID);
}
logger.fine("-------------------------------------------------------");
// Only the first precision error and the first recall error for this
// mention are printed; alreadyChoose records whether an antecedent in
// m's cluster has already been seen while scanning backwards.
boolean oneRecallErrorPrinted = false;
boolean onePrecisionErrorPrinted = false;
boolean alreadyChoose = false;
// Scan sentences backwards from the current one (sentJ == i covers
// antecedents earlier in the same sentence).
for (int sentJ = i; sentJ >= 0; sentJ--) {
List<Mention> l = tmpSieve.getOrderedAntecedents(sentJ, i, orderedMentions, orderedMentionsBySentence, m, j, corefClusters, dictionaries);
// Sort mentions by length whenever we have two mentions beginning at the same position and having the same head
for (int ii = 0; ii < l.size(); ii++) {
for (int jj = 0; jj < l.size(); jj++) {
if (l.get(ii).headString.equals(l.get(jj).headString) && l.get(ii).startIndex == l.get(jj).startIndex && l.get(ii).sameSentence(l.get(jj)) && jj > ii && l.get(ii).spanToString().length() > l.get(jj).spanToString().length()) {
logger.finest("FLIPPED: " + l.get(ii).spanToString() + "(" + ii + "), " + l.get(jj).spanToString() + "(" + jj + ")");
// Swap elements ii and jj: List.set returns the previous element,
// so the inner set's return value feeds the outer set.
l.set(jj, l.set(ii, l.get(jj)));
}
}
}
logger.finest("Candidates in sentence #" + sentJ + " for mention: " + m.spanToString());
for (int ii = 0; ii < l.size(); ii++) {
logger.finest("\tCandidate #" + ii + ": " + l.get(ii).spanToString());
}
for (Mention antecedent : l) {
// "chosen": the system placed m and this antecedent in the same cluster.
boolean chosen = (m.corefClusterID == antecedent.corefClusterID);
IntTuple src = new IntTuple(2);
src.set(0, i);
src.set(1, j);
IntTuple ant = positions.get(antecedent);
if (ant == null)
continue;
//correct=(chosen==goldLinks.contains(new Pair<IntTuple, IntTuple>(src,ant)));
// "coreferent": the gold annotation places both mentions in the same gold cluster.
boolean coreferent = golds.containsKey(m.mentionID) && golds.containsKey(antecedent.mentionID) && (golds.get(m.mentionID).goldCorefClusterID == golds.get(antecedent.mentionID).goldCorefClusterID);
boolean correct = (chosen == coreferent);
String chosenness = chosen ? "Chosen" : "Not Chosen";
String correctness = correct ? "Correct" : "Incorrect";
logger.fine("\t" + correctness + "\t\t" + chosenness + "\t" + antecedent.spanToString());
CorefCluster mC = corefClusters.get(m.corefClusterID);
CorefCluster aC = corefClusters.get(antecedent.corefClusterID);
// Precision error: system linked the pair but gold says they are not coreferent.
if (chosen && !correct && !onePrecisionErrorPrinted && !alreadyChoose) {
onePrecisionErrorPrinted = true;
printLinkWithContext(logger, "\nPRECISION ERROR ", src, ant, document, semantics);
logger.fine("END of PRECISION ERROR LOG");
}
// Recall error: gold says coreferent but the system did not link them.
if (!chosen && !correct && !oneRecallErrorPrinted && (!alreadyChoose || (alreadyChoose && onePrecisionErrorPrinted))) {
oneRecallErrorPrinted = true;
printLinkWithContext(logger, "\nRECALL ERROR ", src, ant, document, semantics);
logger.finer("cluster info: ");
if (mC != null) {
mC.printCorefCluster(logger);
} else {
logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID);
}
logger.finer("----------------------------------------------------------");
if (aC != null) {
aC.printCorefCluster(logger);
} else {
// NOTE(review): logs m.corefClusterID here although the missing
// cluster is the antecedent's — possibly intentional, verify upstream.
logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID);
}
logger.finer("");
logger.fine("END of RECALL ERROR LOG");
}
if (chosen)
alreadyChoose = true;
}
}
logger.fine("\n");
}
}
logger.fine("===============================================================================");
}
Usage of edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve in the CoreNLP project (stanfordnlp).
From class SieveCoreferenceSystem, method corefReturnHybridOutput:
/**
 * Runs the full multi-pass sieve coreference pipeline over the document and
 * returns the result converted to the hybrid (hcoref) CorefChain format,
 * i.e. {@code edu.stanford.nlp.coref.data.CorefChain} instead of the dcoref
 * CorefChain produced by {@link #coref}.
 *
 * @param document the document to resolve; its clusters are mutated in place
 * @return map from cluster id to the hybrid-format CorefChain for that cluster
 * @throws Exception if any sieve pass or scoring step fails
 */
public Map<Integer, edu.stanford.nlp.coref.data.CorefChain> corefReturnHybridOutput(Document document) throws Exception {
// Multi-pass sieve coreference resolution
for (int i = 0; i < sieves.length; i++) {
currentSieve = i;
DeterministicCorefSieve sieve = sieves[i];
// Do coreference resolution using this pass
coreference(document, sieve);
}
// post processing (e.g., removing singletons, appositions for conll)
if ((!Constants.USE_GOLD_MENTIONS && doPostProcessing) || replicateCoNLL)
postProcessing(document);
// coref system output: edu.stanford.nlp.hcoref.data.CorefChain
Map<Integer, edu.stanford.nlp.coref.data.CorefChain> result = Generics.newHashMap();
for (CorefCluster c : document.corefClusters.values()) {
// build mentionsMap and represents
// A single dummy (0,0) key is used: every mention of the cluster goes
// into the one set stored under keyPair.
Map<IntPair, Set<edu.stanford.nlp.coref.data.CorefChain.CorefMention>> mentionsMap = Generics.newHashMap();
IntPair keyPair = new IntPair(0, 0);
mentionsMap.put(keyPair, new HashSet<>());
Mention represents = null;
edu.stanford.nlp.coref.data.CorefChain.CorefMention representsHybridVersion = null;
for (Mention mention : c.getCorefMentions()) {
// convert dcoref CorefMention to hcoref CorefMention
//IntPair mentionPosition = new IntPair(mention.sentNum, mention.headIndex);
IntTuple mentionPosition = document.positions.get(mention);
CorefMention dcorefMention = new CorefMention(mention, mentionPosition);
// tokens need the hcoref version of CorefClusterIdAnnotation
mention.headWord.set(edu.stanford.nlp.coref.CorefCoreAnnotations.CorefClusterIdAnnotation.class, mention.corefClusterID);
// drop the dcoref version of CorefClusterIdAnnotation
mention.headWord.remove(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
// make the hcoref mention
// Enums are bridged between the two packages by name (valueOf on the
// corresponding hcoref enum); the remaining fields are copied directly.
edu.stanford.nlp.coref.data.CorefChain.CorefMention hcorefMention = new edu.stanford.nlp.coref.data.CorefChain.CorefMention(edu.stanford.nlp.coref.data.Dictionaries.MentionType.valueOf(dcorefMention.mentionType.name()), edu.stanford.nlp.coref.data.Dictionaries.Number.valueOf(dcorefMention.number.name()), edu.stanford.nlp.coref.data.Dictionaries.Gender.valueOf(dcorefMention.gender.name()), edu.stanford.nlp.coref.data.Dictionaries.Animacy.valueOf(dcorefMention.animacy.name()), dcorefMention.startIndex, dcorefMention.endIndex, dcorefMention.headIndex, dcorefMention.corefClusterID, dcorefMention.mentionID, dcorefMention.sentNum, dcorefMention.position, dcorefMention.mentionSpan);
mentionsMap.get(keyPair).add(hcorefMention);
// Track the most representative mention to use as the chain's representative.
if (mention.moreRepresentativeThan(represents)) {
represents = mention;
representsHybridVersion = hcorefMention;
}
}
edu.stanford.nlp.coref.data.CorefChain hybridCorefChain = new edu.stanford.nlp.coref.data.CorefChain(c.clusterID, mentionsMap, representsHybridVersion);
result.put(c.clusterID, hybridCorefChain);
}
return result;
}
Usage of edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve in the CoreNLP project (stanfordnlp).
From class SieveCoreferenceSystem, method optimizeSieveOrdering:
/**
 * Given a set of sieves, selects an optimal ordering for the sieves by greedy
 * forward selection: at each pass it tries every remaining (selectable) sieve
 * appended to the already-chosen prefix, scores the resulting system, and
 * permanently adds the best-scoring sieve, until no sieves are left.
 * Ordering constraints from {@code sievesKeepOrder} are honored when building
 * the set of selectable candidates. Scoring runs either locally or via a
 * distributed command if {@code Constants.RUN_DIST_CMD_PROP} is set.
 * Mutates the {@code sieves} and {@code sieveClassNames} fields to hold the
 * final optimized ordering.
 *
 * @param mentionExtractor used by local scoring runs
 * @param props            configuration; cloned/rewritten for distributed jobs
 * @param timestamp        suffix for the working directory of distributed jobs
 * @throws Exception if scoring or file I/O fails
 */
public void optimizeSieveOrdering(MentionExtractor mentionExtractor, Properties props, String timestamp) throws Exception {
logger.info("=============SIEVE OPTIMIZATION START ====================");
logger.info("Optimize sieves using score: " + optimizeScoreType);
// Filter matching the .score files produced by distributed scoring jobs.
FileFilter scoreFilesFilter = new FileFilter() {
@Override
public boolean accept(File file) {
return file.getAbsolutePath().endsWith(".score");
}
public String toString() {
return ".score";
}
};
// Group 1 = pass number, group 2 = candidate sieve index.
Pattern scoreFilePattern = Pattern.compile(".*sieves\\.(\\d+)\\.(\\d+).score");
String runDistributedCmd = props.getProperty(Constants.RUN_DIST_CMD_PROP);
String mainWorkDirPath = props.getProperty(Constants.RUN_DIST_CMD_WORK_DIR, "workdir") + "-" + timestamp + File.separator;
// Keep the original arrays; the fields are rebuilt each iteration below.
DeterministicCorefSieve[] origSieves = sieves;
String[] origSieveNames = sieveClassNames;
Set<Integer> remainingSieveIndices = Generics.newHashSet();
for (int i = 0; i < origSieves.length; i++) {
remainingSieveIndices.add(i);
}
List<Integer> optimizedOrdering = new ArrayList<>();
while (!remainingSieveIndices.isEmpty()) {
// initialize array of current sieves
// The current arrays hold the chosen prefix plus one trial slot at the end.
int curSievesNumber = optimizedOrdering.size();
sieves = new DeterministicCorefSieve[curSievesNumber + 1];
sieveClassNames = new String[curSievesNumber + 1];
for (int i = 0; i < curSievesNumber; i++) {
sieves[i] = origSieves[optimizedOrdering.get(i)];
sieveClassNames[i] = origSieveNames[optimizedOrdering.get(i)];
}
logger.info("*** Optimizing Sieve ordering for pass " + curSievesNumber + " ***");
// Get list of sieves that we can pick from for the next sieve
Set<Integer> selectableSieveIndices = new TreeSet<>(remainingSieveIndices);
// Based on ordering constraints remove sieves from options
// Constraint pairs (a, b) are interpreted as:
//   (a, <0): sieve a must come next if still remaining (restrict to a)
//   (<0, b): sieve b must come last (exclude b while others remain)
//   (a, b):  sieve a must precede sieve b (exclude b while a remains)
if (sievesKeepOrder != null) {
for (Pair<Integer, Integer> ko : sievesKeepOrder) {
if (ko.second() < 0) {
if (remainingSieveIndices.contains(ko.first())) {
logger.info("Restrict selection to " + origSieveNames[ko.first()] + " because of constraint " + toSieveOrderConstraintString(ko, origSieveNames));
selectableSieveIndices = Generics.newHashSet(1);
selectableSieveIndices.add(ko.first());
break;
}
} else if (ko.first() < 0 && remainingSieveIndices.size() > 1) {
if (remainingSieveIndices.contains(ko.second())) {
logger.info("Remove selection " + origSieveNames[ko.second()] + " because of constraint " + toSieveOrderConstraintString(ko, origSieveNames));
selectableSieveIndices.remove(ko.second());
}
} else if (remainingSieveIndices.contains(ko.first())) {
if (remainingSieveIndices.contains(ko.second())) {
logger.info("Remove selection " + origSieveNames[ko.second()] + " because of constraint " + toSieveOrderConstraintString(ko, origSieveNames));
selectableSieveIndices.remove(ko.second());
}
}
}
}
if (selectableSieveIndices.isEmpty()) {
throw new RuntimeException("Unable to find sieve ordering to satisfy all ordering constraints!!!!");
}
int selected = -1;
if (selectableSieveIndices.size() > 1) {
// Go through remaining sieves and see how well they do
List<Pair<Double, Integer>> scores = new ArrayList<>();
if (runDistributedCmd != null) {
// Distributed scoring: launch one job per candidate sieve, then
// collect the .score files they write.
String workDirPath = mainWorkDirPath + curSievesNumber + File.separator;
File workDir = new File(workDirPath);
workDir.mkdirs();
workDirPath = workDir.getAbsolutePath() + File.separator;
// Start jobs
for (int potentialSieveIndex : selectableSieveIndices) {
String sieveSelectionId = curSievesNumber + "." + potentialSieveIndex;
String jobDirPath = workDirPath + sieveSelectionId + File.separator;
File jobDir = new File(jobDirPath);
jobDir.mkdirs();
// Clone properties, substituting ${JOBDIR} with this job's directory.
Properties newProps = new Properties();
for (String key : props.stringPropertyNames()) {
String value = props.getProperty(key);
value = value.replaceAll("\\$\\{JOBDIR\\}", jobDirPath);
newProps.setProperty(key, value);
}
// try this sieve and see how well it works
sieves[curSievesNumber] = origSieves[potentialSieveIndex];
sieveClassNames[curSievesNumber] = origSieveNames[potentialSieveIndex];
newProps.setProperty(Constants.OPTIMIZE_SIEVES_PROP, "false");
newProps.setProperty(Constants.SCORE_PROP, "true");
newProps.setProperty(Constants.SIEVES_PROP, StringUtils.join(sieveClassNames, ","));
newProps.setProperty(Constants.LOG_PROP, jobDirPath + "sieves." + sieveSelectionId + ".log");
newProps.setProperty(Constants.SCORE_FILE_PROP, workDirPath + "sieves." + sieveSelectionId + ".score");
if (Constants.PRINT_CONLL_OUTPUT || replicateCoNLL) {
newProps.setProperty(Constants.CONLL_OUTPUT_PROP, jobDirPath + "sieves." + sieveSelectionId + ".conlloutput");
}
String distCmd = newProps.getProperty(Constants.RUN_DIST_CMD_PROP, runDistributedCmd);
runAndScoreCorefDist(distCmd, newProps, workDirPath + "sieves." + sieveSelectionId + ".props");
}
// Wait for jobs to finish and collect scores
waitForFiles(workDir, scoreFilesFilter, selectableSieveIndices.size());
// Get scores
File[] scoreFiles = workDir.listFiles(scoreFilesFilter);
for (File file : scoreFiles) {
Matcher m = scoreFilePattern.matcher(file.getName());
if (m.matches()) {
int potentialSieveIndex = Integer.parseInt(m.group(2));
String text = IOUtils.slurpFile(file);
double score = Double.parseDouble(text);
// keeps scores so we can select best score and log them
scores.add(new Pair<>(score, potentialSieveIndex));
} else {
throw new RuntimeException("Bad score file name: " + file);
}
}
} else {
// Local scoring: run each candidate in-process.
for (int potentialSieveIndex : selectableSieveIndices) {
// try this sieve and see how well it works
sieves[curSievesNumber] = origSieves[potentialSieveIndex];
sieveClassNames[curSievesNumber] = origSieveNames[potentialSieveIndex];
logger.info("Trying sieve " + curSievesNumber + "=" + sieveClassNames[curSievesNumber] + ": ");
logger.info(" Trying sieves: " + StringUtils.join(sieveClassNames, ","));
double score = runAndScoreCoref(this, mentionExtractor, props, timestamp);
// keeps scores so we can select best score and log them
scores.add(new Pair<>(score, potentialSieveIndex));
logger.info(" Trying sieves: " + StringUtils.join(sieveClassNames, ","));
logger.info(" Trying sieves score: " + score);
}
}
// Select bestScore
// selected < 0 guarantees the first entry is taken even if all scores <= -1.
double bestScore = -1;
for (Pair<Double, Integer> p : scores) {
if (selected < 0 || p.first() > bestScore) {
bestScore = p.first();
selected = p.second();
}
}
// log ordered scores
Collections.sort(scores);
Collections.reverse(scores);
logger.info("Ordered sieves");
for (Pair<Double, Integer> p : scores) {
logger.info("Sieve optimization pass " + curSievesNumber + " scores: Sieve=" + origSieveNames[p.second()] + ", score=" + p.first());
}
} else {
// Only one sieve
logger.info("Only one choice for next sieve");
selected = selectableSieveIndices.iterator().next();
}
// log sieve we are adding
sieves[curSievesNumber] = origSieves[selected];
sieveClassNames[curSievesNumber] = origSieveNames[selected];
logger.info("Adding sieve " + curSievesNumber + "=" + sieveClassNames[curSievesNumber] + " to existing sieves: ");
logger.info(" Current Sieves: " + StringUtils.join(sieveClassNames, ","));
// select optimal sieve and add it to our optimized ordering
optimizedOrdering.add(selected);
remainingSieveIndices.remove(selected);
}
logger.info("Final Sieve Ordering: " + StringUtils.join(sieveClassNames, ","));
logger.info("=============SIEVE OPTIMIZATION DONE ====================");
}
Usage of edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve in the CoreNLP project (stanfordnlp).
From class SieveCoreferenceSystem, method coref:
/**
 * Extracts coreference clusters from the given document.
 * This is the main API entry point for coreference resolution: it runs every
 * sieve pass in order, applies post-processing when configured (e.g. removing
 * singletons or appositions for CoNLL replication), and converts the resulting
 * clusters into CorefChain objects.
 *
 * @param document the document to resolve; its clusters are mutated in place
 * @return a map from CorefChain ID to the corresponding CorefChain
 * @throws Exception if a sieve pass fails
 */
public Map<Integer, CorefChain> coref(Document document) throws Exception {
// Apply each sieve pass in order, recording which pass is active.
for (int pass = 0; pass < sieves.length; pass++) {
currentSieve = pass;
coreference(document, sieves[pass]);
}
// Post-process unless gold mentions are used without it being requested
// (singleton removal, apposition handling for CoNLL output).
boolean runPostProcessing = (!Constants.USE_GOLD_MENTIONS && doPostProcessing) || replicateCoNLL;
if (runPostProcessing) {
postProcessing(document);
}
// Wrap every remaining cluster in a CorefChain keyed by its cluster id.
Map<Integer, CorefChain> chains = Generics.newHashMap();
for (CorefCluster cluster : document.corefClusters.values()) {
chains.put(cluster.clusterID, new CorefChain(cluster, document.positions));
}
return chains;
}
Aggregations