Search in sources:

Example 1 with DeterministicCorefSieve

Use of edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve in project CoreNLP by stanfordnlp.

Class SieveCoreferenceSystem, method printTopK.

/** Print logs for error analysis */
public void printTopK(Logger logger, Document document, Semantics semantics) {
    List<List<Mention>> orderedMentionsBySentence = document.getOrderedMentions();
    Map<Integer, CorefCluster> corefClusters = document.corefClusters;
    Map<Mention, IntTuple> positions = document.allPositions;
    Map<Integer, Mention> golds = document.allGoldMentions;
    logger.fine("=======ERROR ANALYSIS=========================================================");
    // Temporary sieve for getting ordered antecedents
    DeterministicCorefSieve tmpSieve = new ExactStringMatch();
    for (int i = 0; i < orderedMentionsBySentence.size(); i++) {
        List<Mention> orderedMentions = orderedMentionsBySentence.get(i);
        for (int j = 0; j < orderedMentions.size(); j++) {
            Mention m = orderedMentions.get(j);
            logger.fine("=========Line: " + i + "\tmention: " + j + "=======================================================");
            logger.fine(m.spanToString() + "\tmentionID: " + m.mentionID + "\tcorefClusterID: " + m.corefClusterID + "\tgoldCorefClusterID: " + m.goldCorefClusterID);
            CorefCluster corefCluster = corefClusters.get(m.corefClusterID);
            if (corefCluster != null) {
                corefCluster.printCorefCluster(logger);
            } else {
                logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID);
            }
            logger.fine("-------------------------------------------------------");
            boolean oneRecallErrorPrinted = false;
            boolean onePrecisionErrorPrinted = false;
            boolean alreadyChoose = false;
            for (int sentJ = i; sentJ >= 0; sentJ--) {
                List<Mention> l = tmpSieve.getOrderedAntecedents(sentJ, i, orderedMentions, orderedMentionsBySentence, m, j, corefClusters, dictionaries);
                // Sort mentions by length whenever we have two mentions beginning at the same position and having the same head
                for (int ii = 0; ii < l.size(); ii++) {
                    for (int jj = 0; jj < l.size(); jj++) {
                        if (l.get(ii).headString.equals(l.get(jj).headString) && l.get(ii).startIndex == l.get(jj).startIndex && l.get(ii).sameSentence(l.get(jj)) && jj > ii && l.get(ii).spanToString().length() > l.get(jj).spanToString().length()) {
                            logger.finest("FLIPPED: " + l.get(ii).spanToString() + "(" + ii + "), " + l.get(jj).spanToString() + "(" + jj + ")");
                            l.set(jj, l.set(ii, l.get(jj)));
                        }
                    }
                }
                logger.finest("Candidates in sentence #" + sentJ + " for mention: " + m.spanToString());
                for (int ii = 0; ii < l.size(); ii++) {
                    logger.finest("\tCandidate #" + ii + ": " + l.get(ii).spanToString());
                }
                for (Mention antecedent : l) {
                    boolean chosen = (m.corefClusterID == antecedent.corefClusterID);
                    IntTuple src = new IntTuple(2);
                    src.set(0, i);
                    src.set(1, j);
                    IntTuple ant = positions.get(antecedent);
                    if (ant == null)
                        continue;
                    //correct=(chosen==goldLinks.contains(new Pair<IntTuple, IntTuple>(src,ant)));
                    boolean coreferent = golds.containsKey(m.mentionID) && golds.containsKey(antecedent.mentionID) && (golds.get(m.mentionID).goldCorefClusterID == golds.get(antecedent.mentionID).goldCorefClusterID);
                    boolean correct = (chosen == coreferent);
                    String chosenness = chosen ? "Chosen" : "Not Chosen";
                    String correctness = correct ? "Correct" : "Incorrect";
                    logger.fine("\t" + correctness + "\t\t" + chosenness + "\t" + antecedent.spanToString());
                    CorefCluster mC = corefClusters.get(m.corefClusterID);
                    CorefCluster aC = corefClusters.get(antecedent.corefClusterID);
                    if (chosen && !correct && !onePrecisionErrorPrinted && !alreadyChoose) {
                        onePrecisionErrorPrinted = true;
                        printLinkWithContext(logger, "\nPRECISION ERROR ", src, ant, document, semantics);
                        logger.fine("END of PRECISION ERROR LOG");
                    }
                    if (!chosen && !correct && !oneRecallErrorPrinted && (!alreadyChoose || (alreadyChoose && onePrecisionErrorPrinted))) {
                        oneRecallErrorPrinted = true;
                        printLinkWithContext(logger, "\nRECALL ERROR ", src, ant, document, semantics);
                        logger.finer("cluster info: ");
                        if (mC != null) {
                            mC.printCorefCluster(logger);
                        } else {
                            logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID);
                        }
                        logger.finer("----------------------------------------------------------");
                        if (aC != null) {
                            aC.printCorefCluster(logger);
                        } else {
                            logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID);
                        }
                        logger.finer("");
                        logger.fine("END of RECALL ERROR LOG");
                    }
                    if (chosen)
                        alreadyChoose = true;
                }
            }
            logger.fine("\n");
        }
    }
    logger.fine("===============================================================================");
}
Also used : DeterministicCorefSieve(edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve) ExactStringMatch(edu.stanford.nlp.dcoref.sievepasses.ExactStringMatch) CorefMention(edu.stanford.nlp.dcoref.CorefChain.CorefMention) ArrayList(java.util.ArrayList) List(java.util.List)
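
A minimal usage sketch for the method above, not taken from the CoreNLP sources: the wrapper class ErrorAnalysisDemo and the logger name are hypothetical, and the caller is assumed to already have an arranged dcoref Document and its Semantics. Since printTopK reads document.allGoldMentions, the Document is assumed to carry gold mention annotations.

// Hedged sketch; ErrorAnalysisDemo and the logger name are made up for illustration.
import java.util.Properties;
import java.util.logging.Logger;
import edu.stanford.nlp.dcoref.Document;
import edu.stanford.nlp.dcoref.Semantics;
import edu.stanford.nlp.dcoref.SieveCoreferenceSystem;

public class ErrorAnalysisDemo {
    public static void logErrors(Document document, Semantics semantics, Properties props) throws Exception {
        SieveCoreferenceSystem corefSystem = new SieveCoreferenceSystem(props);
        Logger logger = Logger.getLogger("dcoref.erroranalysis");
        // Run the sieve passes first so corefClusters and mention positions are populated,
        // then log the precision/recall error analysis (requires gold mentions on the Document).
        corefSystem.coref(document);
        corefSystem.printTopK(logger, document, semantics);
    }
}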

Example 2 with DeterministicCorefSieve

Use of edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve in project CoreNLP by stanfordnlp.

Class SieveCoreferenceSystem, method corefReturnHybridOutput.

public Map<Integer, edu.stanford.nlp.coref.data.CorefChain> corefReturnHybridOutput(Document document) throws Exception {
    // Multi-pass sieve coreference resolution
    for (int i = 0; i < sieves.length; i++) {
        currentSieve = i;
        DeterministicCorefSieve sieve = sieves[i];
        // Do coreference resolution using this pass
        coreference(document, sieve);
    }
    // post processing (e.g., removing singletons, appositions for conll)
    if ((!Constants.USE_GOLD_MENTIONS && doPostProcessing) || replicateCoNLL)
        postProcessing(document);
    // coref system output: edu.stanford.nlp.hcoref.data.CorefChain
    Map<Integer, edu.stanford.nlp.coref.data.CorefChain> result = Generics.newHashMap();
    for (CorefCluster c : document.corefClusters.values()) {
        // build mentionsMap and represents
        Map<IntPair, Set<edu.stanford.nlp.coref.data.CorefChain.CorefMention>> mentionsMap = Generics.newHashMap();
        IntPair keyPair = new IntPair(0, 0);
        mentionsMap.put(keyPair, new HashSet<>());
        Mention represents = null;
        edu.stanford.nlp.coref.data.CorefChain.CorefMention representsHybridVersion = null;
        for (Mention mention : c.getCorefMentions()) {
            // convert dcoref CorefMention to hcoref CorefMention
            //IntPair mentionPosition = new IntPair(mention.sentNum, mention.headIndex);
            IntTuple mentionPosition = document.positions.get(mention);
            CorefMention dcorefMention = new CorefMention(mention, mentionPosition);
            // tokens need the hcoref version of CorefClusterIdAnnotation
            mention.headWord.set(edu.stanford.nlp.coref.CorefCoreAnnotations.CorefClusterIdAnnotation.class, mention.corefClusterID);
            // drop the dcoref version of CorefClusterIdAnnotation
            mention.headWord.remove(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
            // make the hcoref mention
            edu.stanford.nlp.coref.data.CorefChain.CorefMention hcorefMention =
                new edu.stanford.nlp.coref.data.CorefChain.CorefMention(
                    edu.stanford.nlp.coref.data.Dictionaries.MentionType.valueOf(dcorefMention.mentionType.name()),
                    edu.stanford.nlp.coref.data.Dictionaries.Number.valueOf(dcorefMention.number.name()),
                    edu.stanford.nlp.coref.data.Dictionaries.Gender.valueOf(dcorefMention.gender.name()),
                    edu.stanford.nlp.coref.data.Dictionaries.Animacy.valueOf(dcorefMention.animacy.name()),
                    dcorefMention.startIndex, dcorefMention.endIndex, dcorefMention.headIndex,
                    dcorefMention.corefClusterID, dcorefMention.mentionID, dcorefMention.sentNum,
                    dcorefMention.position, dcorefMention.mentionSpan);
            mentionsMap.get(keyPair).add(hcorefMention);
            if (mention.moreRepresentativeThan(represents)) {
                represents = mention;
                representsHybridVersion = hcorefMention;
            }
        }
        edu.stanford.nlp.coref.data.CorefChain hybridCorefChain = new edu.stanford.nlp.coref.data.CorefChain(c.clusterID, mentionsMap, representsHybridVersion);
        result.put(c.clusterID, hybridCorefChain);
    }
    return result;
}
Also used : TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) DeterministicCorefSieve(edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve) CorefMention(edu.stanford.nlp.dcoref.CorefChain.CorefMention)
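
A corresponding sketch for corefReturnHybridOutput, again with a hypothetical wrapper class; it assumes an arranged Document and uses getRepresentativeMention() from the hybrid edu.stanford.nlp.coref.data.CorefChain API.

// Hedged sketch; HybridOutputDemo is not part of CoreNLP.
import java.util.Map;
import edu.stanford.nlp.dcoref.Document;
import edu.stanford.nlp.dcoref.SieveCoreferenceSystem;

public class HybridOutputDemo {
    public static void printHybridChains(SieveCoreferenceSystem corefSystem, Document document) throws Exception {
        // Runs the sieve passes and converts the dcoref clusters into hybrid (hcoref-style) chains.
        Map<Integer, edu.stanford.nlp.coref.data.CorefChain> chains = corefSystem.corefReturnHybridOutput(document);
        for (Map.Entry<Integer, edu.stanford.nlp.coref.data.CorefChain> entry : chains.entrySet()) {
            System.out.println("chain " + entry.getKey() + ": " + entry.getValue().getRepresentativeMention());
        }
    }
}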

Example 3 with DeterministicCorefSieve

Use of edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve in project CoreNLP by stanfordnlp.

Class SieveCoreferenceSystem, method optimizeSieveOrdering.

/**
   * Given a set of sieves, selects an optimal ordering for them greedily:
   * at each pass it tries each remaining sieve, keeps the one that gives the best score,
   * and repeats until no sieves are left.
   */
public void optimizeSieveOrdering(MentionExtractor mentionExtractor, Properties props, String timestamp) throws Exception {
    logger.info("=============SIEVE OPTIMIZATION START ====================");
    logger.info("Optimize sieves using score: " + optimizeScoreType);
    FileFilter scoreFilesFilter = new FileFilter() {

        @Override
        public boolean accept(File file) {
            return file.getAbsolutePath().endsWith(".score");
        }

        public String toString() {
            return ".score";
        }
    };
    Pattern scoreFilePattern = Pattern.compile(".*sieves\\.(\\d+)\\.(\\d+).score");
    String runDistributedCmd = props.getProperty(Constants.RUN_DIST_CMD_PROP);
    String mainWorkDirPath = props.getProperty(Constants.RUN_DIST_CMD_WORK_DIR, "workdir") + "-" + timestamp + File.separator;
    DeterministicCorefSieve[] origSieves = sieves;
    String[] origSieveNames = sieveClassNames;
    Set<Integer> remainingSieveIndices = Generics.newHashSet();
    for (int i = 0; i < origSieves.length; i++) {
        remainingSieveIndices.add(i);
    }
    List<Integer> optimizedOrdering = new ArrayList<>();
    while (!remainingSieveIndices.isEmpty()) {
        // initialize array of current sieves
        int curSievesNumber = optimizedOrdering.size();
        sieves = new DeterministicCorefSieve[curSievesNumber + 1];
        sieveClassNames = new String[curSievesNumber + 1];
        for (int i = 0; i < curSievesNumber; i++) {
            sieves[i] = origSieves[optimizedOrdering.get(i)];
            sieveClassNames[i] = origSieveNames[optimizedOrdering.get(i)];
        }
        logger.info("*** Optimizing Sieve ordering for pass " + curSievesNumber + " ***");
        // Get list of sieves that we can pick from for the next sieve
        Set<Integer> selectableSieveIndices = new TreeSet<>(remainingSieveIndices);
        // Based on ordering constraints remove sieves from options
        if (sievesKeepOrder != null) {
            for (Pair<Integer, Integer> ko : sievesKeepOrder) {
                if (ko.second() < 0) {
                    if (remainingSieveIndices.contains(ko.first())) {
                        logger.info("Restrict selection to " + origSieveNames[ko.first()] + " because of constraint " + toSieveOrderConstraintString(ko, origSieveNames));
                        selectableSieveIndices = Generics.newHashSet(1);
                        selectableSieveIndices.add(ko.first());
                        break;
                    }
                } else if (ko.first() < 0 && remainingSieveIndices.size() > 1) {
                    if (remainingSieveIndices.contains(ko.second())) {
                        logger.info("Remove selection " + origSieveNames[ko.second()] + " because of constraint " + toSieveOrderConstraintString(ko, origSieveNames));
                        selectableSieveIndices.remove(ko.second());
                    }
                } else if (remainingSieveIndices.contains(ko.first())) {
                    if (remainingSieveIndices.contains(ko.second())) {
                        logger.info("Remove selection " + origSieveNames[ko.second()] + " because of constraint " + toSieveOrderConstraintString(ko, origSieveNames));
                        selectableSieveIndices.remove(ko.second());
                    }
                }
            }
        }
        if (selectableSieveIndices.isEmpty()) {
            throw new RuntimeException("Unable to find sieve ordering to satisfy all ordering constraints!!!!");
        }
        int selected = -1;
        if (selectableSieveIndices.size() > 1) {
            // Go through remaining sieves and see how well they do
            List<Pair<Double, Integer>> scores = new ArrayList<>();
            if (runDistributedCmd != null) {
                String workDirPath = mainWorkDirPath + curSievesNumber + File.separator;
                File workDir = new File(workDirPath);
                workDir.mkdirs();
                workDirPath = workDir.getAbsolutePath() + File.separator;
                // Start jobs
                for (int potentialSieveIndex : selectableSieveIndices) {
                    String sieveSelectionId = curSievesNumber + "." + potentialSieveIndex;
                    String jobDirPath = workDirPath + sieveSelectionId + File.separator;
                    File jobDir = new File(jobDirPath);
                    jobDir.mkdirs();
                    Properties newProps = new Properties();
                    for (String key : props.stringPropertyNames()) {
                        String value = props.getProperty(key);
                        value = value.replaceAll("\\$\\{JOBDIR\\}", jobDirPath);
                        newProps.setProperty(key, value);
                    }
                    // try this sieve and see how well it works
                    sieves[curSievesNumber] = origSieves[potentialSieveIndex];
                    sieveClassNames[curSievesNumber] = origSieveNames[potentialSieveIndex];
                    newProps.setProperty(Constants.OPTIMIZE_SIEVES_PROP, "false");
                    newProps.setProperty(Constants.SCORE_PROP, "true");
                    newProps.setProperty(Constants.SIEVES_PROP, StringUtils.join(sieveClassNames, ","));
                    newProps.setProperty(Constants.LOG_PROP, jobDirPath + "sieves." + sieveSelectionId + ".log");
                    newProps.setProperty(Constants.SCORE_FILE_PROP, workDirPath + "sieves." + sieveSelectionId + ".score");
                    if (Constants.PRINT_CONLL_OUTPUT || replicateCoNLL) {
                        newProps.setProperty(Constants.CONLL_OUTPUT_PROP, jobDirPath + "sieves." + sieveSelectionId + ".conlloutput");
                    }
                    String distCmd = newProps.getProperty(Constants.RUN_DIST_CMD_PROP, runDistributedCmd);
                    runAndScoreCorefDist(distCmd, newProps, workDirPath + "sieves." + sieveSelectionId + ".props");
                }
                // Wait for jobs to finish and collect scores
                waitForFiles(workDir, scoreFilesFilter, selectableSieveIndices.size());
                // Get scores
                File[] scoreFiles = workDir.listFiles(scoreFilesFilter);
                for (File file : scoreFiles) {
                    Matcher m = scoreFilePattern.matcher(file.getName());
                    if (m.matches()) {
                        int potentialSieveIndex = Integer.parseInt(m.group(2));
                        String text = IOUtils.slurpFile(file);
                        double score = Double.parseDouble(text);
                        // keeps scores so we can select best score and log them
                        scores.add(new Pair<>(score, potentialSieveIndex));
                    } else {
                        throw new RuntimeException("Bad score file name: " + file);
                    }
                }
            } else {
                for (int potentialSieveIndex : selectableSieveIndices) {
                    // try this sieve and see how well it works
                    sieves[curSievesNumber] = origSieves[potentialSieveIndex];
                    sieveClassNames[curSievesNumber] = origSieveNames[potentialSieveIndex];
                    logger.info("Trying sieve " + curSievesNumber + "=" + sieveClassNames[curSievesNumber] + ": ");
                    logger.info(" Trying sieves: " + StringUtils.join(sieveClassNames, ","));
                    double score = runAndScoreCoref(this, mentionExtractor, props, timestamp);
                    // keeps scores so we can select best score and log them
                    scores.add(new Pair<>(score, potentialSieveIndex));
                    logger.info(" Trying sieves: " + StringUtils.join(sieveClassNames, ","));
                    logger.info(" Trying sieves score: " + score);
                }
            }
            // Select bestScore
            double bestScore = -1;
            for (Pair<Double, Integer> p : scores) {
                if (selected < 0 || p.first() > bestScore) {
                    bestScore = p.first();
                    selected = p.second();
                }
            }
            // log ordered scores
            Collections.sort(scores);
            Collections.reverse(scores);
            logger.info("Ordered sieves");
            for (Pair<Double, Integer> p : scores) {
                logger.info("Sieve optimization pass " + curSievesNumber + " scores: Sieve=" + origSieveNames[p.second()] + ", score=" + p.first());
            }
        } else {
            // Only one sieve
            logger.info("Only one choice for next sieve");
            selected = selectableSieveIndices.iterator().next();
        }
        // log sieve we are adding
        sieves[curSievesNumber] = origSieves[selected];
        sieveClassNames[curSievesNumber] = origSieveNames[selected];
        logger.info("Adding sieve " + curSievesNumber + "=" + sieveClassNames[curSievesNumber] + " to existing sieves: ");
        logger.info(" Current Sieves: " + StringUtils.join(sieveClassNames, ","));
        // select optimal sieve and add it to our optimized ordering
        optimizedOrdering.add(selected);
        remainingSieveIndices.remove(selected);
    }
    logger.info("Final Sieve Ordering: " + StringUtils.join(sieveClassNames, ","));
    logger.info("=============SIEVE OPTIMIZATION DONE ====================");
}
Also used : Pattern(java.util.regex.Pattern) DeterministicCorefSieve(edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve) Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) Properties(java.util.Properties) TreeSet(java.util.TreeSet) FileFilter(java.io.FileFilter) File(java.io.File)
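
A sketch of how this optimizer might be driven, with a hypothetical wrapper class: the timestamp only names the optimization work directory, and the distributed branch is taken only when Constants.RUN_DIST_CMD_PROP is present in the Properties.

// Hedged sketch; SieveOrderingDemo is not part of CoreNLP.
import java.util.Properties;
import edu.stanford.nlp.dcoref.MentionExtractor;
import edu.stanford.nlp.dcoref.SieveCoreferenceSystem;

public class SieveOrderingDemo {
    public static void optimize(SieveCoreferenceSystem corefSystem, MentionExtractor mentionExtractor, Properties props) throws Exception {
        // The timestamp is used only to name the work directory for candidate-sieve runs.
        String timestamp = String.valueOf(System.currentTimeMillis());
        corefSystem.optimizeSieveOrdering(mentionExtractor, props, timestamp);
        // Afterwards the system's internal sieve ordering reflects the optimized order
        // (see the "Final Sieve Ordering" log line above).
    }
}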

Example 4 with DeterministicCorefSieve

Use of edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve in project CoreNLP by stanfordnlp.

Class SieveCoreferenceSystem, method coref.

/**
   * Extracts coreference clusters.
   * This is the main API entry point for coreference resolution.
   * Returns a map from CorefChain ID to the corresponding CorefChain.
   * @throws Exception
   */
public Map<Integer, CorefChain> coref(Document document) throws Exception {
    // Multi-pass sieve coreference resolution
    for (int i = 0; i < sieves.length; i++) {
        currentSieve = i;
        DeterministicCorefSieve sieve = sieves[i];
        // Do coreference resolution using this pass
        coreference(document, sieve);
    }
    // post processing (e.g., removing singletons, appositions for conll)
    if ((!Constants.USE_GOLD_MENTIONS && doPostProcessing) || replicateCoNLL)
        postProcessing(document);
    // coref system output: CorefChain
    Map<Integer, CorefChain> result = Generics.newHashMap();
    for (CorefCluster c : document.corefClusters.values()) {
        result.put(c.clusterID, new CorefChain(c, document.positions));
    }
    return result;
}
Also used : DeterministicCorefSieve(edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve)
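
For comparison, the usual way to reach this entry point is through the "dcoref" annotator in a StanfordCoreNLP pipeline, which arranges a Document internally and then calls coref(document). A minimal sketch follows; the wrapper class and the sample sentence are made up.

// Hedged sketch of standard pipeline usage; the dcoref annotator calls coref() internally.
import java.util.Map;
import java.util.Properties;
import edu.stanford.nlp.dcoref.CorefChain;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class CorefDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation annotation = new Annotation("Barack Obama was born in Hawaii. He was elected president in 2008.");
        pipeline.annotate(annotation);
        // The dcoref annotator stores the result of coref() on the Annotation.
        Map<Integer, CorefChain> chains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
        for (CorefChain chain : chains.values()) {
            System.out.println(chain.getChainID() + ": " + chain.getRepresentativeMention());
        }
    }
}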

Aggregations

DeterministicCorefSieve (edu.stanford.nlp.dcoref.sievepasses.DeterministicCorefSieve): 4 uses
CorefMention (edu.stanford.nlp.dcoref.CorefChain.CorefMention): 2 uses
ArrayList (java.util.ArrayList): 2 uses
TreeSet (java.util.TreeSet): 2 uses
ExactStringMatch (edu.stanford.nlp.dcoref.sievepasses.ExactStringMatch): 1 use
File (java.io.File): 1 use
FileFilter (java.io.FileFilter): 1 use
HashSet (java.util.HashSet): 1 use
LinkedHashSet (java.util.LinkedHashSet): 1 use
List (java.util.List): 1 use
Properties (java.util.Properties): 1 use
Set (java.util.Set): 1 use
Matcher (java.util.regex.Matcher): 1 use
Pattern (java.util.regex.Pattern): 1 use