Search in sources :

Example 1 with Compressor

Use of edu.stanford.nlp.coref.statistical.Compressor in the CoreNLP project by stanfordnlp.

From the class FastNeuralCorefDataExporter, method main.

/**
 * Runs a {@code FastNeuralCorefDataExporter} over the TRAIN, DEV and TEST datasets and
 * then writes out the {@link Compressor} shared by all three runs.
 *
 * <p>Properties are parsed from {@code args}, after which several {@code coref.*}
 * properties are overwritten with hard-coded values (mention distances and
 * machine-specific CoNLL-2012 paths). The output directories are likewise hard-coded
 * and are created if they do not exist.
 *
 * @param args command-line arguments, converted to properties via
 *             {@code StringUtils.argsToProperties}
 * @throws Exception propagated from any of the called helpers
 */
public static void main(String[] args) throws Exception {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("coref.maxMentionDistance", "50");
    props.setProperty("coref.maxMentionDistanceWithStringMatch", "1000");
    props.setProperty("coref.conllOutputPath", "/Users/kevinclark/Programming/research/coref/conll-2012/output");
    props.setProperty("coref.data", "/Users/kevinclark/Programming/research/coref/conll-2012");
    props.setProperty("coref.scorer", "/Users/kevinclark/Programming/research/coref/conll-2012/scorer/v8.01/scorer.pl");
    Dictionaries dictionaries = new Dictionaries(props);
    String outputPath = "/Users/kevinclark/Programming/research/coref/data";
    String dataPath = outputPath + "/raw/";
    String goldClusterPath = outputPath + "/gold/";
    String compressorPath = outputPath + "/";
    IOUtils.ensureDir(new File(outputPath));
    IOUtils.ensureDir(new File(dataPath));
    IOUtils.ensureDir(new File(goldClusterPath));
    IOUtils.ensureDir(new File(compressorPath));
    // A single compressor instance is handed to every exporter and written once at the end.
    Compressor<String> compressor = new Compressor<>();
    for (Dataset dataset : Arrays.asList(Dataset.TRAIN, Dataset.DEV, Dataset.TEST)) {
        CorefProperties.setInput(props, dataset);
        System.out.println(CorefProperties.getInputPath(props));
        // Locale.ROOT keeps the file name stable regardless of the JVM's default locale
        // (e.g. a Turkish locale would otherwise lower-case 'I' to a dotless 'ı').
        String datasetName = dataset.toString().toLowerCase(java.util.Locale.ROOT);
        new FastNeuralCorefDataExporter(props, dictionaries, compressor, dataPath + datasetName, goldClusterPath + datasetName).run(props, dictionaries);
    }
    // compressorPath already ends with '/', so no extra separator is needed here.
    writeCompressor(compressor, compressorPath + "compression");
}
Also used : Dictionaries(edu.stanford.nlp.coref.data.Dictionaries) Dataset(edu.stanford.nlp.coref.CorefProperties.Dataset) Compressor(edu.stanford.nlp.coref.statistical.Compressor) Properties(java.util.Properties) StatisticalCorefProperties(edu.stanford.nlp.coref.statistical.StatisticalCorefProperties) CorefProperties(edu.stanford.nlp.coref.CorefProperties) File(java.io.File)

Example 2 with Compressor

Use of edu.stanford.nlp.coref.statistical.Compressor in the CoreNLP project by stanfordnlp.

From the class FastNeuralCorefAlgorithm, method runCoref.

/**
 * Scores candidate mention pairs in {@code document} with {@code model} and merges each
 * anaphor's coreference cluster with that of its best-scoring candidate antecedent.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>obtain candidate antecedents per mention from {@code CorefUtils.heuristicFilter};</li>
 *   <li>extract features for every (antecedent, anaphor) pair;</li>
 *   <li>score every pair, and score every anaphor against the "no antecedent" id {@code -1};</li>
 *   <li>for each anaphor, pick the candidate whose score beats the (greedyness-adjusted)
 *       "no antecedent" score and merge the two clusters.</li>
 * </ol>
 *
 * @param document the document whose mentions are to be clustered; modified through
 *                 {@code CorefUtils.mergeCoreferenceClusters}
 * @throws RuntimeInterruptedException if the current thread is interrupted while scoring
 */
@Override
public void runCoref(Document document) {
    Map<Integer, List<Integer>> mentionToCandidateAntecedents = CorefUtils.heuristicFilter(CorefUtils.getSortedMentions(document), maxMentionDistance, maxMentionDistanceWithStringMatch);
    // Pairs are keyed as (candidate antecedent, anaphor).
    Map<Pair<Integer, Integer>, Boolean> mentionPairs = new HashMap<>();
    for (Map.Entry<Integer, List<Integer>> e : mentionToCandidateAntecedents.entrySet()) {
        for (int m1 : e.getValue()) {
            mentionPairs.put(new Pair<>(m1, e.getKey()), true);
        }
    }
    Compressor<String> compressor = new Compressor<>();
    DocumentExamples examples = featureExtractor.extract(0, document, mentionPairs, compressor);
    Counter<Pair<Integer, Integer>> pairwiseScores = new ClassicCounter<>();
    // We cache representations for mentions so we compute them O(n) rather than O(n^2) times
    Map<Integer, SimpleMatrix> antecedentCache = new HashMap<>();
    Map<Integer, SimpleMatrix> anaphorCache = new HashMap<>();
    // Score all mention pairs on how likely they are to be coreferent
    for (Example mentionPair : examples.examples) {
        if (Thread.interrupted()) {
            // Allow interrupting
            throw new RuntimeInterruptedException();
        }
        pairwiseScores.incrementCount(
                new Pair<>(mentionPair.mentionId1, mentionPair.mentionId2),
                model.score(
                        document.predictedMentionsByID.get(mentionPair.mentionId1),
                        document.predictedMentionsByID.get(mentionPair.mentionId2),
                        compressor.uncompress(examples.mentionFeatures.get(mentionPair.mentionId1)),
                        compressor.uncompress(examples.mentionFeatures.get(mentionPair.mentionId2)),
                        compressor.uncompress(mentionPair.pairwiseFeatures),
                        antecedentCache,
                        anaphorCache));
    }
    // Score each mention for anaphoricity; the score is stored under antecedent id -1
    for (int anaphorId : mentionToCandidateAntecedents.keySet()) {
        if (Thread.interrupted()) {
            // Allow interrupting
            throw new RuntimeInterruptedException();
        }
        pairwiseScores.incrementCount(
                new Pair<>(-1, anaphorId),
                model.score(
                        null,
                        document.predictedMentionsByID.get(anaphorId),
                        null,
                        compressor.uncompress(examples.mentionFeatures.get(anaphorId)),
                        null,
                        antecedentCache,
                        anaphorCache));
    }
    // Link each mention to the highest-scoring candidate antecedent
    for (Map.Entry<Integer, List<Integer>> e : mentionToCandidateAntecedents.entrySet()) {
        // -1 means "no antecedent chosen"; it matches the key used for the anaphoricity score.
        int antecedent = -1;
        int anaphor = e.getKey();
        // The "no antecedent" score is shifted by the greedyness setting before comparison.
        double bestScore = pairwiseScores.getCount(new Pair<>(-1, anaphor)) - 50 * (greedyness - 0.5);
        for (int ca : e.getValue()) {
            double score = pairwiseScores.getCount(new Pair<>(ca, anaphor));
            if (score > bestScore) {
                bestScore = score;
                antecedent = ca;
            }
        }
        // Compare against the -1 sentinel rather than 0 so that a winning candidate whose
        // id is 0 is not silently discarded.
        // NOTE(review): assumes mention ids are non-negative and may be 0 — confirm.
        if (antecedent >= 0) {
            CorefUtils.mergeCoreferenceClusters(new Pair<>(antecedent, anaphor), document);
        }
    }
}
Also used : HashMap(java.util.HashMap) RuntimeInterruptedException(edu.stanford.nlp.util.RuntimeInterruptedException) Compressor(edu.stanford.nlp.coref.statistical.Compressor) DocumentExamples(edu.stanford.nlp.coref.statistical.DocumentExamples) SimpleMatrix(org.ejml.simple.SimpleMatrix) Example(edu.stanford.nlp.coref.statistical.Example) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) List(java.util.List) Map(java.util.Map) Pair(edu.stanford.nlp.util.Pair)

Aggregations

Compressor (edu.stanford.nlp.coref.statistical.Compressor)2 CorefProperties (edu.stanford.nlp.coref.CorefProperties)1 Dataset (edu.stanford.nlp.coref.CorefProperties.Dataset)1 Dictionaries (edu.stanford.nlp.coref.data.Dictionaries)1 DocumentExamples (edu.stanford.nlp.coref.statistical.DocumentExamples)1 Example (edu.stanford.nlp.coref.statistical.Example)1 StatisticalCorefProperties (edu.stanford.nlp.coref.statistical.StatisticalCorefProperties)1 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)1 Pair (edu.stanford.nlp.util.Pair)1 RuntimeInterruptedException (edu.stanford.nlp.util.RuntimeInterruptedException)1 File (java.io.File)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Properties (java.util.Properties)1 SimpleMatrix (org.ejml.simple.SimpleMatrix)1