Usage example of edu.stanford.nlp.coref.statistical.Compressor in the CoreNLP project by stanfordnlp:
the main method of the class FastNeuralCorefDataExporter.
/**
 * Exports training/dev/test data for the fast neural coref model.
 *
 * NOTE(review): the distance limits and all data locations are hard-coded for a
 * specific development machine; consider supplying them via command-line
 * properties instead of overwriting them here.
 *
 * @param args command-line arguments parsed into {@link Properties}
 * @throws Exception if data loading or export fails
 */
public static void main(String[] args) throws Exception {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("coref.maxMentionDistance", "50");
  props.setProperty("coref.maxMentionDistanceWithStringMatch", "1000");
  props.setProperty("coref.conllOutputPath", "/Users/kevinclark/Programming/research/coref/conll-2012/output");
  props.setProperty("coref.data", "/Users/kevinclark/Programming/research/coref/conll-2012");
  props.setProperty("coref.scorer", "/Users/kevinclark/Programming/research/coref/conll-2012/scorer/v8.01/scorer.pl");
  Dictionaries dictionaries = new Dictionaries(props);
  String outputPath = "/Users/kevinclark/Programming/research/coref/data";
  String dataPath = outputPath + "/raw/";
  String goldClusterPath = outputPath + "/gold/";
  // ensureDir(outputPath) also covers the compressor output location, which is
  // the same directory; the previous extra ensureDir call was redundant.
  IOUtils.ensureDir(new File(outputPath));
  IOUtils.ensureDir(new File(dataPath));
  IOUtils.ensureDir(new File(goldClusterPath));
  Compressor<String> compressor = new Compressor<>();
  for (Dataset dataset : Arrays.asList(Dataset.TRAIN, Dataset.DEV, Dataset.TEST)) {
    CorefProperties.setInput(props, dataset);
    System.out.println(CorefProperties.getInputPath(props));
    // Locale.ROOT keeps the split name stable regardless of the default locale
    // (e.g. avoids the Turkish dotless-i problem with "TRAIN".toLowerCase()).
    String split = dataset.toString().toLowerCase(java.util.Locale.ROOT);
    new FastNeuralCorefDataExporter(props, dictionaries, compressor,
        dataPath + split, goldClusterPath + split).run(props, dictionaries);
  }
  // Fixed: previously concatenated outputPath + "/" + "/compression", producing
  // a double path separator ("…//compression").
  writeCompressor(compressor, outputPath + "/compression");
}
Usage example of edu.stanford.nlp.coref.statistical.Compressor in the CoreNLP project by stanfordnlp:
the runCoref method of the class FastNeuralCorefAlgorithm.
@Override
public void runCoref(Document document) {
  // Candidate antecedents per anaphor, after heuristic distance filtering.
  Map<Integer, List<Integer>> candidateAntecedents = CorefUtils.heuristicFilter(
      CorefUtils.getSortedMentions(document),
      maxMentionDistance, maxMentionDistanceWithStringMatch);

  // Enumerate every (candidate antecedent, anaphor) pair to be scored.
  Map<Pair<Integer, Integer>, Boolean> pairs = new HashMap<>();
  for (Map.Entry<Integer, List<Integer>> entry : candidateAntecedents.entrySet()) {
    int anaphor = entry.getKey();
    for (int candidate : entry.getValue()) {
      pairs.put(new Pair<>(candidate, anaphor), true);
    }
  }

  Compressor<String> compressor = new Compressor<>();
  DocumentExamples examples = featureExtractor.extract(0, document, pairs, compressor);

  Counter<Pair<Integer, Integer>> scores = new ClassicCounter<>();
  // Representation caches keep per-mention work O(n) rather than O(n^2).
  Map<Integer, SimpleMatrix> antecedentCache = new HashMap<>();
  Map<Integer, SimpleMatrix> anaphorCache = new HashMap<>();

  // Score every mention pair on how likely the two mentions are coreferent.
  for (Example pair : examples.examples) {
    if (Thread.interrupted()) {
      // Allow interrupting
      throw new RuntimeInterruptedException();
    }
    double pairScore = model.score(
        document.predictedMentionsByID.get(pair.mentionId1),
        document.predictedMentionsByID.get(pair.mentionId2),
        compressor.uncompress(examples.mentionFeatures.get(pair.mentionId1)),
        compressor.uncompress(examples.mentionFeatures.get(pair.mentionId2)),
        compressor.uncompress(pair.pairwiseFeatures),
        antecedentCache, anaphorCache);
    scores.incrementCount(new Pair<>(pair.mentionId1, pair.mentionId2), pairScore);
  }

  // Score each mention for anaphoricity, stored under the sentinel id -1.
  for (int anaphorId : candidateAntecedents.keySet()) {
    if (Thread.interrupted()) {
      // Allow interrupting
      throw new RuntimeInterruptedException();
    }
    double anaphoricityScore = model.score(
        null,
        document.predictedMentionsByID.get(anaphorId),
        null,
        compressor.uncompress(examples.mentionFeatures.get(anaphorId)),
        null,
        antecedentCache, anaphorCache);
    scores.incrementCount(new Pair<>(-1, anaphorId), anaphoricityScore);
  }

  // Link each anaphor to its highest-scoring candidate antecedent; the
  // anaphoricity score (shifted by greedyness) acts as the "no link" threshold.
  for (Map.Entry<Integer, List<Integer>> entry : candidateAntecedents.entrySet()) {
    int anaphor = entry.getKey();
    int best = -1;
    double bestScore = scores.getCount(new Pair<>(-1, anaphor)) - 50 * (greedyness - 0.5);
    for (int candidate : entry.getValue()) {
      double candidateScore = scores.getCount(new Pair<>(candidate, anaphor));
      if (candidateScore > bestScore) {
        bestScore = candidateScore;
        best = candidate;
      }
    }
    // NOTE(review): "> 0" (not ">= 0") assumes mention ids start above 0 —
    // confirm against the mention-id assignment upstream.
    if (best > 0) {
      CorefUtils.mergeCoreferenceClusters(new Pair<>(best, anaphor), document);
    }
  }
}
Aggregations