use of org.ejml.simple.SimpleMatrix in project CoreNLP by stanfordnlp.
the class SentimentPipeline method outputTreeVectors.
/**
 * Prints the node vector of every non-leaf node of the tree to {@code out},
 * one line per node, formatted as a space, the node number, a colon, and
 * then each vector element preceded by a space. Nodes are numbered in
 * pre-order starting at {@code index}; leaves print nothing and do not
 * consume a number. The numbering is intended to match setIndexLabels.
 *
 * @param out   stream that receives the formatted lines
 * @param tree  root of the (sub)tree to print
 * @param index number given to {@code tree} itself when it is not a leaf
 * @return the next unused node number once this subtree has been handled
 */
static int outputTreeVectors(PrintStream out, Tree tree, int index) {
  if (!tree.isLeaf()) {
    // The line prefix is written before the vector is fetched.
    out.print(" " + index + ":");
    SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
    final int size = nodeVector.getNumElements();
    for (int pos = 0; pos < size; pos++) {
      out.print(" " + NF.format(nodeVector.get(pos)));
    }
    out.println();

    // This node used up one number; the children continue from the next one.
    int next = index + 1;
    for (Tree subtree : tree.children()) {
      next = outputTreeVectors(out, subtree, next);
    }
    return next;
  }
  return index;
}
use of org.ejml.simple.SimpleMatrix in project CoreNLP by stanfordnlp.
the class SentimentPipeline method outputTreeScores.
/**
 * Prints the prediction scores of every non-leaf node of the tree to
 * {@code out}, one line per node: a space, the node number, a colon, and
 * then each score preceded by a space. Numbering is pre-order and starts at
 * {@code index}; leaves are skipped without using up a number. The
 * numbering is intended to match setIndexLabels.
 *
 * @param out   stream that receives the formatted lines
 * @param tree  root of the (sub)tree to print
 * @param index number given to {@code tree} itself when it is not a leaf
 * @return the next unused node number once this subtree has been handled
 */
static int outputTreeScores(PrintStream out, Tree tree, int index) {
  int nextIndex = index;
  if (!tree.isLeaf()) {
    // The line prefix is written before the predictions are fetched.
    out.print(" " + nextIndex + ":");
    SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
    int pos = 0;
    while (pos < predictions.getNumElements()) {
      out.print(" " + NF.format(predictions.get(pos)));
      pos++;
    }
    out.println();

    // Account for this node, then let each child subtree advance the counter.
    nextIndex++;
    for (Tree subtree : tree.children()) {
      nextIndex = outputTreeScores(out, subtree, nextIndex);
    }
  }
  return nextIndex;
}
use of org.ejml.simple.SimpleMatrix in project CoreNLP by stanfordnlp.
the class CategoricalFeatureExtractor method getPairFeatures.
/**
 * Builds the feature vector for a pair of mentions.
 *
 * <p>The two mentions are looked up in {@code document.predictedMentionsByID}
 * using {@code pair.first} and {@code pair.second}. The result is the
 * concatenation, in this order, of: the values returned by
 * {@code pairwiseFeatures} as a column vector, the encoded sentence
 * distance, the encoded mention distance (minus one), a 1x1 flag that is 1
 * when both mentions are in the same sentence and the first mention ends
 * after the second one starts, the mention features of each mention, and
 * the encoded genre of the document.
 *
 * @param pair                IDs of the two mentions (first, second)
 * @param document            document that owns the mentions
 * @param mentionsByHeadIndex mentions grouped by head index, passed through
 *                            to {@code getMentionFeatures}
 * @return the concatenated feature matrix
 */
public SimpleMatrix getPairFeatures(Pair<Integer, Integer> pair, Document document, Map<Integer, List<Mention>> mentionsByHeadIndex) {
  Mention first = document.predictedMentionsByID.get(pair.first);
  Mention second = document.predictedMentionsByID.get(pair.second);

  // Copy the integer-valued pairwise features into a column vector.
  List<Integer> values = pairwiseFeatures(document, first, second, dictionaries, conll);
  SimpleMatrix pairwise = new SimpleMatrix(values.size(), 1);
  int row = 0;
  for (Integer value : values) {
    pairwise.set(row, value);
    row++;
  }

  // 1 when the mentions share a sentence and the first ends after the second starts.
  boolean overlapping = first.sentNum == second.sentNum && first.endIndex > second.startIndex;
  double overlapFlag = overlapping ? 1 : 0;

  return NeuralUtils.concatenate(
      pairwise,
      encodeDistance(second.sentNum - first.sentNum),
      encodeDistance(second.mentionNum - first.mentionNum - 1),
      new SimpleMatrix(new double[][] { { overlapFlag } }),
      getMentionFeatures(first, document, mentionsByHeadIndex),
      getMentionFeatures(second, document, mentionsByHeadIndex),
      encodeGenre(document));
}
use of org.ejml.simple.SimpleMatrix in project CoreNLP by stanfordnlp.
the class EmbeddingExtractor method getDocumentEmbedding.
/**
 * Computes an embedding for the whole document.
 *
 * <p>When {@code conll} is false, no words are collected and a newly
 * constructed matrix with {@code staticWordEmbeddings.getEmbeddingSize()}
 * rows and one column is returned. Otherwise the words of every sentence
 * that contains at least one predicted mention are gathered (each sentence
 * once, in the order its first mention is encountered) and passed to
 * {@code getAverageEmbedding}.
 *
 * @param document document whose predicted mentions select the sentences
 * @return the document embedding
 */
public SimpleMatrix getDocumentEmbedding(Document document) {
  if (conll) {
    Set<Integer> visitedSentences = new HashSet<>();
    List<CoreLabel> sentenceTokens = new ArrayList<>();
    for (Mention mention : document.predictedMentionsByID.values()) {
      // Set.add returns true only the first time a sentence number is seen.
      if (visitedSentences.add(mention.sentNum)) {
        sentenceTokens.addAll(mention.sentenceWords);
      }
    }
    return getAverageEmbedding(sentenceTokens);
  }
  int embeddingSize = staticWordEmbeddings.getEmbeddingSize();
  return new SimpleMatrix(embeddingSize, 1);
}
use of org.ejml.simple.SimpleMatrix in project CoreNLP by stanfordnlp.
the class NeuralCorefAlgorithm method runCoref.
/**
 * Runs coreference resolution over the mentions of a document.
 *
 * <p>Steps, as visible in this method:
 * <ol>
 *   <li>Group the sorted mentions by head index.</li>
 *   <li>For every mention, compute its antecedent embedding, anaphor
 *       embedding and anaphoricity score.</li>
 *   <li>For every mention with candidate antecedents (as chosen by
 *       {@code CorefUtils.heuristicFilter}), pick the candidate whose
 *       pairwise score is highest and strictly greater than the mention's
 *       anaphoricity score minus {@code 50 * (greedyness - 0.5)}.</li>
 *   <li>If such a candidate exists, merge the two coreference clusters.</li>
 * </ol>
 *
 * @param document document whose mentions are clustered in place
 */
@Override
public void runCoref(Document document) {
  List<Mention> sortedMentions = CorefUtils.getSortedMentions(document);

  // Group mentions by head index.
  Map<Integer, List<Mention>> mentionsByHeadIndex = new HashMap<>();
  for (Mention mention : sortedMentions) {
    if (!mentionsByHeadIndex.containsKey(mention.headIndex)) {
      mentionsByHeadIndex.put(mention.headIndex, new ArrayList<Mention>());
    }
    mentionsByHeadIndex.get(mention.headIndex).add(mention);
  }

  // Per-mention embeddings and anaphoricity scores, keyed by mention ID.
  SimpleMatrix documentEmbedding = embeddingExtractor.getDocumentEmbedding(document);
  Map<Integer, SimpleMatrix> antecedentEmbeddings = new HashMap<>();
  Map<Integer, SimpleMatrix> anaphorEmbeddings = new HashMap<>();
  Counter<Integer> anaphoricityScores = new ClassicCounter<>();
  for (Mention mention : sortedMentions) {
    SimpleMatrix embedding = embeddingExtractor.getMentionEmbeddings(mention, documentEmbedding);
    antecedentEmbeddings.put(mention.mentionID, model.getAntecedentEmbedding(embedding));
    anaphorEmbeddings.put(mention.mentionID, model.getAnaphorEmbedding(embedding));
    anaphoricityScores.incrementCount(
        mention.mentionID,
        model.getAnaphoricityScore(
            embedding,
            featureExtractor.getAnaphoricityFeatures(mention, document, mentionsByHeadIndex)));
  }

  Map<Integer, List<Integer>> mentionToCandidateAntecedents =
      CorefUtils.heuristicFilter(sortedMentions, maxMentionDistance, maxMentionDistanceWithStringMatch);
  for (Map.Entry<Integer, List<Integer>> entry : mentionToCandidateAntecedents.entrySet()) {
    Integer anaphorKey = entry.getKey();
    // Score a candidate must strictly beat before it is accepted as antecedent.
    double bestScore = anaphoricityScores.getCount(anaphorKey) - 50 * (greedyness - 0.5);
    int anaphor = anaphorKey;

    Integer bestAntecedent = null;
    for (int candidate : entry.getValue()) {
      double pairScore = model.getPairwiseScore(
          antecedentEmbeddings.get(candidate),
          anaphorEmbeddings.get(anaphor),
          featureExtractor.getPairFeatures(new Pair<>(candidate, anaphor), document, mentionsByHeadIndex));
      if (pairScore > bestScore) {
        bestScore = pairScore;
        bestAntecedent = candidate;
      }
    }

    if (bestAntecedent == null) {
      // No candidate beat the threshold; leave this mention unmerged.
      continue;
    }
    CorefUtils.mergeCoreferenceClusters(new Pair<>(bestAntecedent, anaphor), document);
  }
}
Aggregations