Use of edu.stanford.nlp.util.Pair in the CoreNLP project by stanfordnlp.
From the class ParentAnnotationStats, method getSplitters.
/**
 * Selects annotated categories whose rewrite distributions diverge enough from their
 * unannotated counterparts to justify a state split, and adds their names to
 * {@code splitters}.
 *
 * <p>Two passes are made. The first compares each (node, parent) distribution in
 * {@code pr} against the bare node distribution in {@code nr}, adding splitters of the
 * form {@code node^parent}. The second compares each (node, parent, grandparent)
 * distribution in {@code gpr} against the matching (node, parent) distribution in
 * {@code pr}, adding splitters of the form {@code node^parent~grandparent}. In both
 * passes the score is the KL divergence weighted by the support (total count) of the
 * more-annotated distribution, and every candidate scoring at least {@code cutOff}
 * is kept.
 *
 * @param cutOff    minimum weighted KL divergence for a split to be accepted
 * @param nr        node -> counter of rewrites for that node
 * @param pr        [node, parent] -> counter of rewrites in that context
 * @param gpr       [node, parent, grandparent] -> counter of rewrites in that context
 * @param splitters output set; qualifying annotated category names are added to it
 */
private static void getSplitters(double cutOff, Map<String, ClassicCounter<List<String>>> nr, Map<List<String>, ClassicCounter<List<String>>> pr, Map<List<String>, ClassicCounter<List<String>>> gpr, Set<String> splitters) {
  // Pass 1: value of the parent annotation.
  for (Map.Entry<String, ClassicCounter<List<String>>> nrEntry : nr.entrySet()) {
    String node = nrEntry.getKey();
    ClassicCounter<List<String>> cntr = nrEntry.getValue();
    List<Pair<List<String>, Double>> answers = new ArrayList<>();
    for (Map.Entry<List<String>, ClassicCounter<List<String>>> prEntry : pr.entrySet()) {
      List<String> key = prEntry.getKey();
      // Only compare distributions for the same base category.
      if (key.get(0).equals(node)) {
        ClassicCounter<List<String>> cntr2 = prEntry.getValue();
        double support2 = cntr2.totalCount();
        double kl = Counters.klDivergence(cntr2, cntr);
        // Weight the divergence by the evidence for the annotated context.
        answers.add(new Pair<>(key, kl * support2));
      }
    }
    // Highest-scoring candidates first (descending by weighted KL).
    answers.sort((o1, o2) -> o2.second().compareTo(o1.second()));
    for (Pair<List<String>, Double> p : answers) {
      if (p.second() >= cutOff) {
        List<String> lst = p.first();
        String name = lst.get(0) + "^" + lst.get(1);
        splitters.add(name);
      }
    }
  }
  // Pass 2: value of the grandparent annotation.
  for (Map.Entry<List<String>, ClassicCounter<List<String>>> prEntry : pr.entrySet()) {
    List<String> node = prEntry.getKey();
    ClassicCounter<List<String>> cntr = prEntry.getValue();
    double support = cntr.totalCount();
    // Skip parent contexts that are themselves too rare to trust.
    if (support < SUPPCUTOFF) {
      continue;
    }
    List<Pair<List<String>, Double>> answers = new ArrayList<>();
    for (Map.Entry<List<String>, ClassicCounter<List<String>>> gprEntry : gpr.entrySet()) {
      List<String> key = gprEntry.getKey();
      // Only compare when both node and parent match.
      if (key.get(0).equals(node.get(0)) && key.get(1).equals(node.get(1))) {
        ClassicCounter<List<String>> cntr2 = gprEntry.getValue();
        double support2 = cntr2.totalCount();
        double kl = Counters.klDivergence(cntr2, cntr);
        answers.add(new Pair<>(key, kl * support2));
      }
    }
    answers.sort((o1, o2) -> o2.second().compareTo(o1.second()));
    for (Pair<List<String>, Double> answer : answers) {
      if (answer.second() >= cutOff) {
        List<String> lst = answer.first();
        String name = lst.get(0) + "^" + lst.get(1) + "~" + lst.get(2);
        splitters.add(name);
      }
    }
  }
}
Use of edu.stanford.nlp.util.Pair in the CoreNLP project by stanfordnlp.
From the class DocumentPreprocessor, method findSyntacticRelationsFromDependency.
/**
 * Marks syntactic relations between mentions (list membership, apposition, and
 * predicate nominatives) using the enhanced dependency graph of the sentence.
 *
 * <p>All mentions are assumed to come from the same sentence: the dependency graph
 * is taken from the first mention only. Relative-pronoun pairs are collected into an
 * empty set for now (marked TODO below), so that relation is effectively a no-op.
 *
 * @param orderedMentions mentions of one sentence, in order; may be empty
 */
private static void findSyntacticRelationsFromDependency(List<Mention> orderedMentions) {
  if (orderedMentions.size() == 0)
    return;
  markListMemberRelation(orderedMentions);
  // All mentions share a sentence, so any mention's graph works here.
  SemanticGraph dependency = orderedMentions.get(0).enhancedDependency;
  // Apposition: every appos edge yields a (governor, dependent) index pair.
  Set<Pair<Integer, Integer>> appos = Generics.newHashSet();
  List<SemanticGraphEdge> appositions = dependency.findAllRelns(UniversalEnglishGrammaticalRelations.APPOSITIONAL_MODIFIER);
  for (SemanticGraphEdge edge : appositions) {
    // IndexedWord indices are 1-based; mention indices are 0-based.
    int sIdx = edge.getSource().index() - 1;
    int tIdx = edge.getTarget().index() - 1;
    appos.add(Pair.makePair(sIdx, tIdx));
  }
  markMentionRelation(orderedMentions, appos, "APPOSITION");
  // Predicate nominatives: for each copula, pair the predicate with its subject.
  Set<Pair<Integer, Integer>> preNomi = Generics.newHashSet();
  List<SemanticGraphEdge> copula = dependency.findAllRelns(UniversalEnglishGrammaticalRelations.COPULA);
  for (SemanticGraphEdge edge : copula) {
    IndexedWord source = edge.getSource();
    // Prefer a nominal subject; fall back to a clausal subject.
    IndexedWord target = dependency.getChildWithReln(source, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT);
    if (target == null)
      target = dependency.getChildWithReln(source, UniversalEnglishGrammaticalRelations.CLAUSAL_SUBJECT);
    // TODO
    if (target == null)
      continue;
    // to handle relative clause: e.g., Tim who is a student,
    // If the subject is a wh-word, climb to the head the relative clause modifies.
    if (target.tag().startsWith("W")) {
      IndexedWord parent = dependency.getParent(source);
      if (parent != null && dependency.reln(parent, source).equals(UniversalEnglishGrammaticalRelations.RELATIVE_CLAUSE_MODIFIER)) {
        target = parent;
      }
    }
    int sIdx = source.index() - 1;
    int tIdx = target.index() - 1;
    // Pair order is (subject, predicate) — note it is reversed vs. the edge.
    preNomi.add(Pair.makePair(tIdx, sIdx));
  }
  markMentionRelation(orderedMentions, preNomi, "PREDICATE_NOMINATIVE");
  // relative pronouns TODO — the set is intentionally left empty for now.
  Set<Pair<Integer, Integer>> relativePronounPairs = Generics.newHashSet();
  markMentionRelation(orderedMentions, relativePronounPairs, "RELATIVE_PRONOUN");
}
Use of edu.stanford.nlp.util.Pair in the CoreNLP project by stanfordnlp.
From the class CoNLLDocumentReader, method writeTabSep.
/**
 * Writes one sentence in a four-column tab-separated format
 * (word, POS tag, NER category, mention marker) to {@code pw}.
 *
 * <p>The fourth column is "MENTION" for tokens that are the syntactic head of a
 * coreference mention (found via the parse tree and a Collins-style head finder),
 * and "O" otherwise. A trailing possessive {@code 's} token immediately after a
 * mention head has its marker moved onto the preceding token.
 *
 * @param sentence must carry TokensAnnotation and TreeAnnotation
 * @param chainmap coreference chain map — NOTE(review): not read in this method;
 *                 presumably kept for interface symmetry with callers — confirm
 */
public static void writeTabSep(PrintWriter pw, CoreMap sentence, CollectionValuedMap<String, CoreMap> chainmap) {
  HeadFinder headFinder = new ModCollinsHeadFinder();
  List<CoreLabel> sentenceAnno = sentence.get(CoreAnnotations.TokensAnnotation.class);
  Tree sentenceTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
  Map<Pair<Integer, Integer>, String> sentenceInfo = Generics.newHashMap();
  Set<Tree> sentenceSubTrees = sentenceTree.subTrees();
  // Assign token spans to every subtree before indexing them below.
  sentenceTree.setSpans();
  Map<Pair<Integer, Integer>, Tree> treeSpanMap = Generics.newHashMap();
  // NOTE(review): wordSpanMap is populated but never read in this method.
  Map<Pair<Integer, Integer>, List<Tree>> wordSpanMap = Generics.newHashMap();
  for (Tree ctree : sentenceSubTrees) {
    IntPair span = ctree.getSpan();
    if (span != null) {
      treeSpanMap.put(Pair.makePair(span.getSource(), span.getTarget()), ctree);
      wordSpanMap.put(Pair.makePair(span.getSource(), span.getTarget()), ctree.getLeaves());
    }
  }
  String[][] finalSentence;
  finalSentence = new String[sentenceAnno.size()][];
  // mention span -> head word (null when no head could be determined).
  Map<Pair<Integer, Integer>, String> allHeads = Generics.newHashMap();
  int index = -1;
  for (CoreLabel newAnno : sentenceAnno) {
    index += 1;
    String word = newAnno.word();
    String tag = newAnno.tag();
    String cat = newAnno.ner();
    String coref = newAnno.get(CorefCoreAnnotations.CorefAnnotation.class);
    finalSentence[index] = new String[4];
    finalSentence[index][0] = word;
    finalSentence[index][1] = tag;
    finalSentence[index][2] = cat;
    finalSentence[index][3] = coref;
    if (coref == null) {
      // NOTE(review): this stores a null value under the token's own span —
      // presumably to mark the span as seen; confirm this is intentional.
      sentenceInfo.put(Pair.makePair(index, index), coref);
      finalSentence[index][3] = "O";
    } else {
      // A token may belong to several coref groups, separated by '|'.
      String[] allC = coref.split("\\|");
      for (String corefG : allC) {
        Pair<Integer, Integer> mention = getMention(index, corefG, sentenceAnno);
        if (!include(sentenceInfo, mention, corefG)) {
          // find largest NP in mention
          sentenceInfo.put(mention, corefG);
          Tree mentionTree = treeSpanMap.get(mention);
          String head = null;
          if (mentionTree != null) {
            head = mentionTree.headTerminal(headFinder).nodeString();
          } else if (mention.first.equals(mention.second)) {
            // Single-token mention with no matching subtree: the token is its own head.
            head = word;
          }
          allHeads.put(mention, head);
        }
      }
      // Mark this token iff it is the head word of some mention seen so far.
      if (allHeads.values().contains(word)) {
        finalSentence[index][3] = "MENTION";
      } else {
        finalSentence[index][3] = "O";
      }
    }
  }
  for (int i = 0; i < finalSentence.length; i++) {
    String[] wordInfo = finalSentence[i];
    if (i < finalSentence.length - 1) {
      // Fold a possessive 's that was marked as a mention onto the previous token.
      String[] nextWordInfo = finalSentence[i + 1];
      if (nextWordInfo[3].equals("MENTION") && nextWordInfo[0].equals("'s")) {
        wordInfo[3] = "MENTION";
        finalSentence[i + 1][3] = "O";
      }
    }
    pw.println(wordInfo[0] + "\t" + wordInfo[1] + "\t" + wordInfo[2] + "\t" + wordInfo[3]);
  }
  // Blank line terminates the sentence in this tab-separated format.
  pw.println("");
}
Use of edu.stanford.nlp.util.Pair in the CoreNLP project by stanfordnlp.
From the class RothResultsByRelation, method printResults.
/**
 * Prints gold relations alongside the extractor's predicted labels, sorted so that
 * relations with the same argument types (and then rarer dependency paths) are
 * grouped together for manual error analysis.
 *
 * <p>Only relations between selected argument-type pairs (PEOPLE/LOCATION/
 * ORGANIZATION combinations) with a non-empty low-level dependency path are printed.
 *
 * @param pw              destination for the report
 * @param goldStandard    gold-annotated sentences
 * @param extractorOutput system output, aligned to {@code goldStandard} in place
 */
@Override
public void printResults(PrintWriter pw, List<CoreMap> goldStandard, List<CoreMap> extractorOutput) {
  featureFactory = MachineReading.makeRelationFeatureFactory(MachineReadingProperties.relationFeatureFactoryClass, MachineReadingProperties.relationFeatures, false);
  // generic mentions work well in this domain
  mentionFactory = new RelationMentionFactory();
  ResultsPrinter.align(goldStandard, extractorOutput);
  List<RelationMention> relations = new ArrayList<>();
  // gold relation -> predicted type for the same argument pair.
  final Map<RelationMention, String> predictions = new HashMap<>();
  for (int i = 0; i < goldStandard.size(); i++) {
    List<RelationMention> goldRelations = AnnotationUtils.getAllRelations(mentionFactory, goldStandard.get(i), true);
    relations.addAll(goldRelations);
    for (RelationMention rel : goldRelations) {
      predictions.put(rel, AnnotationUtils.getRelation(mentionFactory, extractorOutput.get(i), rel.getArg(0), rel.getArg(1)).getType());
    }
  }
  // Count how often each (arg-type pair, dependency path) combination occurs.
  final Counter<Pair<Pair<String, String>, String>> pathCounts = new ClassicCounter<>();
  for (RelationMention rel : relations) {
    pathCounts.incrementCount(new Pair<>(new Pair<>(rel.getArg(0).getType(), rel.getArg(1).getType()), featureFactory.getFeature(rel, "dependency_path_lowlevel")));
  }
  // Accuracy bookkeeping restricted to singleton paths (seen exactly once).
  Counter<String> singletonCorrect = new ClassicCounter<>();
  Counter<String> singletonPredicted = new ClassicCounter<>();
  Counter<String> singletonActual = new ClassicCounter<>();
  for (RelationMention rel : relations) {
    if (pathCounts.getCount(new Pair<>(new Pair<>(rel.getArg(0).getType(), rel.getArg(1).getType()), featureFactory.getFeature(rel, "dependency_path_lowlevel"))) == 1.0) {
      String prediction = predictions.get(rel);
      if (prediction.equals(rel.getType())) {
        singletonCorrect.incrementCount(prediction);
      }
      singletonPredicted.incrementCount(prediction);
      singletonActual.incrementCount(rel.getType());
    }
  }
  // Orders relations for the report: by argument types, then by path frequency
  // (rare paths first), then by gold type, prediction, and sentence text.
  class RelComp implements Comparator<RelationMention> {

    @Override
    public int compare(RelationMention rel1, RelationMention rel2) {
      // Group together actual relations of a type with relations that were
      // predicted to be that type
      String prediction1 = predictions.get(rel1);
      String prediction2 = predictions.get(rel2);
      // String rel1group = RelationsSentence.isUnrelatedLabel(rel1.getType())
      // ? prediction1 : rel1.getType();
      // String rel2group = RelationsSentence.isUnrelatedLabel(rel2.getType())
      // ? prediction2 : rel2.getType();
      int entComp = (rel1.getArg(0).getType() + rel1.getArg(1).getType()).compareTo(rel2.getArg(0).getType() + rel2.getArg(1).getType());
      // int groupComp = rel1group.compareTo(rel2group);
      int typeComp = rel1.getType().compareTo(rel2.getType());
      int predictionComp = prediction1.compareTo(prediction2);
      // int pathComp =
      // getFeature(rel1,"generalized_dependency_path").compareTo(getFeature(rel2,"generalized_dependency_path"));
      double pathCount1 = pathCounts.getCount(new Pair<>(new Pair<>(rel1.getArg(0).getType(), rel1.getArg(1).getType()), featureFactory.getFeature(rel1, "dependency_path_lowlevel")));
      double pathCount2 = pathCounts.getCount(new Pair<>(new Pair<>(rel2.getArg(0).getType(), rel2.getArg(1).getType()), featureFactory.getFeature(rel2, "dependency_path_lowlevel")));
      if (entComp != 0) {
        return entComp;
        // } else if (pathComp != 0) {
        // return pathComp;
      } else if (pathCount1 < pathCount2) {
        return -1;
      } else if (pathCount1 > pathCount2) {
        return 1;
      } else if (typeComp != 0) {
        return typeComp;
      } else if (predictionComp != 0) {
        return predictionComp;
      } else {
        // Final tiebreak keeps the ordering total and deterministic.
        return rel1.getSentence().get(CoreAnnotations.TextAnnotation.class).compareTo(rel2.getSentence().get(CoreAnnotations.TextAnnotation.class));
      }
    }
  }
  RelComp relComp = new RelComp();
  Collections.sort(relations, relComp);
  for (RelationMention rel : relations) {
    String prediction = predictions.get(rel);
    // if (RelationsSentence.isUnrelatedLabel(prediction) &&
    // RelationsSentence.isUnrelatedLabel(rel.getType())) {
    // continue;
    // }
    String type1 = rel.getArg(0).getType();
    String type2 = rel.getArg(1).getType();
    String path = featureFactory.getFeature(rel, "dependency_path_lowlevel");
    // Restrict the report to the argument-type pairs of interest.
    if (!((type1.equals("PEOPLE") && type2.equals("PEOPLE")) || (type1.equals("PEOPLE") && type2.equals("LOCATION")) || (type1.equals("LOCATION") && type2.equals("LOCATION")) || (type1.equals("ORGANIZATION") && type2.equals("LOCATION")) || (type1.equals("PEOPLE") && type2.equals("ORGANIZATION")))) {
      continue;
    }
    if (path.equals("")) {
      continue;
    }
    pw.println("\nLABEL: " + prediction);
    pw.println(rel);
    pw.println(path);
    pw.println(featureFactory.getFeatures(rel, "dependency_path_words"));
    pw.println(featureFactory.getFeature(rel, "surface_path_POS"));
  }
}
Use of edu.stanford.nlp.util.Pair in the CoreNLP project by stanfordnlp.
From the class Units, method loadUnits.
/**
 * Loads unit definitions from a CSV file whose header row names the columns
 * ({@code unit}, {@code prefix}, {@code symbol}, {@code type}, {@code system},
 * {@code defaultUnit}, {@code defaultUnitScale}).
 *
 * <p>Default-unit links are resolved in a second pass after all units are read, so
 * a unit may reference a default unit defined later in the file. Unknown default
 * units are reported as warnings rather than errors.
 *
 * @param filename path (or classpath resource) of the CSV file to read
 * @return the units in file order
 * @throws IOException if the file cannot be read or is empty
 */
public static List<Unit> loadUnits(String filename) throws IOException {
  Pattern commaPattern = Pattern.compile("\\s*,\\s*");
  // try-with-resources: the original leaked the reader if any exception was thrown
  // before the final close() call.
  try (BufferedReader br = IOUtils.readerFromString(filename)) {
    String headerString = br.readLine();
    if (headerString == null) {
      // Fail clearly instead of a NullPointerException on split(null).
      throw new IOException("Empty units file: " + filename);
    }
    String[] header = commaPattern.split(headerString);
    Map<String, Integer> headerIndex = new HashMap<>();
    for (int i = 0; i < header.length; i++) {
      headerIndex.put(header[i], i);
    }
    // NOTE(review): these unbox and will NPE if a required column is missing from
    // the header — acceptable for a trusted resource file, but worth confirming.
    int iName = headerIndex.get("unit");
    int iPrefix = headerIndex.get("prefix");
    int iSymbol = headerIndex.get("symbol");
    int iType = headerIndex.get("type");
    int iSystem = headerIndex.get("system");
    int iDefaultUnit = headerIndex.get("defaultUnit");
    int iDefaultUnitScale = headerIndex.get("defaultUnitScale");
    String line;
    List<Unit> list = new ArrayList<>();
    Map<String, Unit> unitsByName = new HashMap<>();
    // unit name -> (default unit name, scale); resolved after all units are read.
    Map<String, Pair<String, Double>> unitToDefaultUnits = new HashMap<>();
    while ((line = br.readLine()) != null) {
      String[] fields = commaPattern.split(line);
      Unit unit = new Unit(fields[iName], fields[iSymbol], fields[iType].toUpperCase());
      unit.system = fields[iSystem];
      // Trailing columns may be omitted; guard each optional field by length.
      if (fields.length > iPrefix) {
        unit.prefixSystem = fields[iPrefix];
      }
      if (fields.length > iDefaultUnit) {
        double scale = 1.0;
        if (fields.length > iDefaultUnitScale) {
          scale = Double.parseDouble(fields[iDefaultUnitScale]);
        }
        unitToDefaultUnits.put(unit.getName(), Pair.makePair(fields[iDefaultUnit], scale));
      }
      unitsByName.put(unit.getName(), unit);
      list.add(unit);
    }
    // Second pass: wire up default-unit references now that every unit exists.
    for (Map.Entry<String, Pair<String, Double>> entry : unitToDefaultUnits.entrySet()) {
      Unit unit = unitsByName.get(entry.getKey());
      Unit defaultUnit = unitsByName.get(entry.getValue().first);
      if (defaultUnit != null) {
        unit.defaultUnit = defaultUnit;
        unit.defaultUnitScale = entry.getValue().second;
      } else {
        Redwood.Util.warn("Unknown default unit " + entry.getValue().first + " for " + entry.getKey());
      }
    }
    return list;
  }
}
Aggregations