Search in sources :

Example 1 with Pair

Use of edu.stanford.nlp.util.Pair in the CoreNLP project by stanfordnlp.

In the class ParentAnnotationStats, the method getSplitters:

/**
 * Collects annotation "splitter" names for states whose parent (and grandparent)
 * context distributions diverge sufficiently from the unconditioned distribution.
 * A candidate is scored as KL(context || base) weighted by the context's total
 * count; candidates scoring at least {@code cutOff} are added to {@code splitters}
 * as "node^parent" (parent pass) or "node^parent~grandparent" (grandparent pass).
 *
 * @param cutOff    minimum weighted KL divergence for a context to be kept
 * @param nr        node -> counter over observations, the base distributions
 * @param pr        [node, parent] -> counter over observations
 * @param gpr       [node, parent, grandparent] -> counter over observations
 * @param splitters output set; qualifying splitter names are added to it
 */
private static void getSplitters(double cutOff, Map<String, ClassicCounter<List<String>>> nr, Map<List<String>, ClassicCounter<List<String>>> pr, Map<List<String>, ClassicCounter<List<String>>> gpr, Set<String> splitters) {
    // do value of parent
    for (String node : nr.keySet()) {
        List<Pair<List<String>, Double>> answers = new ArrayList<>();
        ClassicCounter<List<String>> cntr = nr.get(node);
        for (List<String> key : pr.keySet()) {
            // only score parent contexts whose node matches
            if (key.get(0).equals(node)) {
                ClassicCounter<List<String>> cntr2 = pr.get(key);
                double support2 = cntr2.totalCount();
                double kl = Counters.klDivergence(cntr2, cntr);
                // weight divergence by evidence; autoboxing replaces the
                // deprecated new Double(...) constructor
                answers.add(new Pair<>(key, kl * support2));
            }
        }
        // highest-scoring contexts first
        answers.sort((o1, o2) -> o2.second().compareTo(o1.second()));
        for (Pair<List<String>, Double> p : answers) {
            if (p.second() >= cutOff) {
                List<String> lst = p.first();
                String nd = lst.get(0);
                String par = lst.get(1);
                splitters.add(nd + "^" + par);
            }
        }
    }
    // do value of grandparent
    for (List<String> node : pr.keySet()) {
        ClassicCounter<List<String>> cntr = pr.get(node);
        double support = cntr.totalCount();
        if (support < SUPPCUTOFF) {
            // too little evidence behind this (node, parent) context
            continue;
        }
        List<Pair<List<String>, Double>> answers = new ArrayList<>();
        for (List<String> key : gpr.keySet()) {
            // only score grandparent contexts matching both node and parent
            if (key.get(0).equals(node.get(0)) && key.get(1).equals(node.get(1))) {
                ClassicCounter<List<String>> cntr2 = gpr.get(key);
                double support2 = cntr2.totalCount();
                double kl = Counters.klDivergence(cntr2, cntr);
                answers.add(new Pair<>(key, kl * support2));
            }
        }
        answers.sort((o1, o2) -> o2.second().compareTo(o1.second()));
        // generics make the original's raw-typed (Pair)/(List)/(String) casts unnecessary
        for (Pair<List<String>, Double> p : answers) {
            if (p.second() >= cutOff) {
                List<String> lst = p.first();
                String nd = lst.get(0);
                String par = lst.get(1);
                String gpar = lst.get(2);
                splitters.add(nd + "^" + par + "~" + gpar);
            }
        }
    }
}
Also used : Pair(edu.stanford.nlp.util.Pair)

Example 2 with Pair

Use of edu.stanford.nlp.util.Pair in the CoreNLP project by stanfordnlp.

In the class DocumentPreprocessor, the method findSyntacticRelationsFromDependency:

/**
 * Marks syntactic relations between mentions — list membership, apposition,
 * predicate nominatives, relative pronouns — using the enhanced dependency
 * graph. Relative-pronoun detection is not implemented yet (empty pair set).
 *
 * @param orderedMentions mentions to relate; assumed to share one sentence,
 *        since only the first mention's dependency graph is consulted —
 *        TODO confirm against callers
 */
private static void findSyntacticRelationsFromDependency(List<Mention> orderedMentions) {
    if (orderedMentions.size() == 0)
        return;
    markListMemberRelation(orderedMentions);
    // NOTE(review): the graph of the FIRST mention is used for all mentions
    SemanticGraph dependency = orderedMentions.get(0).enhancedDependency;
    // apposition
    Set<Pair<Integer, Integer>> appos = Generics.newHashSet();
    List<SemanticGraphEdge> appositions = dependency.findAllRelns(UniversalEnglishGrammaticalRelations.APPOSITIONAL_MODIFIER);
    for (SemanticGraphEdge edge : appositions) {
        // index() - 1: presumably converts 1-based token indices to 0-based — verify
        int sIdx = edge.getSource().index() - 1;
        int tIdx = edge.getTarget().index() - 1;
        appos.add(Pair.makePair(sIdx, tIdx));
    }
    markMentionRelation(orderedMentions, appos, "APPOSITION");
    // predicate nominatives
    Set<Pair<Integer, Integer>> preNomi = Generics.newHashSet();
    List<SemanticGraphEdge> copula = dependency.findAllRelns(UniversalEnglishGrammaticalRelations.COPULA);
    for (SemanticGraphEdge edge : copula) {
        IndexedWord source = edge.getSource();
        // subject of the copular clause: prefer nominal, fall back to clausal
        IndexedWord target = dependency.getChildWithReln(source, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT);
        if (target == null)
            target = dependency.getChildWithReln(source, UniversalEnglishGrammaticalRelations.CLAUSAL_SUBJECT);
        // TODO
        if (target == null)
            continue;
        // to handle relative clause: e.g., Tim who is a student,
        // a WH-word subject is swapped for the head the relative clause modifies
        if (target.tag().startsWith("W")) {
            IndexedWord parent = dependency.getParent(source);
            if (parent != null && dependency.reln(parent, source).equals(UniversalEnglishGrammaticalRelations.RELATIVE_CLAUSE_MODIFIER)) {
                target = parent;
            }
        }
        int sIdx = source.index() - 1;
        int tIdx = target.index() - 1;
        // pair order is (subject, predicate) — reversed relative to the edge
        preNomi.add(Pair.makePair(tIdx, sIdx));
    }
    markMentionRelation(orderedMentions, preNomi, "PREDICATE_NOMINATIVE");
    // relative pronouns  TODO
    Set<Pair<Integer, Integer>> relativePronounPairs = Generics.newHashSet();
    markMentionRelation(orderedMentions, relativePronounPairs, "RELATIVE_PRONOUN");
}
Also used : SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) IntPair(edu.stanford.nlp.util.IntPair) Pair(edu.stanford.nlp.util.Pair)

Example 3 with Pair

Use of edu.stanford.nlp.util.Pair in the CoreNLP project by stanfordnlp.

In the class CoNLLDocumentReader, the method writeTabSep:

/**
 * Writes one sentence in tab-separated form: word, POS tag, NER category,
 * and a coref column collapsed to "MENTION"/"O" (head words of mentions are
 * labeled MENTION; everything else O). A trailing blank line ends the sentence.
 *
 * @param pw       destination writer; not closed here
 * @param sentence sentence CoreMap carrying tokens and a parse tree
 * @param chainmap coref chain map — NOTE(review): accepted but never read in
 *                 this method; possibly kept for interface compatibility
 */
public static void writeTabSep(PrintWriter pw, CoreMap sentence, CollectionValuedMap<String, CoreMap> chainmap) {
    HeadFinder headFinder = new ModCollinsHeadFinder();
    List<CoreLabel> sentenceAnno = sentence.get(CoreAnnotations.TokensAnnotation.class);
    Tree sentenceTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    // span (start, end) -> coref-chain id string seen at that span
    Map<Pair<Integer, Integer>, String> sentenceInfo = Generics.newHashMap();
    Set<Tree> sentenceSubTrees = sentenceTree.subTrees();
    // setSpans() mutates the tree nodes, so the subtrees gathered above see them
    sentenceTree.setSpans();
    Map<Pair<Integer, Integer>, Tree> treeSpanMap = Generics.newHashMap();
    Map<Pair<Integer, Integer>, List<Tree>> wordSpanMap = Generics.newHashMap();
    // index every subtree (and its leaves) by its token span
    for (Tree ctree : sentenceSubTrees) {
        IntPair span = ctree.getSpan();
        if (span != null) {
            treeSpanMap.put(Pair.makePair(span.getSource(), span.getTarget()), ctree);
            wordSpanMap.put(Pair.makePair(span.getSource(), span.getTarget()), ctree.getLeaves());
        }
    }
    // per-token output columns: word, tag, NER, mention label
    String[][] finalSentence;
    finalSentence = new String[sentenceAnno.size()][];
    // mention span -> head word of that mention (may be null if none found)
    Map<Pair<Integer, Integer>, String> allHeads = Generics.newHashMap();
    int index = -1;
    for (CoreLabel newAnno : sentenceAnno) {
        index += 1;
        String word = newAnno.word();
        String tag = newAnno.tag();
        String cat = newAnno.ner();
        String coref = newAnno.get(CorefCoreAnnotations.CorefAnnotation.class);
        finalSentence[index] = new String[4];
        finalSentence[index][0] = word;
        finalSentence[index][1] = tag;
        finalSentence[index][2] = cat;
        finalSentence[index][3] = coref;
        if (coref == null) {
            // records a null marker for the single-token span; token labeled O
            sentenceInfo.put(Pair.makePair(index, index), coref);
            finalSentence[index][3] = "O";
        } else {
            // a token may belong to several chains, separated by '|'
            String[] allC = coref.split("\\|");
            for (String corefG : allC) {
                Pair<Integer, Integer> mention = getMention(index, corefG, sentenceAnno);
                if (!include(sentenceInfo, mention, corefG)) {
                    // find largest NP in mention
                    sentenceInfo.put(mention, corefG);
                    Tree mentionTree = treeSpanMap.get(mention);
                    String head = null;
                    if (mentionTree != null) {
                        head = mentionTree.headTerminal(headFinder).nodeString();
                    } else if (mention.first.equals(mention.second)) {
                        // single-token mention with no matching subtree: the token is its own head
                        head = word;
                    }
                    allHeads.put(mention, head);
                }
            }
            // label a token MENTION only if it is the head of some mention
            if (allHeads.values().contains(word)) {
                finalSentence[index][3] = "MENTION";
            } else {
                finalSentence[index][3] = "O";
            }
        }
    }
    for (int i = 0; i < finalSentence.length; i++) {
        String[] wordInfo = finalSentence[i];
        if (i < finalSentence.length - 1) {
            // possessive fix-up: shift the MENTION label from a "'s" token
            // onto the preceding word
            String[] nextWordInfo = finalSentence[i + 1];
            if (nextWordInfo[3].equals("MENTION") && nextWordInfo[0].equals("'s")) {
                wordInfo[3] = "MENTION";
                finalSentence[i + 1][3] = "O";
            }
        }
        pw.println(wordInfo[0] + "\t" + wordInfo[1] + "\t" + wordInfo[2] + "\t" + wordInfo[3]);
    }
    // blank line terminates the sentence block
    pw.println("");
}
Also used : ModCollinsHeadFinder(edu.stanford.nlp.trees.ModCollinsHeadFinder) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) IntPair(edu.stanford.nlp.util.IntPair) ModCollinsHeadFinder(edu.stanford.nlp.trees.ModCollinsHeadFinder) HeadFinder(edu.stanford.nlp.trees.HeadFinder) ChineseSemanticHeadFinder(edu.stanford.nlp.trees.international.pennchinese.ChineseSemanticHeadFinder) CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) List(java.util.List) ArrayList(java.util.ArrayList) IntPair(edu.stanford.nlp.util.IntPair) Pair(edu.stanford.nlp.util.Pair)

Example 4 with Pair

Use of edu.stanford.nlp.util.Pair in the CoreNLP project by stanfordnlp.

In the class RothResultsByRelation, the method printResults:

/**
 * Prints gold relations alongside the extractor's predicted labels, grouped
 * by argument-type pair and sorted so rare dependency paths come first.
 * Only a fixed whitelist of argument-type combinations is printed, and
 * relations with an empty dependency path are skipped.
 *
 * @param pw              destination for the report; not closed here
 * @param goldStandard    gold-annotated sentences
 * @param extractorOutput system output, aligned 1:1 with goldStandard
 */
@Override
public void printResults(PrintWriter pw, List<CoreMap> goldStandard, List<CoreMap> extractorOutput) {
    featureFactory = MachineReading.makeRelationFeatureFactory(MachineReadingProperties.relationFeatureFactoryClass, MachineReadingProperties.relationFeatures, false);
    // generic mentions work well in this domain
    mentionFactory = new RelationMentionFactory();
    ResultsPrinter.align(goldStandard, extractorOutput);
    List<RelationMention> relations = new ArrayList<>();
    // gold relation -> predicted type from the aligned extractor output
    final Map<RelationMention, String> predictions = new HashMap<>();
    for (int i = 0; i < goldStandard.size(); i++) {
        List<RelationMention> goldRelations = AnnotationUtils.getAllRelations(mentionFactory, goldStandard.get(i), true);
        relations.addAll(goldRelations);
        for (RelationMention rel : goldRelations) {
            // NOTE(review): getRelation could return null for an unaligned
            // pair, which would NPE here — TODO confirm align() guarantees it
            predictions.put(rel, AnnotationUtils.getRelation(mentionFactory, extractorOutput.get(i), rel.getArg(0), rel.getArg(1)).getType());
        }
    }
    // counts of ((argType1, argType2), dependency path) combinations
    final Counter<Pair<Pair<String, String>, String>> pathCounts = new ClassicCounter<>();
    for (RelationMention rel : relations) {
        pathCounts.incrementCount(new Pair<>(new Pair<>(rel.getArg(0).getType(), rel.getArg(1).getType()), featureFactory.getFeature(rel, "dependency_path_lowlevel")));
    }
    // per-label tallies restricted to relations whose path occurs exactly once
    Counter<String> singletonCorrect = new ClassicCounter<>();
    Counter<String> singletonPredicted = new ClassicCounter<>();
    Counter<String> singletonActual = new ClassicCounter<>();
    for (RelationMention rel : relations) {
        if (pathCounts.getCount(new Pair<>(new Pair<>(rel.getArg(0).getType(), rel.getArg(1).getType()), featureFactory.getFeature(rel, "dependency_path_lowlevel"))) == 1.0) {
            String prediction = predictions.get(rel);
            if (prediction.equals(rel.getType())) {
                singletonCorrect.incrementCount(prediction);
            }
            singletonPredicted.incrementCount(prediction);
            singletonActual.incrementCount(rel.getType());
        }
    }
    // Orders relations for readable output: by argument-type pair, then by
    // ascending path frequency, then gold type, predicted type, and finally
    // sentence text as a deterministic tie-breaker.
    class RelComp implements Comparator<RelationMention> {

        @Override
        public int compare(RelationMention rel1, RelationMention rel2) {
            // Group together actual relations of a type with relations that were
            // predicted to be that type
            String prediction1 = predictions.get(rel1);
            String prediction2 = predictions.get(rel2);
            // String rel1group = RelationsSentence.isUnrelatedLabel(rel1.getType())
            // ? prediction1 : rel1.getType();
            // String rel2group = RelationsSentence.isUnrelatedLabel(rel2.getType())
            // ? prediction2 : rel2.getType();
            int entComp = (rel1.getArg(0).getType() + rel1.getArg(1).getType()).compareTo(rel2.getArg(0).getType() + rel2.getArg(1).getType());
            // int groupComp = rel1group.compareTo(rel2group);
            int typeComp = rel1.getType().compareTo(rel2.getType());
            int predictionComp = prediction1.compareTo(prediction2);
            // int pathComp =
            // getFeature(rel1,"generalized_dependency_path").compareTo(getFeature(rel2,"generalized_dependency_path"));
            double pathCount1 = pathCounts.getCount(new Pair<>(new Pair<>(rel1.getArg(0).getType(), rel1.getArg(1).getType()), featureFactory.getFeature(rel1, "dependency_path_lowlevel")));
            double pathCount2 = pathCounts.getCount(new Pair<>(new Pair<>(rel2.getArg(0).getType(), rel2.getArg(1).getType()), featureFactory.getFeature(rel2, "dependency_path_lowlevel")));
            if (entComp != 0) {
                return entComp;
            // } else if (pathComp != 0) {
            // return pathComp;
            } else if (pathCount1 < pathCount2) {
                return -1;
            } else if (pathCount1 > pathCount2) {
                return 1;
            } else if (typeComp != 0) {
                return typeComp;
            } else if (predictionComp != 0) {
                return predictionComp;
            } else {
                return rel1.getSentence().get(CoreAnnotations.TextAnnotation.class).compareTo(rel2.getSentence().get(CoreAnnotations.TextAnnotation.class));
            }
        }
    }
    RelComp relComp = new RelComp();
    Collections.sort(relations, relComp);
    for (RelationMention rel : relations) {
        String prediction = predictions.get(rel);
        // if (RelationsSentence.isUnrelatedLabel(prediction) &&
        // RelationsSentence.isUnrelatedLabel(rel.getType())) {
        // continue;
        // }
        String type1 = rel.getArg(0).getType();
        String type2 = rel.getArg(1).getType();
        String path = featureFactory.getFeature(rel, "dependency_path_lowlevel");
        // only print the whitelisted argument-type combinations
        if (!((type1.equals("PEOPLE") && type2.equals("PEOPLE")) || (type1.equals("PEOPLE") && type2.equals("LOCATION")) || (type1.equals("LOCATION") && type2.equals("LOCATION")) || (type1.equals("ORGANIZATION") && type2.equals("LOCATION")) || (type1.equals("PEOPLE") && type2.equals("ORGANIZATION")))) {
            continue;
        }
        if (path.equals("")) {
            continue;
        }
        pw.println("\nLABEL: " + prediction);
        pw.println(rel);
        pw.println(path);
        pw.println(featureFactory.getFeatures(rel, "dependency_path_words"));
        pw.println(featureFactory.getFeature(rel, "surface_path_POS"));
    }
}
Also used : RelationMentionFactory(edu.stanford.nlp.ie.machinereading.structure.RelationMentionFactory) RelationMention(edu.stanford.nlp.ie.machinereading.structure.RelationMention) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Comparator(java.util.Comparator) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) Pair(edu.stanford.nlp.util.Pair)

Example 5 with Pair

Use of edu.stanford.nlp.util.Pair in the CoreNLP project by stanfordnlp.

In the class Units, the method loadUnits:

/**
 * Loads unit definitions from a comma-separated file whose header row names
 * the columns (unit, prefix, symbol, type, system, defaultUnit,
 * defaultUnitScale). After all rows are read, each unit's default unit is
 * resolved by name; unknown defaults are logged as warnings.
 *
 * @param filename path (or other resource spec accepted by IOUtils) to read
 * @return the units in file order
 * @throws IOException if the file cannot be opened or read
 * @throws NullPointerException if a required header column is missing
 *         (the headerIndex lookups below unbox a null Integer)
 */
public static List<Unit> loadUnits(String filename) throws IOException {
    Pattern commaPattern = Pattern.compile("\\s*,\\s*");
    List<Unit> list = new ArrayList<>();
    Map<String, Unit> unitsByName = new HashMap<>();
    // unit name -> (default unit name, scale); resolved after the read loop
    // so forward references to later-defined units work
    Map<String, Pair<String, Double>> unitToDefaultUnits = new HashMap<>();
    // try-with-resources: the original closed the reader only on the success
    // path, leaking the handle whenever parsing threw mid-file
    try (BufferedReader br = IOUtils.readerFromString(filename)) {
        String headerString = br.readLine();
        String[] header = commaPattern.split(headerString);
        Map<String, Integer> headerIndex = new HashMap<>();
        for (int i = 0; i < header.length; i++) {
            headerIndex.put(header[i], i);
        }
        int iName = headerIndex.get("unit");
        int iPrefix = headerIndex.get("prefix");
        int iSymbol = headerIndex.get("symbol");
        int iType = headerIndex.get("type");
        int iSystem = headerIndex.get("system");
        int iDefaultUnit = headerIndex.get("defaultUnit");
        int iDefaultUnitScale = headerIndex.get("defaultUnitScale");
        String line;
        while ((line = br.readLine()) != null) {
            String[] fields = commaPattern.split(line);
            Unit unit = new Unit(fields[iName], fields[iSymbol], fields[iType].toUpperCase());
            unit.system = fields[iSystem];
            // trailing columns are optional; short rows simply omit them
            if (fields.length > iPrefix) {
                unit.prefixSystem = fields[iPrefix];
            }
            if (fields.length > iDefaultUnit) {
                double scale = 1.0;
                if (fields.length > iDefaultUnitScale) {
                    scale = Double.parseDouble(fields[iDefaultUnitScale]);
                }
                unitToDefaultUnits.put(unit.getName(), Pair.makePair(fields[iDefaultUnit], scale));
            }
            unitsByName.put(unit.getName(), unit);
            list.add(unit);
        }
    }
    // second pass: wire up default-unit references now that all units exist
    for (Map.Entry<String, Pair<String, Double>> entry : unitToDefaultUnits.entrySet()) {
        Unit unit = unitsByName.get(entry.getKey());
        Unit defaultUnit = unitsByName.get(entry.getValue().first);
        if (defaultUnit != null) {
            unit.defaultUnit = defaultUnit;
            unit.defaultUnitScale = entry.getValue().second;
        } else {
            Redwood.Util.warn("Unknown default unit " + entry.getValue().first + " for " + entry.getKey());
        }
    }
    return list;
}
Also used : Pattern(java.util.regex.Pattern) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) BufferedReader(java.io.BufferedReader) HashMap(java.util.HashMap) Map(java.util.Map) Pair(edu.stanford.nlp.util.Pair)

Aggregations

Pair (edu.stanford.nlp.util.Pair)121 ArrayList (java.util.ArrayList)38 CoreLabel (edu.stanford.nlp.ling.CoreLabel)28 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)24 CoreMap (edu.stanford.nlp.util.CoreMap)22 List (java.util.List)20 Tree (edu.stanford.nlp.trees.Tree)19 IndexedWord (edu.stanford.nlp.ling.IndexedWord)15 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)15 Map (java.util.Map)15 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)13 IntPair (edu.stanford.nlp.util.IntPair)11 HashMap (java.util.HashMap)11 LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser)7 TregexPattern (edu.stanford.nlp.trees.tregex.TregexPattern)7 HashSet (java.util.HashSet)7 EvaluateTreebank (edu.stanford.nlp.parser.metrics.EvaluateTreebank)6 IOException (java.io.IOException)6 StringReader (java.io.StringReader)6 RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException)5