Search in sources :

Example 1 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project mavuno by metzlerd.

the class NLProcTools method getNETags.

/**
 * Collects the answer annotation of every label produced by the
 * named-entity tagger.
 *
 * <p>{@code mSentenceWords} is handed to {@code mNETagger.classifySentence},
 * and the {@code AnswerAnnotation} value of each returned label is gathered
 * in the order the tagger returned them.
 *
 * @return one entry per classified label, in tagger order
 */
public List<String> getNETags() {
    List<CoreLabel> classified = mNETagger.classifySentence(mSentenceWords);
    List<String> tags = new ArrayList<String>(classified.size());
    for (int idx = 0; idx < classified.size(); idx++) {
        tags.add(classified.get(idx).get(AnswerAnnotation.class));
    }
    return tags;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) AnswerAnnotation(edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation) ArrayList(java.util.ArrayList)

Example 2 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class DependencyParser method genDictionaries.

/**
 * Scans a corpus and records every word, part-of-speech tag and
 * dependency relation label observed, then fills {@code knownWords},
 * {@code knownPos} and {@code knownLabels} and calls
 * {@code generateIDs()} so that word / POS / label lookup is available
 * at train- / run-time.
 *
 * @param sents sentences whose tokens supply the words and POS tags
 * @param trees dependency trees whose nodes supply the relation labels
 */
private void genDictionaries(List<CoreMap> sents, List<DependencyTree> trees) {
    // Gather every word and tag, appending one sentence after the other.
    List<String> allWords = new ArrayList<>();
    List<String> allTags = new ArrayList<>();
    for (CoreMap sent : sents) {
        for (CoreLabel tok : sent.get(CoreAnnotations.TokensAnnotation.class)) {
            allWords.add(tok.word());
            allTags.add(tok.tag());
        }
    }

    // A node whose head is 0 provides the root label (the last such node
    // seen wins); the label of every other node is collected.
    List<String> allLabels = new ArrayList<>();
    String rootLabel = null;
    for (DependencyTree tree : trees) {
        for (int node = 1; node <= tree.n; ++node) {
            boolean attachedToRoot = tree.getHead(node) == 0;
            if (attachedToRoot) {
                rootLabel = tree.getLabel(node);
            } else {
                allLabels.add(tree.getLabel(node));
            }
        }
    }

    // Generate "dictionaries," possibly with frequency cutoff
    knownWords = Util.generateDict(allWords, config.wordCutOff);
    knownPos = Util.generateDict(allTags);
    knownLabels = Util.generateDict(allLabels);

    // The root label goes first; if it also occurs among the other labels,
    // drop that first later occurrence so it is not listed twice.
    knownLabels.add(0, rootLabel);
    int pos = 1;
    while (pos < knownLabels.size()) {
        if (knownLabels.get(pos).equals(rootLabel)) {
            knownLabels.remove(pos);
            break;
        }
        ++pos;
    }

    // Special entries are inserted at the front of each dictionary.
    knownWords.add(0, Config.UNKNOWN);
    knownWords.add(1, Config.NULL);
    knownWords.add(2, Config.ROOT);

    knownPos.add(0, Config.UNKNOWN);
    knownPos.add(1, Config.NULL);
    knownPos.add(2, Config.ROOT);

    knownLabels.add(0, Config.NULL);

    generateIDs();

    log.info(Config.SEPARATOR);
    log.info("#Word: " + knownWords.size());
    log.info("#POS:" + knownPos.size());
    log.info("#Label: " + knownLabels.size());
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 3 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class NegraPennTreebankParserParams method transformTree.

/**
 * Performs the language-specific tree transformations for one node by
 * appending annotation markers to its category. Any parameterizations
 * should be inside the specific TreebankLangParserParams class.
 *
 * @param t    the node to transform; returned unchanged when it is
 *             {@code null} or a leaf
 * @param root not used by this implementation
 * @return {@code t}, relabelled with a {@code CategoryWordTag} built from
 *         the annotated category, the word and the tag of its old label
 */
@Override
public Tree transformTree(Tree t, Tree root) {
    if (t == null || t.isLeaf()) {
        return t;
    }

    List<String> marks = new ArrayList<>();
    CoreLabel nodeLabel = (CoreLabel) t.label();
    String word = nodeLabel.word();
    String tag = nodeLabel.tag();
    String cat = nodeLabel.value();
    String baseCat = treebankLanguagePack().basicCategory(cat);

    // categories -- at present there is no tag annotation!!
    if (!t.isPhrasal()) {
        // Non-phrasal node (the original comment calls this the
        // t.isPreTerminal() case): mark ":" and ";" under the "$." category.
        boolean colonLike = markColon && cat.equals("$.") && (word.equals(":") || word.equals(";"));
        if (colonLike) {
            marks.add("-%colon");
        }
    } else {
        List<String> childCats = childBasicCats(t);

        // mark vp's headed by "zu" verbs
        boolean zuCandidate = markZuVP && baseCat.equals("VP");
        if (DEBUG && zuCandidate) {
            System.out.println("child basic cats: " + childCats);
        }
        if (zuCandidate && (childCats.contains("VZ") || childCats.contains("VVIZU"))) {
            if (DEBUG) {
                System.out.println("Marked zu VP" + t);
            }
            marks.add("%ZU");
        }

        // mark relative clause S's
        // NOTE(review): the label was already cast to CoreLabel above, so this
        // branch can only fire if NegraLabel is a CoreLabel subtype -- confirm.
        if (markRC && (t.label() instanceof NegraLabel) && baseCat.equals("S")) {
            NegraLabel negra = (NegraLabel) t.label();
            if (negra.getEdge() != null && negra.getEdge().equals("RC")) {
                if (DEBUG) {
                    System.out.println("annotating this guy as RC:");
                    t.pennPrint();
                }
                marks.add("%RC");
            }
        }

        if (markContainsV && containsVP(t)) {
            marks.add("%vp");
        }
        if (markLP && leftPhrasal(t)) {
            marks.add("%LP");
        }

        // The next two marks depend on functional tags being present.
        if (markKonjParent && anyCategoryContains(childCats, "-KONJ")) {
            marks.add("%konjp");
        }
        if (markHDParent && anyCategoryContains(childCats, "-HD")) {
            marks.add("%hdp");
        }
    }

    // A parent-category annotation ("^" + truncated parent value) for
    // preterminals existed here only as commented-out code and is not applied.

    // put on all the annotations
    StringBuilder annotatedCat = new StringBuilder(cat);
    for (String mark : marks) {
        annotatedCat.append(mark);
    }
    t.setLabel(new CategoryWordTag(annotatedCat.toString(), word, tag));
    return t;
}

/** Returns whether at least one of the given categories contains {@code fragment}. */
private static boolean anyCategoryContains(List<String> categories, String fragment) {
    for (String category : categories) {
        if (category.contains(fragment)) {
            return true;
        }
    }
    return false;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) NegraLabel(edu.stanford.nlp.trees.international.negra.NegraLabel) CategoryWordTag(edu.stanford.nlp.ling.CategoryWordTag)

Example 4 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class LeafAncestorEval method display.

/**
 * Prints the accumulated leaf-ancestor statistics to {@code pw}.
 *
 * <p>In non-verbose mode a single summary line is written. In verbose mode
 * a full report is written: sentence-level and corpus-level averages,
 * followed by one line per lineage whose count in {@code catNum} is at
 * least 30, listed in ascending order of average score.
 *
 * <p>Lineages with equal averages are kept together in one bucket and
 * printed in the order {@code catAvg} yields them. Earlier code separated
 * ties by adding random jitter to the sort key, which made the report
 * order vary between runs and could misplace or overwrite entries.
 *
 * @param verbose whether to print the full report instead of the summary line
 * @param pw      destination of the output
 */
public void display(boolean verbose, PrintWriter pw) {
    double corpusLevel = corpusAvg / corpusNum;
    double sentLevel = sentAvg / sentNum;
    double sentEx = 100.0 * sentExact / sentNum;
    if (verbose) {
        // Sort key -> all lineages sharing that key, so ties never collide.
        Map<Double, List<List<CoreLabel>>> lineagesByAvg = new TreeMap<>();
        for (Map.Entry<List<CoreLabel>, Double> entry : catAvg.entrySet()) {
            double avg = entry.getValue() / catNum.get(entry.getKey());
            if (Double.isNaN(avg)) {
                // NaN averages are sorted ahead of every real score.
                avg = -1.0;
            }
            List<List<CoreLabel>> bucket = lineagesByAvg.get(avg);
            if (bucket == null) {
                bucket = new ArrayList<>();
                lineagesByAvg.put(avg, bucket);
            }
            bucket.add(entry.getKey());
        }
        pw.println("============================================================");
        pw.println("Leaf Ancestor Metric" + "(" + name + ") -- final statistics");
        pw.println("============================================================");
        pw.println("#Sentences: " + (int) sentNum);
        pw.println();
        pw.println("Sentence-level (macro-averaged)");
        pw.printf(" Avg: %.3f%n", sentLevel);
        pw.printf(" Exact: %.2f%%%n", sentEx);
        pw.println();
        pw.println("Corpus-level (micro-averaged)");
        pw.printf(" Avg: %.3f%n", corpusLevel);
        pw.println("============================================================");
        for (List<List<CoreLabel>> bucket : lineagesByAvg.values()) {
            for (List<CoreLabel> lineage : bucket) {
                // Skip lineages whose count is below 30.
                if (catNum.get(lineage) < 30.0) {
                    continue;
                }
                double avg = catAvg.get(lineage) / catNum.get(lineage);
                pw.printf(" %.3f\t%d\t%s%n", avg, (int) ((double) catNum.get(lineage)), toString(lineage));
            }
        }
        pw.println("============================================================");
    } else {
        pw.printf("%s summary: corpus: %.3f sent: %.3f sent-ex: %.2f%n", name, corpusLevel, sentLevel, sentEx);
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Random(java.util.Random) ArrayList(java.util.ArrayList) List(java.util.List) TreeMap(java.util.TreeMap) Map(java.util.Map)

Example 5 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class DependencyParser method predict.

/**
 * Convenience method for {@link #predict(edu.stanford.nlp.util.CoreMap)}.
 * Wraps the given words in a fresh {@code CoreLabel} carrying a
 * {@code TokensAnnotation} and delegates to that method.
 *
 * <p>A word that already is a {@code CoreLabel} is reused as-is (and has
 * its index overwritten); any other word is copied into a new
 * {@code CoreLabel}. Indices are assigned starting at 1, in sentence order.
 *
 * @param sentence the words to parse; each must provide a part-of-speech tag
 * @return the result of {@link #predict(edu.stanford.nlp.util.CoreMap)}
 * @throws IllegalArgumentException if a {@code CoreLabel} has a {@code null}
 *         tag, or a non-{@code CoreLabel} word does not implement {@code HasTag}
 * @see #predict(edu.stanford.nlp.util.CoreMap)
 */
public GrammaticalStructure predict(List<? extends HasWord> sentence) {
    final String missingTagMessage = "Parser requires words with part-of-speech tag annotations";
    List<CoreLabel> tokens = new ArrayList<>();
    for (HasWord wd : sentence) {
        CoreLabel token;
        if (!(wd instanceof CoreLabel)) {
            // Copy the surface form first, then require a tag source.
            token = new CoreLabel();
            token.setValue(wd.word());
            token.setWord(wd.word());
            if (!(wd instanceof HasTag)) {
                throw new IllegalArgumentException(missingTagMessage);
            }
            token.setTag(((HasTag) wd).tag());
        } else {
            token = (CoreLabel) wd;
            if (token.tag() == null) {
                throw new IllegalArgumentException(missingTagMessage);
            }
        }
        // Indices are 1-based: the next index is the current token count plus one.
        token.setIndex(tokens.size() + 1);
        tokens.add(token);
    }
    CoreLabel sentenceLabel = new CoreLabel();
    sentenceLabel.set(CoreAnnotations.TokensAnnotation.class, tokens);
    return predict(sentenceLabel);
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) HasTag(edu.stanford.nlp.ling.HasTag)

Aggregations

CoreLabel (edu.stanford.nlp.ling.CoreLabel)533 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)310 CoreMap (edu.stanford.nlp.util.CoreMap)102 ArrayList (java.util.ArrayList)101 Tree (edu.stanford.nlp.trees.Tree)98 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)96 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)63 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)53 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)41 IndexedWord (edu.stanford.nlp.ling.IndexedWord)38 List (java.util.List)33 Annotation (edu.stanford.nlp.pipeline.Annotation)31 Mention (edu.stanford.nlp.coref.data.Mention)29 Label (edu.stanford.nlp.ling.Label)28 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)26 Properties (java.util.Properties)24 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)21 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)19 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)18 StringReader (java.io.StringReader)18