Search in sources :

Example 6 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class Util method writeConllFile.

/**
   * Writes sentences and their dependency trees to a file, one token per line in
   * ten tab-separated columns, with an empty line after every sentence.
   *
   * The columns are: 1-based token index, word, "_", tag, tag, "_", head index
   * (from <code>tree.getHead</code>), dependency label (from <code>tree.getLabel</code>),
   * "_", "_". The i-th tree is paired with the i-th sentence.
   *
   * The writer is opened in a try-with-resources statement so the file handle is
   * released even when writing fails part-way through.
   *
   * @param outFile Path of the file to write
   * @param sentences Sentences whose tokens are written
   * @param trees Dependency trees, index-aligned with <code>sentences</code>
   * @throws RuntimeIOException wrapping any exception raised while opening or writing
   */
public static void writeConllFile(String outFile, List<CoreMap> sentences, List<DependencyTree> trees) {
    try (PrintWriter output = IOUtils.getPrintWriter(outFile)) {
        for (int i = 0; i < sentences.size(); i++) {
            CoreMap sentence = sentences.get(i);
            DependencyTree tree = trees.get(i);
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            // Token positions are 1-based in the output and in the tree lookups.
            for (int j = 1, size = tokens.size(); j <= size; ++j) {
                CoreLabel token = tokens.get(j - 1);
                output.printf("%d\t%s\t_\t%s\t%s\t_\t%d\t%s\t_\t_%n", j, token.word(), token.tag(), token.tag(), tree.getHead(j), tree.getLabel(j));
            }
            // Blank line separates sentences.
            output.println();
        }
    } catch (Exception e) {
        throw new RuntimeIOException(e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 7 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class ParserDemo method demoAPI.

/**
   * Shows two further ways of driving the parser: with a sentence that is
   * already split into tokens, and with raw text that is first run through an
   * explicit tokenizer and treated as a single sentence.  The second parse is
   * also turned into typed dependencies, and finally printed through a
   * TreePrint object, whose construction options decide which outputs are
   * printed.  Output can be captured by handing a PrintWriter to
   * TreePrint.printTree. This code is for English.
   */
public static void demoAPI(LexicalizedParser lp) {
    // Case 1: the words are supplied pre-tokenized.
    String[] pretokenized = { "This", "is", "an", "easy", "sentence", "." };
    Tree tree = lp.apply(SentenceUtils.toCoreLabelList(pretokenized));
    tree.pennPrint();
    System.out.println();

    // Case 2: raw text goes through an explicitly constructed tokenizer first.
    String rawText = "This is another sentence.";
    TokenizerFactory<CoreLabel> factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> tokens = factory.getTokenizer(new StringReader(rawText)).tokenize();
    tree = lp.apply(tokens);

    // The language pack comes from the parser itself (original note: PennTreebankLanguagePack for English).
    GrammaticalStructure structure = lp.treebankLanguagePack().grammaticalStructureFactory().newGrammaticalStructure(tree);
    System.out.println(structure.typedDependenciesCCprocessed());
    System.out.println();

    // A TreePrint object can print both the tree and its dependencies.
    new TreePrint("penn,typedDependenciesCollapsed").printTree(tree);
}
Also used : CoreLabelTokenFactory(edu.stanford.nlp.process.CoreLabelTokenFactory) CoreLabel(edu.stanford.nlp.ling.CoreLabel) StringReader(java.io.StringReader)

Example 8 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class Tdiff method markDiff.

/**
   * Compares the bracketings of two trees.  Every phrasal node of t2 gets its
   * DoAnnotation set to true when its bracket does not occur in t1 and to
   * false when it does.  Brackets of t1 that are matched by t2 are removed from
   * the result, so what is returned are the brackets of t1 missing from t2.
   *
   * @param t1 Reference tree; when null, the set of reference brackets is empty
   * @param t2 Tree whose phrasal nodes are marked; when null, nothing is marked
   * @return The brackets in t1 that are not in t2
   */
public static Set<Constituent> markDiff(Tree t1, Tree t2) {
    Set<Constituent> unmatched;
    if (t1 == null) {
        unmatched = Generics.<Constituent>newHashSet();
    } else {
        unmatched = t1.constituents(cf);
    }
    if (t2 == null) {
        return unmatched;
    }
    t2.setSpans();
    for (Tree node : t2) {
        if (!node.isPhrasal()) {
            continue;
        }
        IntPair span = node.getSpan();
        Constituent bracket = cf.newConstituent(span.getSource(), span.getTarget(), node.label(), 0.0);
        // remove() reports whether the bracket was present, i.e. shared with t1.
        boolean sharedWithT1 = unmatched.remove(bracket);
        ((CoreLabel) node.label()).set(CoreAnnotations.DoAnnotation.class, !sharedWithT1);
    }
    return unmatched;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) IntPair(edu.stanford.nlp.util.IntPair) Constituent(edu.stanford.nlp.trees.Constituent) LabeledConstituent(edu.stanford.nlp.trees.LabeledConstituent)

Example 9 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class ATBTreeUtils method taggedStringFromTree.

/**
   * Renders the tagged yield of a parse tree as one string.  The tree is first
   * pruned with <code>emptyFilter</code>; each remaining token then has its word
   * and value overwritten with the (optionally unescaped) word before the
   * tokens are joined, word and tag separated by <code>separator</code>.
   *
   * @param t - A parse tree
   * @param removeEscaping - If true, pass each word through <code>unEscape</code>. Otherwise, leave it as is.
   * @param separator Word/tag separator
   * @return A string of tagged words
   */
public static String taggedStringFromTree(Tree t, boolean removeEscaping, String separator) {
    Tree pruned = t.prune(emptyFilter, tf);
    List<CoreLabel> labels = pruned.taggedLabeledYield();
    for (CoreLabel label : labels) {
        String text = label.word();
        if (removeEscaping) {
            text = unEscape(text);
        }
        // Word and value are kept in sync on every token.
        label.setWord(text);
        label.setValue(text);
    }
    return SentenceUtils.listToString(labels, false, separator);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel)

Example 10 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class ArabicTreeNormalizer method normalizeWholeTree.

/**
   * Normalizes a whole tree in several passes: prunes it with
   * <code>emptyFilter</code> and splices with <code>aOverAFilter</code>, fixes up
   * every leaf, preterminal and phrasal node, optionally applies the PRD-verb
   * and NP-subject relabelings, handles a tree that is only a bare tag, and
   * finally makes sure the result is rooted at <code>rootLabel</code>.
   *
   * @param tree The tree to normalize
   * @param tf Factory passed to prune/spliceOut and used to create new nodes
   * @return The normalized tree; the null checks at the end indicate it may be
   *         null when unwrapping empty-labeled nodes leaves nothing
   */
@Override
public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
    tree = tree.prune(emptyFilter, tf).spliceOut(aOverAFilter, tf);
    for (Tree t : tree) {
        if (t.isLeaf()) {
            // Leaves: a word may carry a morphological annotation after MORPHO_MARK.
            // (The original comment here is truncated; it ended "specified by HasContext.")
            if (t.value().contains(MorphoFeatureSpecification.MORPHO_MARK)) {
                // NOTE(review): String.split treats its argument as a regex — confirm MORPHO_MARK is regex-safe.
                String[] toks = t.value().split(MorphoFeatureSpecification.MORPHO_MARK);
                if (toks.length != 2)
                    System.err.printf("%s: Word contains malformed morph annotation: %s%n", this.getClass().getName(), t.value());
                else if (t.label() instanceof CoreLabel) {
                    // toks[0] is the surface word; it becomes both the value and the word of the label.
                    ((CoreLabel) t.label()).setValue(toks[0].trim().intern());
                    ((CoreLabel) t.label()).setWord(toks[0].trim().intern());
                    Pair<String, String> lemmaMorph = MorphoFeatureSpecification.splitMorphString(toks[0], toks[1]);
                    String lemma = lemmaMorph.first();
                    String morphAnalysis = lemmaMorph.second();
                    if (lemma.equals(toks[0])) {
                        // Lemma is identical to the word: store the raw annotation as the original text.
                        ((CoreLabel) t.label()).setOriginalText(toks[1].trim().intern());
                    } else {
                        // TODO(speneg): Does this help?
                        // Map the lemma through lexMapper, falling back to the unmapped lemma
                        // when the mapping yields null or only whitespace.
                        String newLemma = lexMapper.map(null, lemma);
                        if (newLemma == null || newLemma.trim().length() == 0) {
                            newLemma = lemma;
                        }
                        // Stored form is: mapped lemma + LEMMA_MARK + morphological analysis.
                        String newMorphAnalysis = newLemma + MorphoFeatureSpecification.LEMMA_MARK + morphAnalysis;
                        ((CoreLabel) t.label()).setOriginalText(newMorphAnalysis.intern());
                    }
                } else {
                    System.err.printf("%s: Cannot store morph analysis in non-CoreLabel: %s%n", this.getClass().getName(), t.label().getClass().getName());
                }
            }
        } else if (t.isPreTerminal()) {
            // Preterminals: report a missing tag, otherwise copy the node value into the label's tag.
            if (t.value() == null || t.value().equals("")) {
                System.err.printf("%s: missing tag for\n%s\n", this.getClass().getName(), t.pennString());
            } else if (t.label() instanceof HasTag) {
                ((HasTag) t.label()).setTag(t.value());
            }
        } else {
            //Phrasal nodes
            // there are some nodes "/" missing preterminals.  We'll splice in a tag for these.
            int nk = t.numChildren();
            List<Tree> newKids = new ArrayList<>(nk);
            for (int j = 0; j < nk; j++) {
                Tree child = t.getChild(j);
                if (child.isLeaf()) {
                    // A leaf directly under a phrasal node gets wrapped in a new DUMMYTAG node.
                    System.err.printf("%s: Splicing in DUMMYTAG for%n%s%n", this.getClass().getName(), t.toString());
                    newKids.add(tf.newTreeNode("DUMMYTAG", Collections.singletonList(child)));
                } else {
                    newKids.add(child);
                }
            }
            // Children are replaced while the enclosing loop is still walking the tree.
            t.setChildren(newKids);
        }
    }
    // special global coding for moving PRD annotation from constituent to verb tag.
    if (markPRDverb) {
        TregexMatcher m = prdVerbPattern.matcher(tree);
        Tree match = null;
        while (m.find()) {
            // Reference comparison: skip a match node identical to the previous one,
            // so "-PRDverb" is not appended twice in a row to the same node.
            if (m.getMatch() != match) {
                match = m.getMatch();
                match.label().setValue(match.label().value() + "-PRDverb");
                Tree prd = m.getNode("prd");
                prd.label().setValue(super.normalizeNonterminal(prd.label().value()));
            }
        }
    }
    //Mark *only* subjects in verb-initial clauses
    if (retainNPSbj) {
        TregexMatcher m = npSbjPattern.matcher(tree);
        while (m.find()) {
            // Nodes matched by npSbjPattern have their label value reset to plain "NP".
            Tree match = m.getMatch();
            match.label().setValue("NP");
        }
    }
    if (tree.isPreTerminal()) {
        // The whole tree is a bare tag: bad!
        // CC, PUNC* and CONJ tags are wrapped in a FRAG node; any other bare tag is only reported.
        String val = tree.label().value();
        if (val.equals("CC") || val.startsWith("PUNC") || val.equals("CONJ")) {
            System.err.printf("%s: Bare tagged word being wrapped in FRAG\n%s\n", this.getClass().getName(), tree.pennString());
            tree = tf.newTreeNode("FRAG", Collections.singletonList(tree));
        } else {
            System.err.printf("%s: Bare tagged word\n%s\n", this.getClass().getName(), tree.pennString());
        }
    }
    // Unwrap nodes with a null/empty value and at most one child. The original comment here
    // is truncated; it ended: "will return null. In this case, readers e.g. PennTreeReader
    // will try to read the next tree."
    while (tree != null && (tree.value() == null || tree.value().equals("")) && tree.numChildren() <= 1) tree = tree.firstChild();
    // Wrap the result under rootLabel unless it already carries that label.
    // NOTE(review): a node with a null value and more than one child leaves the loop above
    // unchanged, and tree.value().equals(...) would then throw a NullPointerException — confirm
    // whether such input can occur.
    if (tree != null && !tree.value().equals(rootLabel))
        tree = tf.newTreeNode(rootLabel, Collections.singletonList(tree));
    return tree;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Tree(edu.stanford.nlp.trees.Tree) HasTag(edu.stanford.nlp.ling.HasTag) ArrayList(java.util.ArrayList) List(java.util.List) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher) Pair(edu.stanford.nlp.util.Pair)

Aggregations

CoreLabel (edu.stanford.nlp.ling.CoreLabel)533 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)310 CoreMap (edu.stanford.nlp.util.CoreMap)102 ArrayList (java.util.ArrayList)101 Tree (edu.stanford.nlp.trees.Tree)98 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)96 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)63 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)53 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)41 IndexedWord (edu.stanford.nlp.ling.IndexedWord)38 List (java.util.List)33 Annotation (edu.stanford.nlp.pipeline.Annotation)31 Mention (edu.stanford.nlp.coref.data.Mention)29 Label (edu.stanford.nlp.ling.Label)28 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)26 Properties (java.util.Properties)24 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)21 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)19 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)18 StringReader (java.io.StringReader)18