Search in sources :

Example 6 with HasIndex

use of edu.stanford.nlp.ling.HasIndex in project CoreNLP by stanfordnlp.

the class Tree method dependencies.

/**
   * Return a set of TaggedWord-TaggedWord dependencies, represented as
   * Dependency objects, for the Tree.  This will only give
   * useful results if the internal tree node labels support HasWord and
   * head percolation has already been done (see percolateHeads()).
   *
   * @param f Only dependencies accepted by this predicate are included
   *          in the result
   * @return Set of dependencies (each a Dependency)
   */
public Set<Dependency<Label, Label, Object>> dependencies(Predicate<Dependency<Label, Label, Object>> f, boolean isConcrete, boolean copyLabel, boolean copyPosTag) {
    Set<Dependency<Label, Label, Object>> deps = Generics.newHashSet();
    for (Tree node : this) {
        // Skip leaves and unary re-writes: with fewer than two children there
        // is no sibling that could depend on the head.
        if (node.isLeaf()) {
            continue;
        }
        Tree[] kids = node.children();
        if (kids.length < 2) {
            continue;
        }
        // Create the head label (percolateHeads has already been executed)
        Label headLabel = makeDependencyLabel(node.label(), copyLabel, isConcrete, copyPosTag);
        String headWord = dependencyWord(headLabel);
        // Indices are only consulted for concrete dependencies; -1 means "no index".
        int headIndex = (isConcrete && (headLabel instanceof HasIndex)) ? ((HasIndex) headLabel).index() : -1;
        // The first child whose index and word match the parent's is taken to
        // be the head child and yields no dependency; every other child
        // (including a later child repeating the head) is an argument.
        boolean seenHead = false;
        for (Tree child : kids) {
            Label depLabel = makeDependencyLabel(child.label(), copyLabel, isConcrete, copyPosTag);
            String depWord = dependencyWord(depLabel);
            int depIndex = (isConcrete && (depLabel instanceof HasIndex)) ? ((HasIndex) depLabel).index() : -1;
            // Null-safe comparison: both word() and value() may be null.
            boolean isHeadChild = !seenHead
                    && headIndex == depIndex
                    && (headWord == null ? depWord == null : headWord.equals(depWord));
            if (isHeadChild) {
                seenHead = true;
            } else {
                // A concrete dependency is only built when the two indices differ.
                Dependency<Label, Label, Object> dependency = (isConcrete && depIndex != headIndex) ? new UnnamedConcreteDependency(headLabel, depLabel) : new UnnamedDependency(headLabel, depLabel);
                if (f.test(dependency)) {
                    deps.add(dependency);
                }
            }
        }
    }
    return deps;
}

/**
 * Returns the word used to compare a head label with a child label:
 * the label's {@code word()} when it implements HasWord and the word is
 * non-null, otherwise the label's {@code value()} (which may itself be null).
 */
private static String dependencyWord(Label label) {
    String word = (label instanceof HasWord) ? ((HasWord) label).word() : null;
    return (word != null) ? word : label.value();
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Label(edu.stanford.nlp.ling.Label) HasIndex(edu.stanford.nlp.ling.HasIndex)

Example 7 with HasIndex

use of edu.stanford.nlp.ling.HasIndex in project CoreNLP by stanfordnlp.

the class Tree method percolateHeads.

/**
   * Finds the heads of the tree.  This code assumes that the label
   * does store and return sensible values for the category, word, and tag.
   * It will be a no-op otherwise.  The tree is modified.  The routine
   * assumes the Tree has word leaves and tag preterminals, and copies
   * their category to word and tag respectively, if they have a null
   * value.
   *
   * @param hf The headfinding algorithm to use
   */
public void percolateHeads(HeadFinder hf) {
    final Label ownLabel = label();

    // Leaf: nothing to percolate; just backfill a missing word from the value.
    // (word() is usually already set by the TreeReader.)
    if (isLeaf()) {
        if (ownLabel instanceof HasWord && ((HasWord) ownLabel).word() == null) {
            ((HasWord) ownLabel).setWord(ownLabel.value());
        }
        return;
    }

    // Internal node: children first, so their labels already carry head info
    // by the time the head finder looks at this node.
    for (Tree daughter : children()) {
        daughter.percolateHeads(hf);
    }

    final Tree headDaughter = hf.determineHead(this);
    if (headDaughter == null) {
        log.info("Head is null: " + this);
        return;
    }
    final Label chosen = headDaughter.label();

    // Head tag: taken from the head's label; if that yields nothing and the
    // head is a leaf, this node's own value is used as the tag.
    String tag = null;
    if (chosen instanceof HasTag) {
        tag = ((HasTag) chosen).tag();
    }
    if (tag == null && headDaughter.isLeaf()) {
        tag = ownLabel.value();
    }

    // Head word: taken from the head's label; a leaf head whose label gives
    // no word (e.g. the label type doesn't support word()) contributes its value.
    String word = null;
    if (chosen instanceof HasWord) {
        word = ((HasWord) chosen).word();
    }
    if (word == null && headDaughter.isLeaf()) {
        word = chosen.value();
    }

    // Head index: -1 when the head's label carries no index.
    int index = -1;
    if (chosen instanceof HasIndex) {
        index = ((HasIndex) chosen).index();
    }

    // Copy onto this node's label whatever it is able to store.
    if (ownLabel instanceof HasWord) {
        ((HasWord) ownLabel).setWord(word);
    }
    if (ownLabel instanceof HasTag) {
        ((HasTag) ownLabel).setTag(tag);
    }
    // A negative index is never written.
    if (index >= 0 && ownLabel instanceof HasIndex) {
        ((HasIndex) ownLabel).setIndex(index);
    }
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Label(edu.stanford.nlp.ling.Label) HasTag(edu.stanford.nlp.ling.HasTag) HasIndex(edu.stanford.nlp.ling.HasIndex)

Example 8 with HasIndex

use of edu.stanford.nlp.ling.HasIndex in project CoreNLP by stanfordnlp.

the class MemoryTreebank method load.

/**
   * Load a collection of parse trees from a Reader.
   * Each tree may optionally be encased in parens to allow for Penn
   * Treebank style trees.
   *
   * @param r The reader to read trees from.  (If you want it buffered,
   *    you should already have buffered it!)
   * @param id An ID for where these files come from (arbitrary, but
   *    something like a filename).  Can be <code>null</code> for none.
   */
public void load(Reader r, String id) {
    try {
        final TreeReader reader = treeReaderFactory().newTreeReader(r);
        Tree tree;
        // ordinal counts the trees read so far and is stamped on each tree
        // as its sentence index.
        for (int ordinal = 0; (tree = reader.readTree()) != null; ordinal++) {
            if (tree.label() instanceof HasIndex) {
                // Record provenance on the root label so the tree can be traced back.
                final HasIndex origin = (HasIndex) tree.label();
                if (id != null) {
                    origin.setDocID(id);
                }
                origin.setSentIndex(ordinal);
            }
            parseTrees.add(tree);
        }
    } catch (IOException e) {
        // The failure is logged and not rethrown; trees added before the
        // failure stay in parseTrees.
        log.info("load IO Exception: " + e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) HasIndex(edu.stanford.nlp.ling.HasIndex)

Example 9 with HasIndex

use of edu.stanford.nlp.ling.HasIndex in project CoreNLP by stanfordnlp.

the class MemoryTreebank method processFile.

/**
   * Load a collection of parse trees from the file of given name.
   * Each tree may optionally be encased in parens to allow for Penn
   * Treebank style trees.
   * This method implements the <code>FileProcessor</code> interface.
   *
   * @param file file to load a tree from
   */
public void processFile(File file) {
    TreeReader tr = null;
    // SRL entries for this particular file, looked up by sentence index below.
    // Stays null when no SRL table is configured or no entry matches the file.
    CollectionValuedMap<Integer, String> fileSrl = null;
    if (this.srlMap != null) {
        // The SRL table is keyed by path suffix: use the first key that the
        // file's absolute path ends with.
        String filename = file.getAbsolutePath();
        for (String suffix : this.srlMap.keySet()) {
            if (filename.endsWith(suffix)) {
                fileSrl = this.srlMap.get(suffix);
                break;
            }
        }
        if (fileSrl == null) {
            log.info("could not find SRL entries for file: " + file);
        }
    }
    try {
        // maybe print file name to get some feedback
        if (PRINT_FILENAMES) {
            log.info(file);
        }
        // could throw an IO exception if can't open for reading
        tr = treeReaderFactory().newTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(file), encoding())));
        int sentIndex = 0;
        Tree pt;
        while ((pt = tr.readTree()) != null) {
            if (pt.label() instanceof HasIndex) {
                // so we can trace where this tree came from
                HasIndex hi = (HasIndex) pt.label();
                hi.setDocID(file.getName());
                hi.setSentIndex(sentIndex);
            }
            Collection<String> srls = (fileSrl == null) ? null : fileSrl.get(sentIndex);
            // The tree is stored before it is annotated; annotation mutates
            // the same tree object in place.
            parseTrees.add(pt);
            if (srls != null) {
                annotateSrlRoles(pt, srls);
            }
            sentIndex++;
        }
    } catch (IOException e) {
        throw new RuntimeIOException("MemoryTreebank.processFile IOException in file " + file, e);
    } finally {
        IOUtils.closeIgnoringExceptions(tr);
    }
}

/**
 * Applies SRL entries to the labels of {@code pt}, in place.
 * <p>
 * Each entry is a whitespace-separated line. Field 0 is the terminal index
 * of the predicate; fields from index 4 onward are arguments of the form
 * {@code locs-argType}, where {@code locs} is a list of {@code term:height}
 * pairs separated by {@code *} or {@code ,}. Fields 1-3 are not read.
 * <p>
 * The predicate terminal gets CoNLLPredicateAnnotation set to true. For each
 * argument location, the node {@code height} levels above the preterminal
 * of terminal {@code term} gets a (predicate index -> argType) entry in its
 * CoNLLSRLAnnotation map. Arguments whose type is {@code rel} are skipped.
 * Labels are cast to CoreLabel without checking.
 *
 * @param pt   the tree to annotate
 * @param srls the SRL entries for this tree; may be empty
 */
private static void annotateSrlRoles(Tree pt, Collection<String> srls) {
    for (String srl : srls) {
        String[] bits = srl.split("\\s+");
        int verbIndex = Integer.parseInt(bits[0]);
        Tree verb = Trees.getTerminal(pt, verbIndex);
        ((CoreLabel) verb.label()).set(CoreAnnotations.CoNLLPredicateAnnotation.class, true);
        for (int i = 4; i < bits.length; i++) {
            // NOTE(review): the original tested for "ARGM" here but split the
            // same way in both branches. With a plain split on "-", a type such
            // as ARGM-TMP is recorded as just "ARGM" -- confirm that is intended.
            String[] argParts = bits[i].split("-");
            String locs = argParts[0];
            String argType = argParts[1];
            if (argType.equals("rel")) {
                continue;
            }
            for (String loc : locs.split("[*,]")) {
                String[] locParts = loc.split(":");
                int term = Integer.parseInt(locParts[0]);
                int height = Integer.parseInt(locParts[1]);
                // Start at the preterminal and climb `height` ancestors.
                Tree t1 = Trees.getPreTerminal(pt, term);
                for (int j = 0; j < height; j++) {
                    t1 = t1.parent(pt);
                }
                CoreLabel nodeLabel = (CoreLabel) t1.label();
                Map<Integer, String> roleMap = nodeLabel.get(CoreAnnotations.CoNLLSRLAnnotation.class);
                if (roleMap == null) {
                    roleMap = Generics.newHashMap();
                    nodeLabel.set(CoreAnnotations.CoNLLSRLAnnotation.class, roleMap);
                }
                roleMap.put(verbIndex, argType);
            }
        }
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) HasIndex(edu.stanford.nlp.ling.HasIndex)

Aggregations

HasIndex (edu.stanford.nlp.ling.HasIndex)9 CoreLabel (edu.stanford.nlp.ling.CoreLabel)5 HasWord (edu.stanford.nlp.ling.HasWord)3 Label (edu.stanford.nlp.ling.Label)3 Span (edu.stanford.nlp.ie.machinereading.structure.Span)2 RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException)2 HasTag (edu.stanford.nlp.ling.HasTag)2 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)1 Tree (edu.stanford.nlp.trees.Tree)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Stack (java.util.Stack)1