Search in sources :

Example 11 with Tree

use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.

the class CollinsHeadDependencyParser method makeDepTree.

/**
 * Recursively builds a dependency tree for the given parse-tree constituent.
 *
 * <p>A leaf yields a single-node tree labelled with the constituent's label and start span.
 * For an internal node, the subtree built from the child chosen by {@code headFinder} becomes
 * the root, and the subtrees of all other children are attached to it with the labels
 * returned by {@code getEdgeLabel}. If any non-head child is labelled {@code "CC"}, the
 * attachment is delegated to {@code doConjunctionHack}, which receives the index of the last
 * such child in the dependent list.
 *
 * @param parseTreeRoot the constituent at the root of the (sub)tree to convert
 * @return the dependency tree rooted at the head of {@code parseTreeRoot}
 */
private Tree<Pair<String, Integer>> makeDepTree(Constituent parseTreeRoot) {
    if (TreeView.isLeaf(parseTreeRoot)) {
        int tokenPosition = parseTreeRoot.getStartSpan();
        return new Tree<>(new Pair<>(parseTreeRoot.getLabel(), tokenPosition));
    }

    Constituent head = headFinder.getHeadChild(parseTreeRoot);
    Tree<Pair<String, Integer>> headTree = null;
    List<Tree<Pair<String, Integer>>> modifierTrees = new ArrayList<>();
    List<Pair<String, Integer>> modifierLabels = new ArrayList<>();
    // Index (within modifierTrees) of the last dependent labelled "CC"; -1 when there is none.
    int lastCcIndex = -1;

    for (Relation edge : parseTreeRoot.getOutgoingRelations()) {
        Constituent child = edge.getTarget();
        // Identity comparison: the head is recognised as the very object headFinder returned.
        if (child == head) {
            headTree = makeDepTree(child);
            continue;
        }
        modifierTrees.add(makeDepTree(child));
        modifierLabels.add(getEdgeLabel(parseTreeRoot, head.getLabel(), child));
        if (child.getLabel().equals("CC")) {
            lastCcIndex = modifierTrees.size() - 1;
        }
    }

    if (lastCcIndex >= 0) {
        return doConjunctionHack(parseTreeRoot, head, headTree, modifierTrees, modifierLabels, lastCcIndex);
    }

    // No conjunction: hang every dependent directly under the head, in child order.
    int labelIndex = 0;
    for (Tree<Pair<String, Integer>> modifier : modifierTrees) {
        headTree.addSubtree(modifier, modifierLabels.get(labelIndex));
        labelIndex++;
    }
    return headTree;
}
Also used : Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) ArrayList(java.util.ArrayList) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 12 with Tree

use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.

the class GoldLabel method addAnnotation.

/**
 * Builds a {@link PredicateArgumentView} named {@code srlViewName} from the entries stored in
 * {@code goldFields} under the id of the given text annotation, and adds it to the annotation
 * if it ends up with at least one predicate.
 *
 * <p>Only the first entry for a given predicate start span is used; later entries whose
 * predicate starts at the same token are skipped.
 *
 * @param ta the text annotation to which the view is added
 */
private void addAnnotation(TextAnnotation ta) {
    Tree<String> goldParse = ParseUtils.getParseTree(ViewNames.PARSE_GOLD, ta, 0);
    List<Tree<Pair<String, IntPair>>> treeYield = ParseUtils.getSpanLabeledTree(goldParse).getYield();
    PredicateArgumentView pav = new PredicateArgumentView(srlViewName, "AnnotatedTreebank", ta, 1.0);

    Set<Integer> seenPredicateStarts = new HashSet<>();
    for (Fields fields : goldFields.get(ta.getId())) {
        Constituent predicate = fields.createPredicate(ta, srlViewName, treeYield);
        // Set.add returns false when the start span was already recorded.
        if (!seenPredicateStarts.add(predicate.getStartSpan())) {
            continue;
        }

        List<Constituent> args = new ArrayList<>();
        List<String> labels = new ArrayList<>();
        List<Double> scores = new ArrayList<>();

        // Enforce the one-argument-per-span constraint: a span already taken by an earlier
        // argument of this predicate is dropped, even when the gold data repeats it.
        Set<IntPair> usedSpans = new HashSet<>();
        for (GoldLabel arg : fields.getGoldLabels()) {
            List<Constituent> candidates = arg.getArgument(ta, srlViewName, treeYield, mergeContiguousCArgs);
            List<Constituent> filtered = new ArrayList<>();
            for (Constituent candidate : candidates) {
                if (usedSpans.add(candidate.getSpan())) {
                    filtered.add(candidate);
                }
            }
            addArguments(ta, predicate, args, labels, scores, arg, filtered);
        }

        String[] labelArray = labels.toArray(new String[labels.size()]);
        pav.addPredicateArguments(predicate, args, labelArray, ArrayUtilities.asDoubleArray(scores));
    }

    // An empty view is never attached to the annotation.
    if (pav.getPredicates().size() > 0) {
        ta.addView(srlViewName, pav);
    }
}
Also used : IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) PredicateArgumentView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 13 with Tree

use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.

the class OntonotesPropbankReader method parseLines.

/**
 * Parses the lines of a propbank file into an SRL view on the text annotation produced by the
 * treebank reader for the corresponding treebank file.
 *
 * <p>The next text annotation is pulled from {@code otr}; each sufficiently long input line is
 * turned into one {@link SRLNode}, and every node is then added to a
 * {@link PredicateArgumentView} via {@code addSrlFrame}, using the constituent tree of the
 * sentence the node refers to.
 *
 * @param lines the data from the file, one entry per line.
 * @return the text annotation with the {@code ViewNames.SRL_VERB} view added, or {@code null}
 *         if the treebank reader returned {@code null}.
 * @throws AnnotatorException declared for callers; not thrown directly by the statements in
 *         this method.
 * @throws RuntimeException if the treebank reader has no further file to pair with this
 *         propbank file.
 */
protected TextAnnotation parseLines(ArrayList<String> lines) throws AnnotatorException {
    if (!this.otr.hasNext()) {
        throw new RuntimeException("There were not as many treebank files as there were propbank files.");
    }

    // The treebank parse comes from the ontonotes treebank reader.
    TextAnnotation ta = this.otr.next();
    if (ta == null) {
        return null;
    }
    TreeView treeView = (TreeView) ta.getView(OntonotesTreebankReader.VIEW_NAME);

    // First pass: compile the propbank data into SRLNode containers, one per line. They are
    // lined up with the content of the text annotation in the second pass below.
    ArrayList<SRLNode> records = new ArrayList<>();
    for (String line : lines) {
        String[] columns = line.split(" ");
        // Lines with seven or fewer space-separated columns are ignored.
        if (columns.length <= 7) {
            continue;
        }
        int treeId = Integer.parseInt(columns[1]);
        int predicateId = Integer.parseInt(columns[2]);
        SRLNode record = new SRLNode(new IntPair(treeId, predicateId), columns[5]);
        records.add(record);

        // Columns from index 7 onwards hold the relations; only those containing "ARG" are
        // linked, which omits the predicate itself.
        for (int col = 7; col < columns.length; col++) {
            if (!columns[col].contains("ARG")) {
                continue;
            }
            try {
                record.addLinks(columns[col]);
            } catch (ParseException e) {
                e.printStackTrace();
            }
        }
    }

    // Second pass: walk the records, fetching the tree of each sentence as it is reached.
    int currentSentence = 0;
    Tree<Constituent> sentenceTree = treeView.getConstituentTree(0);
    if (debug) {
        System.out.println("\n---------------\n" + currentSentence + ") " + ta.getSentence(currentSentence) + ":" + otr.currentfile);
        System.out.println(treeView.getTree(0));
    }

    PredicateArgumentView srlView = new PredicateArgumentView(ViewNames.SRL_VERB, ta);
    // Built by compileTokenMap for the current sentence tree; used to find terminal nodes.
    HashMap<Integer, Tree<Constituent>> tokenMap = compileTokenMap(sentenceTree);
    // Shared across all frames so that no duplicate constituents are created.
    HashMap<String, Constituent> createdConstituents = new HashMap<>();

    for (SRLNode record : records) {
        int recordSentence = record.getLinked().getFirst();
        if (recordSentence != currentSentence) {
            // Moved on to a different sentence: refresh the tree and its token map.
            currentSentence = recordSentence;
            sentenceTree = treeView.getConstituentTree(currentSentence);
            tokenMap = compileTokenMap(sentenceTree);
            if (debug) {
                System.out.println("\n---------------\n" + currentSentence + ") " + ta.getSentence(currentSentence));
                System.out.println(treeView.getTree(currentSentence));
                // In debug mode processing stops once a sentence index above 5 is reached.
                if (currentSentence > 5) {
                    break;
                }
            }
        }
        // The second element of the record's int pair is the token offset within the sentence.
        this.addSrlFrame(srlView, ta, record, tokenMap, createdConstituents);
    }

    ta.addView(ViewNames.SRL_VERB, srlView);
    return ta;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SRLNode(edu.illinois.cs.cogcomp.nlp.corpusreaders.ontonotes.utils.SRLNode) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) PredicateArgumentView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) ParseException(java.text.ParseException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 14 with Tree

use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.

the class ParseHelper method getTokenIndexedParseTreeNodeCovering.

/**
 * Returns the token-indexed parse-tree node covering the given constituent, with spans
 * expressed relative to the whole text annotation rather than to the sentence.
 *
 * <p>The constituent's start and end are first shifted by the start span of its sentence
 * before calling {@code getTokenIndexedTreeCovering}; every span in the resulting tree is then
 * shifted back by the same amount.
 *
 * @param parseViewName the name of the parse view to read the tree from
 * @param c the constituent to be covered
 * @return a copy of the covering subtree whose span labels are offset by the sentence start
 */
public static Tree<Pair<String, IntPair>> getTokenIndexedParseTreeNodeCovering(String parseViewName, Constituent c) {
    TextAnnotation ta = c.getTextAnnotation();
    int sentenceId = ta.getSentenceId(c);
    Tree<String> sentenceTree = getParseTree(parseViewName, ta, sentenceId);

    // Offset of the sentence's first token within the text annotation.
    final int sentenceOffset = ta.getSentence(sentenceId).getStartSpan();
    int relativeStart = c.getStartSpan() - sentenceOffset;
    int relativeEnd = c.getEndSpan() - sentenceOffset;

    Tree<Pair<String, IntPair>> sentenceRelative = getTokenIndexedTreeCovering(sentenceTree, relativeStart, relativeEnd);

    // Re-label every node, adding the sentence offset back onto both ends of its span.
    return Mappers.mapTree(sentenceRelative, new ITransformer<Tree<Pair<String, IntPair>>, Pair<String, IntPair>>() {

        @Override
        public Pair<String, IntPair> transform(Tree<Pair<String, IntPair>> input) {
            Pair<String, IntPair> nodeLabel = input.getLabel();
            int shiftedStart = nodeLabel.getSecond().getFirst() + sentenceOffset;
            int shiftedEnd = nodeLabel.getSecond().getSecond() + sentenceOffset;
            return new Pair<>(nodeLabel.getFirst(), new IntPair(shiftedStart, shiftedEnd));
        }
    });
}
Also used : ITransformer(edu.illinois.cs.cogcomp.core.transformers.ITransformer) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 15 with Tree

use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.

the class ParseHelper method getPath.

/**
 * Computes the two legs of the path between {@code start} and {@code end} through their
 * common ancestor in {@code tree}.
 *
 * <p>The paths obtained from {@code getPathTreesToRoot} for both nodes are reversed. The first
 * leg holds the nodes of the reversed start path up to and including the first one that also
 * occurs in the end path. The second leg is built the same way from the reversed end path and
 * then reversed again, so that it reads from the common ancestor to the end node.
 *
 * @param start the node the path starts from
 * @param end the node the path ends at
 * @param tree the tree containing both nodes
 * @param maxDepth passed through to {@code getPathTreesToRoot}
 * @return a pair whose first element is the path up and whose second element is the path down
 * @throws Exception if the two root paths share no node
 */
@Deprecated
public static <T> Pair<List<Tree<T>>, List<Tree<T>>> getPath(Tree<T> start, Tree<T> end, Tree<T> tree, int maxDepth) throws Exception {
    List<Tree<T>> startPath = getPathTreesToRoot(tree, start, maxDepth);
    List<Tree<T>> endPath = getPathTreesToRoot(tree, end, maxDepth);
    Collections.reverse(startPath);
    Collections.reverse(endPath);

    // Collect nodes from the start side until one is shared with the end side (inclusive).
    List<Tree<T>> pathUp = new ArrayList<>();
    boolean reachedShared = false;
    for (int i = 0; i < startPath.size() && !reachedShared; i++) {
        Tree<T> node = startPath.get(i);
        pathUp.add(node);
        reachedShared = endPath.contains(node);
    }
    if (!reachedShared) {
        throw new Exception("Common ancestor not found in path down.");
    }

    // Same walk from the end side, against the start side.
    List<Tree<T>> pathDown = new ArrayList<>();
    reachedShared = false;
    for (int i = 0; i < endPath.size() && !reachedShared; i++) {
        Tree<T> node = endPath.get(i);
        pathDown.add(node);
        reachedShared = startPath.contains(node);
    }
    if (!reachedShared) {
        throw new Exception("Common ancestor not found in path up.");
    }

    // Flip so the second leg runs from the shared node towards the end node.
    Collections.reverse(pathDown);
    return new Pair<>(pathUp, pathDown);
}
Also used : ArrayList(java.util.ArrayList) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Aggregations

Tree (edu.illinois.cs.cogcomp.core.datastructures.trees.Tree)15 Pair (edu.illinois.cs.cogcomp.core.datastructures.Pair)10 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)7 ArrayList (java.util.ArrayList)7 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)6 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)5 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)4 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)3 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)2 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)2 ITransformer (edu.illinois.cs.cogcomp.core.transformers.ITransformer)2 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)2 IndexedWord (edu.stanford.nlp.ling.IndexedWord)2 Annotation (edu.stanford.nlp.pipeline.Annotation)2 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)2 CoreMap (edu.stanford.nlp.util.CoreMap)2 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)1 DepInst (edu.illinois.cs.cogcomp.depparse.core.DepInst)1 DepStruct (edu.illinois.cs.cogcomp.depparse.core.DepStruct)1 SRLNode (edu.illinois.cs.cogcomp.nlp.corpusreaders.ontonotes.utils.SRLNode)1