use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.
the class CollinsHeadDependencyParser method makeDepTree.
/**
 * Recursively converts the constituency subtree rooted at {@code parseTreeRoot} into a
 * dependency tree whose nodes are labelled with (constituent label, start span) pairs.
 *
 * <p>A leaf becomes a single-node tree. For an internal node, the subtree built from the
 * child chosen by {@code headFinder} becomes the root, and the subtrees of every other
 * child are attached to it using the labels produced by {@code getEdgeLabel}. If any
 * non-head child is labelled {@code "CC"}, attachment is delegated to
 * {@code doConjunctionHack} instead, with the index of the last such child.
 *
 * @param parseTreeRoot the constituent at the root of the subtree to convert
 * @return the dependency tree for that subtree
 */
private Tree<Pair<String, Integer>> makeDepTree(Constituent parseTreeRoot) {
    if (TreeView.isLeaf(parseTreeRoot)) {
        String leafLabel = parseTreeRoot.getLabel();
        int leafPosition = parseTreeRoot.getStartSpan();
        return new Tree<>(new Pair<>(leafLabel, leafPosition));
    }

    Constituent head = headFinder.getHeadChild(parseTreeRoot);
    Tree<Pair<String, Integer>> headTree = null;
    List<Tree<Pair<String, Integer>>> modifierTrees = new ArrayList<>();
    List<Pair<String, Integer>> modifierLabels = new ArrayList<>();
    // Index (into modifierTrees) of the last coordinating conjunction seen, or -1 if none.
    int ccIndex = -1;

    for (Relation edge : parseTreeRoot.getOutgoingRelations()) {
        Constituent kid = edge.getTarget();
        // Identity comparison: the head child is matched by reference.
        if (kid == head) {
            headTree = makeDepTree(kid);
            continue;
        }
        modifierTrees.add(makeDepTree(kid));
        modifierLabels.add(getEdgeLabel(parseTreeRoot, head.getLabel(), kid));
        if (kid.getLabel().equals("CC")) {
            ccIndex = modifierTrees.size() - 1;
        }
    }

    if (ccIndex >= 0) {
        return doConjunctionHack(parseTreeRoot, head, headTree, modifierTrees, modifierLabels, ccIndex);
    }

    // No conjunction: hang every modifier directly off the head's tree, in child order.
    int modifierCount = modifierTrees.size();
    for (int k = 0; k < modifierCount; k++) {
        headTree.addSubtree(modifierTrees.get(k), modifierLabels.get(k));
    }
    return headTree;
}
use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.
the class GoldLabel method addAnnotation.
/**
 * Builds a {@link PredicateArgumentView} for {@code ta} from the gold fields registered
 * under the annotation's id and, if it contains at least one predicate, adds it to
 * {@code ta} under {@code srlViewName}.
 *
 * <p>Only the first record for a given predicate start span is used; later records whose
 * predicate starts at the same token are skipped. Within one predicate, each argument span
 * is accepted at most once.
 *
 * @param ta the text annotation to decorate; it is read for its gold parse and id
 */
private void addAnnotation(TextAnnotation ta) {
    Tree<String> goldParse = ParseUtils.getParseTree(ViewNames.PARSE_GOLD, ta, 0);
    Tree<Pair<String, IntPair>> spanTree = ParseUtils.getSpanLabeledTree(goldParse);
    List<Tree<Pair<String, IntPair>>> yield = spanTree.getYield();

    PredicateArgumentView pav = new PredicateArgumentView(srlViewName, "AnnotatedTreebank", ta, 1.0);
    Set<Integer> seenPredicateStarts = new HashSet<>();

    for (Fields record : goldFields.get(ta.getId())) {
        Constituent predicate = record.createPredicate(ta, srlViewName, yield);
        // Set.add returns false when the start span was already recorded.
        if (!seenPredicateStarts.add(predicate.getStartSpan())) {
            continue;
        }

        List<Constituent> args = new ArrayList<>();
        List<String> labels = new ArrayList<>();
        List<Double> scores = new ArrayList<>();

        // Enforce the one-argument-per-span constraint regardless of what the data says:
        // a span already claimed by an earlier argument of this predicate is dropped.
        Set<IntPair> claimedSpans = new HashSet<>();
        for (GoldLabel arg : record.getGoldLabels()) {
            List<Constituent> unclaimed = new ArrayList<>();
            for (Constituent candidate : arg.getArgument(ta, srlViewName, yield, mergeContiguousCArgs)) {
                if (claimedSpans.add(candidate.getSpan())) {
                    unclaimed.add(candidate);
                }
            }
            addArguments(ta, predicate, args, labels, scores, arg, unclaimed);
        }

        String[] labelArray = labels.toArray(new String[labels.size()]);
        pav.addPredicateArguments(predicate, args, labelArray, ArrayUtilities.asDoubleArray(scores));
    }

    if (pav.getPredicates().size() > 0) {
        ta.addView(srlViewName, pav);
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.
the class OntonotesPropbankReader method parseLines.
/**
 * Parses the lines of one propbank file and attaches the resulting verb-SRL view to the
 * text annotation produced by the treebank reader for the matching treebank file.
 *
 * <p>The next annotation is pulled from the treebank reader first. The propbank lines are
 * then compiled into {@code SRLNode} records, and each record is mapped onto the
 * constituent tree of its sentence.
 *
 * @param lines the propbank file content, one entry per line
 * @return the text annotation with the {@code ViewNames.SRL_VERB} view added, or
 *         {@code null} if the treebank reader yielded {@code null}
 * @throws AnnotatorException declared by the signature; no statement visible in this
 *         method throws it directly
 * @throws RuntimeException if the treebank reader has no further file
 */
protected TextAnnotation parseLines(ArrayList<String> lines) throws AnnotatorException {
    if (!this.otr.hasNext()) {
        throw new RuntimeException("There were not as many treebank files as there were propbank files.");
    }

    // The treebank parse comes from the ontonotes treebank reader.
    TextAnnotation ta = this.otr.next();
    if (ta == null) {
        return null;
    }
    TreeView tv = (TreeView) ta.getView(OntonotesTreebankReader.VIEW_NAME);

    // Compile the propbank data into SRLNode containers, one per line; these are lined up
    // with the content of the text annotation further below.
    ArrayList<SRLNode> frames = new ArrayList<>();
    for (String line : lines) {
        String[] columns = line.split(" ");
        if (columns.length <= 7) {
            continue;
        }
        int treeId = Integer.parseInt(columns[1]);
        int predicateId = Integer.parseInt(columns[2]);

        // One SRLNode per line; it collects every relation to that predicate.
        SRLNode frame = new SRLNode(new IntPair(treeId, predicateId), columns[5]);
        frames.add(frame);

        // Relations start at column 7; only columns mentioning ARG are linked, which
        // leaves out the predicate itself.
        for (int col = 7; col < columns.length; col++) {
            String relation = columns[col];
            if (!relation.contains("ARG")) {
                continue;
            }
            try {
                frame.addLinks(relation);
            } catch (ParseException e) {
                e.printStackTrace();
            }
        }
    }

    // All SRL relations are parsed; now resolve them against the tree of each sentence,
    // starting with sentence 0.
    int sentenceIndx = 0;
    Tree<Constituent> tree = tv.getConstituentTree(0);
    if (debug) {
        System.out.println("\n---------------\n" + sentenceIndx + ") " + ta.getSentence(sentenceIndx) + ":" + otr.currentfile);
        System.out.println(tv.getTree(0));
    }

    PredicateArgumentView view = new PredicateArgumentView(ViewNames.SRL_VERB, ta);
    // Token map built from the current sentence tree, used to locate terminal nodes.
    HashMap<Integer, Tree<Constituent>> tokenmap = compileTokenMap(tree);
    // Shared across frames so that no duplicate constituents are created.
    HashMap<String, Constituent> newconstituents = new HashMap<>();

    for (SRLNode frame : frames) {
        int frameSentence = frame.getLinked().getFirst();
        if (frameSentence != sentenceIndx) {
            // Moved on to another sentence: switch tree and rebuild the token map.
            sentenceIndx = frameSentence;
            tree = tv.getConstituentTree(sentenceIndx);
            tokenmap = compileTokenMap(tree);
            if (debug) {
                System.out.println("\n---------------\n" + sentenceIndx + ") " + ta.getSentence(sentenceIndx));
                System.out.println(tv.getTree(sentenceIndx));
                // In debug mode, processing stops once a sentence index above 5 is reached.
                if (sentenceIndx > 5) {
                    break;
                }
            }
        }
        // The second element of the frame's int pair is the token offset within the sentence.
        this.addSrlFrame(view, ta, frame, tokenmap, newconstituents);
    }

    ta.addView(ViewNames.SRL_VERB, view);
    return ta;
}
use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.
the class ParseHelper method getTokenIndexedParseTreeNodeCovering.
/**
 * Returns the span-labelled parse tree node covering constituent {@code c}, with every
 * span shifted by the start span of the sentence that contains {@code c}.
 *
 * <p>The covering node is looked up with sentence-relative token offsets, so its spans
 * come back sentence-relative; each label is then rebuilt with the sentence start added
 * to both ends of its span.
 *
 * @param parseViewName name of the parse view to read the sentence's tree from
 * @param c the constituent whose covering parse node is wanted
 * @return a tree of (label, span) pairs with the sentence start span added to each span
 */
public static Tree<Pair<String, IntPair>> getTokenIndexedParseTreeNodeCovering(String parseViewName, Constituent c) {
    TextAnnotation ta = c.getTextAnnotation();
    int sentenceId = ta.getSentenceId(c);
    Tree<String> sentenceTree = getParseTree(parseViewName, ta, sentenceId);

    final int sentenceOffset = ta.getSentence(sentenceId).getStartSpan();

    // The lookup works on offsets relative to the start of the sentence.
    int relativeStart = c.getStartSpan() - sentenceOffset;
    int relativeEnd = c.getEndSpan() - sentenceOffset;
    Tree<Pair<String, IntPair>> relativeTree = getTokenIndexedTreeCovering(sentenceTree, relativeStart, relativeEnd);

    // Undo the shift: rebuild each node label with the sentence offset added back.
    ITransformer<Tree<Pair<String, IntPair>>, Pair<String, IntPair>> shiftBack = new ITransformer<Tree<Pair<String, IntPair>>, Pair<String, IntPair>>() {
        @Override
        public Pair<String, IntPair> transform(Tree<Pair<String, IntPair>> input) {
            Pair<String, IntPair> original = input.getLabel();
            IntPair relativeSpan = original.getSecond();
            int shiftedStart = relativeSpan.getFirst() + sentenceOffset;
            int shiftedEnd = relativeSpan.getSecond() + sentenceOffset;
            return new Pair<>(original.getFirst(), new IntPair(shiftedStart, shiftedEnd));
        }
    };
    return Mappers.mapTree(relativeTree, shiftBack);
}
use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.
the class ParseHelper method getPath.
/**
 * Returns a pair of paths between two nodes of {@code tree}. The first element is the
 * path up from the start node to the common ancestor of start and end; the second element
 * is the path down from the common ancestor to the end node.
 *
 * <p>Both paths are derived from the lists returned by {@code getPathTreesToRoot}, which
 * are reversed before being scanned. Each scan collects nodes up to and including the
 * first one that also occurs in the other list.
 *
 * @param start the node the upward path begins at
 * @param end the node the downward path finishes at
 * @param tree the tree containing both nodes
 * @param maxDepth passed through to {@code getPathTreesToRoot}
 * @return a pair of (path up, path down)
 * @throws Exception if either scan finds no node shared with the other list
 */
@Deprecated
public static <T> Pair<List<Tree<T>>, List<Tree<T>>> getPath(Tree<T> start, Tree<T> end, Tree<T> tree, int maxDepth) throws Exception {
    List<Tree<T>> startChain = getPathTreesToRoot(tree, start, maxDepth);
    List<Tree<T>> endChain = getPathTreesToRoot(tree, end, maxDepth);
    Collections.reverse(startChain);
    Collections.reverse(endChain);

    // Walk the start chain, keeping every node up to and including the first shared one.
    List<Tree<T>> pathUp = new ArrayList<>();
    boolean metOnWayUp = false;
    for (Tree<T> node : startChain) {
        pathUp.add(node);
        if (endChain.contains(node)) {
            metOnWayUp = true;
            break;
        }
    }
    if (!metOnWayUp) {
        throw new Exception("Common ancestor not found in path down.");
    }

    // Same walk over the end chain; the result is flipped afterwards so that it reads
    // from the shared node towards the end node.
    List<Tree<T>> pathDown = new ArrayList<>();
    boolean metOnWayDown = false;
    for (Tree<T> node : endChain) {
        pathDown.add(node);
        if (startChain.contains(node)) {
            metOnWayDown = true;
            break;
        }
    }
    if (!metOnWayDown) {
        throw new Exception("Common ancestor not found in path up.");
    }
    Collections.reverse(pathDown);

    return new Pair<>(pathUp, pathDown);
}
Aggregations