Use of edu.illinois.cs.cogcomp.core.datastructures.Pair in the cogcomp-nlp project by CogComp.
Class StanfordDepHandler, method populateChildren.
/**
 * Recursively copies the part of {@code depGraph} that hangs below {@code root} into
 * {@code tree}.
 *
 * For every child reported by {@code depGraph.getChildren(root)} a new subtree is attached to
 * {@code tree}. The subtree's node label pairs the child's original text with the position
 * computed by {@code getNodePosition}; the edge label pairs the string form of the
 * root-to-child edge with that same position. The method then descends into the child.
 *
 * @param depGraph the dependency graph being converted
 * @param root the graph node whose children are attached
 * @param tree the tree node that corresponds to {@code root} and receives the subtrees
 * @param ta the text annotation handed to {@code getNodePosition}
 * @param sentId the sentence id handed to {@code getNodePosition}
 */
private void populateChildren(SemanticGraph depGraph, IndexedWord root, Tree<Pair<String, Integer>> tree, TextAnnotation ta, int sentId) {
    // A node without children yields an empty iteration, so no explicit leaf check is needed.
    for (IndexedWord dependent : depGraph.getChildren(root)) {
        int position = getNodePosition(ta, dependent, sentId);

        Tree<Pair<String, Integer>> subtree =
                new Tree<>(new Pair<>(dependent.originalText(), position));

        String relation = depGraph.getEdge(root, dependent).toString();
        tree.addSubtree(subtree, new Pair<>(relation, position));

        populateChildren(depGraph, dependent, subtree, ta, sentId);
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.Pair in the cogcomp-nlp project by CogComp.
Class VerbVoiceIndicator, method getWordFeatures.
/**
 * Builds a one-element feature set describing the voice of the word at {@code wordPosition}.
 *
 * The sentence containing the word is looked up, its parse tree (from {@code parseViewName})
 * is converted to a span-labeled tree, and the parent of the leaf at the word's
 * sentence-relative index is passed to {@code getVoice}. The resulting string becomes a
 * single {@link DiscreteFeature}.
 *
 * @param ta the text annotation containing the word
 * @param wordPosition token index of the word within {@code ta}
 * @return a set holding exactly one discrete feature
 * @throws EdisonException declared by the interface; propagated from the helpers used here
 */
@Override
public Set<Feature> getWordFeatures(TextAnnotation ta, int wordPosition) throws EdisonException {
    Sentence enclosing = ta.getSentenceFromToken(wordPosition);

    // The parse tree is per sentence, so the token index must be made sentence-relative.
    int indexInSentence = wordPosition - enclosing.getStartSpan();

    Tree<Pair<String, IntPair>> spanTree =
            ParseUtils.getSpanLabeledTree(ParseHelper.getParseTree(parseViewName, enclosing));
    Tree<Pair<String, IntPair>> leafParent = spanTree.getYield().get(indexInSentence).getParent();

    Set<Feature> features = new LinkedHashSet<Feature>();
    features.add(DiscreteFeature.create(getVoice(leafParent)));
    return features;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.Pair in the cogcomp-nlp project by CogComp.
Class ParseHelper, method getPath.
/**
 * Computes the two legs of the path that connects {@code start} to {@code end} inside
 * {@code tree}.
 *
 * The first element of the returned pair is the path up from the start node to the common
 * ancestor of start and end; the second element is the path down from that common ancestor to
 * the end node. Both legs include the common ancestor.
 *
 * @param start the node the path begins at
 * @param end the node the path ends at
 * @param tree the tree containing both nodes
 * @param maxDepth depth limit forwarded to {@code getPathTreesToRoot}
 * @return the pair (path up, path down)
 * @throws Exception if no node is shared between the two root paths
 */
@Deprecated
public static <T> Pair<List<Tree<T>>, List<Tree<T>>> getPath(Tree<T> start, Tree<T> end, Tree<T> tree, int maxDepth) throws Exception {
    List<Tree<T>> startChain = getPathTreesToRoot(tree, start, maxDepth);
    List<Tree<T>> endChain = getPathTreesToRoot(tree, end, maxDepth);
    // Both chains are flipped in place before they are scanned for the first shared node.
    Collections.reverse(startChain);
    Collections.reverse(endChain);

    List<Tree<T>> pathUp = prefixThroughFirstShared(startChain, endChain);
    if (pathUp == null) {
        throw new Exception("Common ancestor not found in path down.");
    }

    List<Tree<T>> pathDown = prefixThroughFirstShared(endChain, startChain);
    if (pathDown == null) {
        throw new Exception("Common ancestor not found in path up.");
    }
    // The second leg was collected walking away from the end node; flip it so it ends there.
    Collections.reverse(pathDown);

    return new Pair<>(pathUp, pathDown);
}

/**
 * Copies elements of {@code chain} in order up to and including the first one that
 * {@code other} contains; returns {@code null} when no element of {@code chain} is in
 * {@code other}.
 */
private static <T> List<Tree<T>> prefixThroughFirstShared(List<Tree<T>> chain, List<Tree<T>> other) {
    List<Tree<T>> prefix = new ArrayList<>();
    for (Tree<T> node : chain) {
        prefix.add(node);
        if (other.contains(node)) {
            return prefix;
        }
    }
    return null;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.Pair in the cogcomp-nlp project by CogComp.
Class ParseHelper, method getTokenIndexedParseTreeNodeCovering.
/**
 * Returns the parse-tree node that covers constituent {@code c}, with every node labeled by
 * its text label and a token span shifted by the sentence's start span.
 *
 * The covering node is located with {@code getTokenIndexedTreeCovering} using the
 * constituent's span made relative to the start of its sentence. Each span in the resulting
 * tree is then moved back by adding the sentence's start span to both of its ends.
 *
 * @param parseViewName name of the parse view to read the sentence's tree from
 * @param c the constituent to cover
 * @return a copy of the covering subtree whose spans have the sentence start span added
 */
public static Tree<Pair<String, IntPair>> getTokenIndexedParseTreeNodeCovering(String parseViewName, Constituent c) {
    TextAnnotation ta = c.getTextAnnotation();
    int sentenceId = ta.getSentenceId(c);
    Tree<String> sentenceTree = getParseTree(parseViewName, ta, sentenceId);

    // Offset of the sentence's first token; used to convert between the two index spaces.
    final int shift = ta.getSentence(sentenceId).getStartSpan();

    Tree<Pair<String, IntPair>> covering =
            getTokenIndexedTreeCovering(sentenceTree, c.getStartSpan() - shift, c.getEndSpan() - shift);

    // Re-label every node, adding the sentence offset back onto both ends of its span.
    return Mappers.mapTree(covering, new ITransformer<Tree<Pair<String, IntPair>>, Pair<String, IntPair>>() {
        @Override
        public Pair<String, IntPair> transform(Tree<Pair<String, IntPair>> input) {
            Pair<String, IntPair> nodeLabel = input.getLabel();
            IntPair relative = nodeLabel.getSecond();
            return new Pair<>(nodeLabel.getFirst(),
                    new IntPair(relative.getFirst() + shift, relative.getSecond() + shift));
        }
    });
}
Use of edu.illinois.cs.cogcomp.core.datastructures.Pair in the cogcomp-nlp project by CogComp.
Class IllinoisTokenizer, method tokenizeSentence.
/**
 * Splits one sentence into tokens and reports where each token sits in the input.
 *
 * @param sentence the plain text sentence to tokenize
 * @return a pair whose first element is the ordered token strings and whose second element is
 *         the matching ordered character offsets, each given as (start, one-past-the-end)
 */
@Override
public Pair<String[], IntPair[]> tokenizeSentence(String sentence) {
    LinkedVector words = new Sentence(sentence).wordSplit();

    final int count = words.size();
    String[] tokens = new String[count];
    IntPair[] spans = new IntPair[count];

    int idx = 0;
    while (idx < count) {
        LinkedChild word = words.get(idx);
        tokens[idx] = word.toString();
        // The reported end is one past the word's own end field (presumably inclusive).
        spans[idx] = new IntPair(word.start, word.end + 1);
        idx++;
    }

    return new Pair<>(tokens, spans);
}
Aggregations