Search in sources :

Example 21 with Pair

use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.

the class ParseHelper method getTokenIndexedTreeCovering.

/**
 * Finds the subtree of a parse that covers a token span.
 * <p>
 * The parse is first converted with {@link ParseUtils#getSpanLabeledTree}, so every node carries
 * its original label paired with the span it covers in the original tree. The search then walks
 * down from the root: a node whose span is exactly {@code [start, end)} is returned immediately;
 * otherwise the walk moves to the first child whose span contains the requested span, and stops
 * at the current node when no child does.
 * <p>
 * Note that the root itself is never checked for containment, so the root is returned when none
 * of its children contains the requested span.
 *
 * @param parse The parse tree to search.
 * @param start The first token of the span.
 * @param end The end of the span (exclusive).
 * @return The deepest node reached by the walk described above, labeled with the original label
 *         and the span of the original tree that it covers; {@code null} only if the
 *         span-labeled tree itself is {@code null}.
 */
public static Tree<Pair<String, IntPair>> getTokenIndexedTreeCovering(Tree<String> parse, int start, int end) {
    Tree<Pair<String, IntPair>> node = ParseUtils.getSpanLabeledTree(parse);
    while (node != null) {
        IntPair nodeSpan = node.getLabel().getSecond();
        if (nodeSpan.getFirst() == start && nodeSpan.getSecond() == end) {
            // Exact match: nothing deeper can be a better answer.
            return node;
        }

        // Pick the first child that still contains the whole requested span.
        Tree<Pair<String, IntPair>> next = null;
        for (Tree<Pair<String, IntPair>> candidate : node.getChildren()) {
            IntPair candidateSpan = candidate.getLabel().getSecond();
            if (candidateSpan.getFirst() <= start && candidateSpan.getSecond() >= end) {
                next = candidate;
                break;
            }
        }

        if (next == null) {
            // No child contains the span, so this node is as deep as we can go.
            return node;
        }
        node = next;
    }
    return node;
}
Also used : IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 22 with Pair

use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.

the class PathFeatureHelper method getPathsToCommonAncestor.

/**
 * Computes the two paths that lead from a pair of constituents to their common ancestor.
 * <p>
 * Both paths are obtained from {@link #getPathToRoot(Constituent, int)} with the given
 * {@code maxDepth} and are then cut off at (and including) the first constituent that also
 * occurs on the other path.
 * <p>
 * <b>Note:</b> This function requires the two constituents to be from the same {@link View};
 * this is only checked with an {@code assert}.
 *
 * @param start The constituent where the upward path begins.
 * @param end The constituent where the downward path ends.
 * @param maxDepth The depth limit passed to {@code getPathToRoot} for both constituents.
 * @return A pair whose first element is the path from {@code start} up to the common ancestor
 *         and whose second element is the path from {@code end} up to the common ancestor.
 * @throws IllegalArgumentException If no common ancestor is found.
 * @see edu.illinois.cs.cogcomp.edison.features.helpers.PathFeatureHelper#getPathToRoot(Constituent,
 *      int)
 */
public static Pair<List<Constituent>, List<Constituent>> getPathsToCommonAncestor(Constituent start, Constituent end, int maxDepth) {
    assert start.getView() == end.getView() : "Cannot find paths across different views. " + "The start and end constituents should be from the same view.";

    List<Constituent> startToRoot = getPathToRoot(start, maxDepth);
    List<Constituent> endToRoot = getPathToRoot(end, maxDepth);
    Set<Constituent> onStartPath = new LinkedHashSet<>(startToRoot);
    Set<Constituent> onEndPath = new LinkedHashSet<>(endToRoot);

    // Climb from start until we hit a node that is also on end's path.
    List<Constituent> pathUp = new ArrayList<>();
    boolean metOnWayUp = false;
    for (Constituent node : startToRoot) {
        pathUp.add(node);
        if (onEndPath.contains(node)) {
            metOnWayUp = true;
            break;
        }
    }
    if (!metOnWayUp) {
        throw new IllegalArgumentException("Common ancestor not found in path down.");
    }

    // Same climb from end, stopping at the first node shared with start's path.
    List<Constituent> pathDown = new ArrayList<>();
    boolean metOnWayDown = false;
    for (Constituent node : endToRoot) {
        pathDown.add(node);
        if (onStartPath.contains(node)) {
            metOnWayDown = true;
            break;
        }
    }
    if (!metOnWayDown) {
        throw new IllegalArgumentException("Common ancestor not found in path up.");
    }

    return new Pair<>(pathUp, pathDown);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ArrayList(java.util.ArrayList) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 23 with Pair

use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.

the class CharacterTokenizer method tokenizeSentence.

/**
 * Splits a sentence into one token per character and reports each token's character offsets.
 * <p>
 * The sentence is walked by Unicode code point rather than by UTF-16 code unit, so a
 * supplementary character (encoded as a surrogate pair) yields a single two-{@code char} token
 * instead of two ill-formed lone-surrogate tokens. Offsets are still expressed in {@code char}
 * indices into {@code sentence}, with the end offset exclusive.
 * <p>
 * Code points at or below U+0020 (space and control characters, the same set that
 * {@link String#trim()} strips) produce no token.
 *
 * @param sentence The sentence string
 * @return A {@link Pair} containing the array of tokens and their character offsets
 */
@Override
public Pair<String[], IntPair[]> tokenizeSentence(String sentence) {
    List<IntPair> offsets = new ArrayList<>();
    List<String> surfaces = new ArrayList<>();
    int i = 0;
    while (i < sentence.length()) {
        int codePoint = sentence.codePointAt(i);
        // 1 for BMP characters (and unpaired surrogates), 2 for a valid surrogate pair.
        int width = Character.charCount(codePoint);
        if (codePoint > ' ') {
            surfaces.add(sentence.substring(i, i + width));
            offsets.add(new IntPair(i, i + width));
        }
        i += width;
    }
    IntPair[] offs = offsets.toArray(new IntPair[0]);
    String[] surfs = surfaces.toArray(new String[0]);
    return new Pair<>(surfs, offs);
}
Also used : ArrayList(java.util.ArrayList) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 24 with Pair

use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.

the class StanfordAnalyzer method tokenizeSentence.

/**
 * Tokenizes the given text with the Stanford pipeline held in {@code pipeline} and returns the
 * tokens together with their character offsets.
 * <p>
 * Tokens of all sentences found by the pipeline are concatenated in order. Each surface form is
 * taken from {@code CoreLabel.originalText()} and each offset pair from
 * {@code CoreLabel.beginPosition()} / {@code CoreLabel.endPosition()}.
 *
 * @param sentenceText The sentence string
 * @return A {@link Pair} containing the array of tokens and their character offsets
 */
@Override
public Pair<String[], IntPair[]> tokenizeSentence(String sentenceText) {
    Annotation document = new Annotation(sentenceText);
    pipeline.annotate(document);

    // Flatten the per-sentence token lists into a single sequence.
    List<CoreLabel> tokens = new ArrayList<>();
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        tokens.addAll(sentence.get(CoreAnnotations.TokensAnnotation.class));
    }

    String[] surfaces = new String[tokens.size()];
    IntPair[] tokenCharOffsets = new IntPair[tokens.size()];
    for (int i = 0; i < tokens.size(); i++) {
        CoreLabel token = tokens.get(i);
        surfaces[i] = token.originalText();
        tokenCharOffsets[i] = new IntPair(token.beginPosition(), token.endPosition());
    }
    return new Pair<>(surfaces, tokenCharOffsets);
}
Also used : ArrayList(java.util.ArrayList) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 25 with Pair

use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.

the class StanfordDepHandler method addView.

/**
 * Builds a {@link TreeView} named {@code ViewNames.DEPENDENCY_STANFORD} for the given text and
 * adds it to the {@link TextAnnotation} under {@code getViewName()}.
 * <p>
 * The text is converted to Stanford sentences, run through {@code posAnnotator} and
 * {@code parseAnnotator}, and for every sentence the basic dependency graph is turned into a
 * tree rooted at the graph's first root. Sentences that exceed {@code maxParseSentenceLength}
 * are logged and skipped. Sentences whose dependency root cannot be obtained are logged and
 * either skipped or cause the exception to be rethrown, depending on
 * {@code throwExceptionOnSentenceLengthCheck}.
 *
 * @param textAnnotation The annotation to parse; receives the new view.
 * @throws AnnotatorException If the first sentence's parse tree is the placeholder node
 *         {@code "X"}; any exception raised by {@code StanfordParseHandler.checkLength} also
 *         propagates.
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    // If the sentence is longer than STFRD_MAX_SENTENCE_LENGTH there is no point in trying to
    // parse
    StanfordParseHandler.checkLength(textAnnotation, throwExceptionOnSentenceLengthCheck, maxParseSentenceLength);
    TreeView treeView = new TreeView(ViewNames.DEPENDENCY_STANFORD, "StanfordDepHandler", textAnnotation, 1d);
    // The (tokenized) sentence offset in case we have more than one sentences in the record
    List<CoreMap> sentences = StanfordParseHandler.buildStanfordSentences(textAnnotation);
    Annotation document = new Annotation(sentences);
    posAnnotator.annotate(document);
    parseAnnotator.annotate(document);
    sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences.get(0).get(TreeCoreAnnotations.TreeAnnotation.class).nodeString().equals("X")) {
        // This is most likely because we ran out of time
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "This is most likely due to a timeout.");
    }
    for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
        CoreMap sentence = sentences.get(sentenceId);
        // NOTE(review): CoreMap.size() appears to count annotation keys, not tokens -- confirm
        // that this is the intended length measure.
        if (maxParseSentenceLength > 0 && sentence.size() > maxParseSentenceLength) {
            logger.warn(HandlerUtils.getSentenceLengthError(textAnnotation.getId(), sentence.toString(), maxParseSentenceLength));
            continue;
        }

        SemanticGraph depGraph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        IndexedWord root;
        try {
            root = depGraph.getFirstRoot();
        } catch (RuntimeException e) {
            // depGraph may be null here (that is one way to land in this handler), so it must
            // not be dereferenced while building the message; otherwise a second
            // NullPointerException would escape and bypass the rethrow/skip policy below.
            String graphText = (depGraph == null) ? "<no dependency graph>" : depGraph.toCompactString();
            String msg = "ERROR in getting root of dep graph for sentence.  Sentence is:\n" + sentence.toString() + "'\nDependency graph is:\n" + graphText + "\nText is:\n" + textAnnotation.getText();
            logger.error(msg);
            System.err.println(msg);
            e.printStackTrace();
            if (throwExceptionOnSentenceLengthCheck)
                throw e;
            // Best effort: leave this sentence without a dependency tree.
            continue;
        }

        int tokenStart = getNodePosition(textAnnotation, root, sentenceId);
        Pair<String, Integer> nodePair = new Pair<>(root.originalText(), tokenStart);
        Tree<Pair<String, Integer>> tree = new Tree<>(nodePair);
        populateChildren(depGraph, root, tree, textAnnotation, sentenceId);
        treeView.setDependencyTree(sentenceId, tree);
    }
    textAnnotation.addView(getViewName(), treeView);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) IndexedWord(edu.stanford.nlp.ling.IndexedWord) CoreMap(edu.stanford.nlp.util.CoreMap) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Aggregations

Pair (edu.illinois.cs.cogcomp.core.datastructures.Pair)59 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)35 ArrayList (java.util.ArrayList)17 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)10 Tree (edu.illinois.cs.cogcomp.core.datastructures.trees.Tree)10 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)7 Matcher (java.util.regex.Matcher)7 Paragraph (edu.illinois.cs.cogcomp.nlp.corpusreaders.aceReader.Paragraph)6 HashMap (java.util.HashMap)6 Pattern (java.util.regex.Pattern)6 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)3 SenseInstance (edu.illinois.cs.cogcomp.verbsense.jlis.SenseInstance)3 SenseStructure (edu.illinois.cs.cogcomp.verbsense.jlis.SenseStructure)3 JsonObject (com.google.gson.JsonObject)2 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)2 ITransformer (edu.illinois.cs.cogcomp.core.transformers.ITransformer)2 IndexedWord (edu.stanford.nlp.ling.IndexedWord)2 Annotation (edu.stanford.nlp.pipeline.Annotation)2 CoreMap (edu.stanford.nlp.util.CoreMap)2 LinkedHashSet (java.util.LinkedHashSet)2