Search in sources :

Example 36 with Pair

use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.

the class XmlDocumentProcessor method compileAttributeValues.

/**
 * Indexes the attribute values found in the xml markup by the offsets recorded alongside each
 * value, to support search for metadata matching entity mentions.
 *
 * @param xmlMarkup xml span information collected from source document
 * @return a map from attribute value character offsets in source text to the set of attribute
 *         values recorded at those offsets
 */
public static Map<IntPair, Set<String>> compileAttributeValues(List<SpanInfo> xmlMarkup) {
    Map<IntPair, Set<String>> valuesByOffset = new HashMap<>();
    for (XmlDocumentProcessor.SpanInfo span : xmlMarkup) {
        // Each attribute maps to a (value, offsets) pair; the attribute name itself is not needed.
        for (Pair<String, IntPair> valueAndOffset : span.attributes.values()) {
            valuesByOffset
                    .computeIfAbsent(valueAndOffset.getSecond(), key -> new HashSet<>())
                    .add(valueAndOffset.getFirst());
        }
    }
    return valuesByOffset;
}
Also used : IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 37 with Pair

use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.

the class HeadFinderBase method findHead.

/**
 * Finds the head of a parse node by looking up the head rules registered for the node's label
 * (with function tags stripped) and trying them in order.
 *
 * @param parseNode the parse node whose head is sought
 * @return the first non-null head produced by a rule; if no rules exist for the label, the
 *         result of applying {@code defaultRule}, or {@code null} when there is no default
 *         rule; {@code null} if every rule fails
 */
protected Constituent findHead(Constituent parseNode) {
    String strippedLabel = ParseUtils.stripFunctionTags(parseNode.getLabel());
    List<Pair<HeadSearchDirection, String[]>> rules = getNonterminalHeadInformation(strippedLabel);

    // No rules for this label: use the default rule, if there is one.
    if (rules == null) {
        return (defaultRule == null) ? null : findHead(parseNode, defaultRule, true);
    }

    int lastIndex = rules.size() - 1;
    for (int ruleIndex = 0; ruleIndex <= lastIndex; ruleIndex++) {
        // Only the final rule is invoked with the flag set to true.
        Constituent candidate = findHead(parseNode, rules.get(ruleIndex), ruleIndex == lastIndex);
        if (candidate != null) {
            return candidate;
        }
    }
    return null;
}
Also used : Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 38 with Pair

use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.

the class ParseUtils method getTokenIndexedTreeCovering.

/**
 * Given a parse tree and a span specified by its start and end (exclusive), returns a subtree
 * of the span-labeled version of the parse. Every node of the returned tree carries the label
 * of the corresponding original node together with the span that node covered in the original
 * tree.
 * <p>
 * The search starts at the root and repeatedly steps into the first child whose span contains
 * the requested span. It stops at a node whose span equals the requested span, or at the node
 * where no child contains the requested span.
 *
 * @return A new tree that covers the specified span and each node specifies the label and the
 *         span of the original tree that it covers.
 */
public static Tree<Pair<String, IntPair>> getTokenIndexedTreeCovering(Tree<String> parse, int start, int end) {
    Tree<Pair<String, IntPair>> node = ParseUtils.getSpanLabeledTree(parse);
    while (node != null) {
        IntPair nodeSpan = node.getLabel().getSecond();
        if (nodeSpan.getFirst() == start && nodeSpan.getSecond() == end) {
            return node;
        }

        // Pick the first child whose span still contains [start, end).
        Tree<Pair<String, IntPair>> coveringChild = null;
        for (Tree<Pair<String, IntPair>> candidate : node.getChildren()) {
            IntPair candidateSpan = candidate.getLabel().getSecond();
            if (candidateSpan.getFirst() <= start && candidateSpan.getSecond() >= end) {
                coveringChild = candidate;
                break;
            }
        }

        // No child contains the span, so the descent ends at this node.
        if (coveringChild == null) {
            return node;
        }
        node = coveringChild;
    }
    return node;
}
Also used : IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 39 with Pair

use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.

the class ParseUtils method getPhraseFromHead.

/**
 * Primarily a fix for prepSRL objects: turns a single head word into a constituent. E.g. for
 * the sentence "the man with the telescope", the object of the preposition will be
 * "the telescope" instead of just "telescope".
 * <p>
 * Returns {@code null} when no leaf under the parent of the predicate's node starts at the
 * argument head's sentence-relative position, or when the upward walk from the argument runs
 * out of parents.
 *
 * @param predicate The predicate of the construction (e.g. "with")
 * @param argHead The head-word of the argument of the construction (e.g. "telescope")
 * @param parseViewName The name of the parse view used to extract the phrase-structure tree
 * @return The full constituent phrase containing the argument head, or {@code null}
 */
public static Constituent getPhraseFromHead(Constituent predicate, Constituent argHead, String parseViewName) {
    TextAnnotation ta = argHead.getTextAnnotation();
    int sentenceOffset = ta.getSentence(ta.getSentenceId(argHead)).getStartSpan();
    int argStart = argHead.getStartSpan() - sentenceOffset;

    // Proceed only if the predicate node "m-commands" the arg: some leaf below the predicate's
    // parent has to begin at the argument's position.
    Tree<Pair<String, IntPair>> predParentTree =
            getTokenIndexedTreeCovering(predicate, parseViewName).getParent();
    boolean argUnderPredicateParent = false;
    for (Tree<Pair<String, IntPair>> leaf : predParentTree.getYield()) {
        argUnderPredicateParent |= (leaf.getLabel().getSecond().getFirst() == argStart);
    }
    if (!argUnderPredicateParent) {
        return null;
    }

    // Climb from the argument node until the parent satisfies checkForPredicate for the
    // predicate's sentence-relative position, or until there is no parent left.
    int predicateOffset = predicate.getStartSpan() - sentenceOffset;
    Tree<Pair<String, IntPair>> argPhrase = getTokenIndexedTreeCovering(argHead, parseViewName);
    Tree<Pair<String, IntPair>> parent = argPhrase.getParent();
    while (!checkForPredicate(parent, predicateOffset)) {
        if (parent == null) {
            break;
        }
        argPhrase = parent;
        parent = argPhrase.getParent();
    }

    // If the phrase covering the constituent is the whole sentence then the annotation is wrong
    if (parent == null) {
        return null;
    }

    // The phrase is placed directly after the predicate and spans as many tokens as the
    // subtree has leaves.
    int start = predicate.getStartSpan() + 1;
    int end = start + argPhrase.getYield().size();
    return new Constituent(argHead.getLabel(), argHead.getViewName(), argHead.getTextAnnotation(), start, end);
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 40 with Pair

use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.

the class JsonSerializer method readTextAnnotation.

/**
 * Rebuilds a {@link TextAnnotation} from its JSON string form.
 * <p>
 * Reads the "corpusId", "id", "text" and "tokens" fields plus the sentence information,
 * derives token offsets from the text and tokens, then adds each entry of the "views" array
 * (keyed by its "viewName", with one view per element of "viewData") as a top-K view.
 * Attributes are read last.
 *
 * @param string the JSON text to parse
 * @return the reconstructed text annotation
 * @throws Exception if reading any part of the JSON fails
 */
TextAnnotation readTextAnnotation(String string) throws Exception {
    JsonObject root = (JsonObject) new JsonParser().parse(string);

    String corpusId = readString("corpusId", root);
    String id = readString("id", root);
    String text = readString("text", root);
    String[] tokens = readStringArray("tokens", root);
    // Only the second element of the sentence pair is needed to build the annotation.
    int[] sentenceInfo = readSentences(root).getSecond();
    IntPair[] tokenOffsets = TokenUtils.getTokenOffsets(text, tokens);

    TextAnnotation annotation =
            new TextAnnotation(corpusId, id, text, tokenOffsets, tokens, sentenceInfo);

    JsonArray viewArray = root.getAsJsonArray("views");
    for (int viewIndex = 0; viewIndex < viewArray.size(); viewIndex++) {
        JsonObject viewJson = (JsonObject) viewArray.get(viewIndex);
        String viewName = readString("viewName", viewJson);
        JsonArray rankedViews = viewJson.getAsJsonArray("viewData");

        List<View> topK = new ArrayList<>();
        for (int rank = 0; rank < rankedViews.size(); rank++) {
            topK.add(readView((JsonObject) rankedViews.get(rank), annotation));
        }
        annotation.addTopKView(viewName, topK);
    }

    readAttributes(annotation, root);
    return annotation;
}
Also used : JsonObject(com.google.gson.JsonObject) JsonArray(com.google.gson.JsonArray) JsonParser(com.google.gson.JsonParser) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Aggregations

Pair (edu.illinois.cs.cogcomp.core.datastructures.Pair)59 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)35 ArrayList (java.util.ArrayList)17 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)10 Tree (edu.illinois.cs.cogcomp.core.datastructures.trees.Tree)10 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)7 Matcher (java.util.regex.Matcher)7 Paragraph (edu.illinois.cs.cogcomp.nlp.corpusreaders.aceReader.Paragraph)6 HashMap (java.util.HashMap)6 Pattern (java.util.regex.Pattern)6 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)3 SenseInstance (edu.illinois.cs.cogcomp.verbsense.jlis.SenseInstance)3 SenseStructure (edu.illinois.cs.cogcomp.verbsense.jlis.SenseStructure)3 JsonObject (com.google.gson.JsonObject)2 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)2 ITransformer (edu.illinois.cs.cogcomp.core.transformers.ITransformer)2 IndexedWord (edu.stanford.nlp.ling.IndexedWord)2 Annotation (edu.stanford.nlp.pipeline.Annotation)2 CoreMap (edu.stanford.nlp.util.CoreMap)2 LinkedHashSet (java.util.LinkedHashSet)2