use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.
the class XmlDocumentProcessor method compileAttributeValues.
/**
 * Indexes the attribute values found in the xml markup by the character offsets at which each
 * value occurs, to support search for metadata matching entity mentions.
 *
 * @param xmlMarkup xml span information collected from source document
 * @return a map from attribute value character offsets in source text to the set of attribute
 *         values recorded at those offsets
 */
public static Map<IntPair, Set<String>> compileAttributeValues(List<SpanInfo> xmlMarkup) {
    Map<IntPair, Set<String>> valuesByOffsets = new HashMap<>();
    for (XmlDocumentProcessor.SpanInfo span : xmlMarkup) {
        // The attribute name (map key) plays no role here; only the (value, offsets) pair does.
        for (Pair<String, IntPair> valueAndOffsets : span.attributes.values()) {
            IntPair offsets = valueAndOffsets.getSecond();
            Set<String> known = valuesByOffsets.get(offsets);
            if (known != null) {
                known.add(valueAndOffsets.getFirst());
                continue;
            }
            // First value seen at these offsets: start a new set for them.
            Set<String> fresh = new HashSet<>();
            fresh.add(valueAndOffsets.getFirst());
            valuesByOffsets.put(offsets, fresh);
        }
    }
    return valuesByOffsets;
}
use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.
the class HeadFinderBase method findHead.
/**
 * Finds the head of a parse node by trying, in order, each head rule registered for the node's
 * label (after function tags have been stripped from it).
 *
 * @param parseNode the parse node whose head is wanted
 * @return the first non-null head produced by a rule; {@code null} if no rule produces one, or
 *         if no rules are registered for the label and no default rule is set
 */
protected Constituent findHead(Constituent parseNode) {
    String strippedLabel = ParseUtils.stripFunctionTags(parseNode.getLabel());
    List<Pair<HeadSearchDirection, String[]>> rules = getNonterminalHeadInformation(strippedLabel);

    if (rules == null) {
        // Nothing registered for this label: fall back to the default rule, if there is one.
        return (defaultRule == null) ? null : findHead(parseNode, defaultRule, true);
    }

    for (int ruleIndex = 0; ruleIndex < rules.size(); ruleIndex++) {
        // The flag is raised only for the final rule in the list.
        // NOTE(review): presumably it lets the three-argument overload fall back to a default
        // choice -- confirm against that overload.
        boolean isLastRule = (ruleIndex + 1 == rules.size());
        Constituent candidate = findHead(parseNode, rules.get(ruleIndex), isLastRule);
        if (candidate != null) {
            return candidate;
        }
    }
    return null;
}
use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.
the class ParseUtils method getTokenIndexedTreeCovering.
/**
 * From a parse tree and a span that is specified with the start and end (exclusive), this
 * function returns a tree that corresponds to the subtree that covers the span. Each node in
 * the new tree corresponds to a node in the input tree and is labeled with the label of the
 * original node along with the span that this node covered in the original tree.
 * <p>
 * The search descends from the root into the first child whose span contains
 * {@code [start, end)}. It stops at a node whose span equals the requested span exactly, or,
 * failing that, at the deepest node reached that has no child containing the span.
 *
 * @param parse the parse tree to search
 * @param start the start of the span
 * @param end the end of the span (exclusive)
 * @return A new tree that covers the specified span and each node specifies the label and the
 *         span of the original tree that it covers.
 */
public static Tree<Pair<String, IntPair>> getTokenIndexedTreeCovering(Tree<String> parse, int start, int end) {
    Tree<Pair<String, IntPair>> node = ParseUtils.getSpanLabeledTree(parse);
    while (node != null) {
        IntPair nodeSpan = node.getLabel().getSecond();
        if (nodeSpan.getFirst() == start && nodeSpan.getSecond() == end) {
            // Exact match: no need to descend further.
            return node;
        }

        // Look for the first child that still contains the requested span.
        Tree<Pair<String, IntPair>> coveringChild = null;
        for (Tree<Pair<String, IntPair>> child : node.getChildren()) {
            IntPair childSpan = child.getLabel().getSecond();
            if (childSpan.getFirst() <= start && childSpan.getSecond() >= end) {
                coveringChild = child;
                break;
            }
        }

        if (coveringChild == null) {
            // No child contains the span, so this node is the tightest cover found.
            return node;
        }
        node = coveringChild;
    }
    return node;
}
use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.
the class ParseUtils method getPhraseFromHead.
/**
 * Primarily a fix for prepSRL objects; converts them from single head words to constituents.
 * E.g. for the sentence "the man with the telescope", the object of the preposition will be
 * "the telescope" instead of just "telescope".
 *
 * @param predicate The predicate of the construction (e.g. "with")
 * @param argHead The head-word of the argument of the construction (e.g. "telescope")
 * @param parseViewName The name of the parse view used to extract the phrase-structure tree
 * @return The full constituent phrase containing the argument head, or {@code null} when the
 *         argument is not found under the predicate's parent node or when the upward walk from
 *         the argument reaches a node that has no parent
 */
public static Constituent getPhraseFromHead(Constituent predicate, Constituent argHead, String parseViewName) {
    TextAnnotation ta = argHead.getTextAnnotation();
    // Offsets compared against the tree below are relative to the start of argHead's sentence.
    int sentenceOffset = ta.getSentence(ta.getSentenceId(argHead)).getStartSpan();
    int argStart = argHead.getStartSpan() - sentenceOffset;

    // Only proceed if the predicate node "m-commands" the arg, i.e. some leaf under the
    // predicate's parent starts at the argument's sentence-relative position.
    // NOTE(review): getParent() is dereferenced without a null check here -- confirm that the
    // node covering the predicate can never be the root.
    Tree<Pair<String, IntPair>> predParentTree = getTokenIndexedTreeCovering(predicate, parseViewName).getParent();
    boolean argUnderPredParent = false;
    for (Tree<Pair<String, IntPair>> leaf : predParentTree.getYield()) {
        argUnderPredParent |= (leaf.getLabel().getSecond().getFirst() == argStart);
    }
    if (!argUnderPredParent) {
        return null;
    }

    // Climb from the argument node until its parent satisfies checkForPredicate for the
    // predicate's sentence-relative start, or until there is no parent left.
    int predStart = predicate.getStartSpan() - sentenceOffset;
    Tree<Pair<String, IntPair>> argPhrase = getTokenIndexedTreeCovering(argHead, parseViewName);
    Tree<Pair<String, IntPair>> parent = argPhrase.getParent();
    while (!checkForPredicate(parent, predStart) && parent != null) {
        argPhrase = parent;
        parent = argPhrase.getParent();
    }

    // If the phrase covering the constituent is the whole sentence then the annotation is wrong
    if (parent == null) {
        return null;
    }

    // The result begins one position after the predicate's start and extends over as many
    // positions as there are leaves in the phrase found above.
    int start = predicate.getStartSpan() + 1;
    int end = start + argPhrase.getYield().size();
    return new Constituent(argHead.getLabel(), argHead.getViewName(), argHead.getTextAnnotation(), start, end);
}
use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.
the class JsonSerializer method readTextAnnotation.
/**
 * Builds a {@code TextAnnotation} from its JSON string form.
 * <p>
 * Reads the "corpusId", "id", "text" and "tokens" fields plus the sentence information, derives
 * token offsets from the text and tokens, then attaches every entry of the "views" array as a
 * top-K view list (one view per element of that entry's "viewData" array) and finally reads
 * the attributes.
 *
 * @param string the JSON text to parse
 * @return the reconstructed text annotation
 * @throws Exception if parsing or any of the read helpers fails
 */
TextAnnotation readTextAnnotation(String string) throws Exception {
    JsonObject root = (JsonObject) new JsonParser().parse(string);

    String corpusId = readString("corpusId", root);
    String id = readString("id", root);
    String text = readString("text", root);
    String[] tokens = readStringArray("tokens", root);
    // Only the second component of what readSentences returns is consumed here.
    int[] sentenceEndPositions = readSentences(root).getSecond();
    IntPair[] tokenOffsets = TokenUtils.getTokenOffsets(text, tokens);

    TextAnnotation annotation = new TextAnnotation(corpusId, id, text, tokenOffsets, tokens, sentenceEndPositions);

    JsonArray viewEntries = root.getAsJsonArray("views");
    for (int viewIndex = 0; viewIndex < viewEntries.size(); viewIndex++) {
        JsonObject viewEntry = (JsonObject) viewEntries.get(viewIndex);
        String viewName = readString("viewName", viewEntry);
        JsonArray alternatives = viewEntry.getAsJsonArray("viewData");

        // Each element of "viewData" is one of the top-K views stored under this name.
        List<View> topKViews = new ArrayList<>();
        for (int rank = 0; rank < alternatives.size(); rank++) {
            topKViews.add(readView((JsonObject) alternatives.get(rank), annotation));
        }
        annotation.addTopKView(viewName, topKViews);
    }

    readAttributes(annotation, root);
    return annotation;
}
Aggregations