use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.
the class TreeView method findSmallestSpan.
/**
 * Keeps the candidates whose span equals the span of the first candidate in the list,
 * discarding any that have no outgoing relations or that are pre-terminals.
 * <p>
 * NOTE(review): the method name suggests callers order {@code candidates} so that the first
 * one carries the smallest span; nothing in this method enforces that -- confirm at call sites.
 *
 * @param candidates constituents to examine, in iteration order
 * @return the retained constituents, in the order they were encountered
 */
private List<Constituent> findSmallestSpan(List<Constituent> candidates) {
    List<Constituent> matches = new ArrayList<>();
    IntPair referenceSpan = null;
    for (Constituent current : candidates) {
        if (referenceSpan == null) {
            // The first candidate seen fixes the span that later candidates must match.
            referenceSpan = current.getSpan();
        } else if (!referenceSpan.equals(current.getSpan())) {
            continue;
        }
        // Don't keep POS tags and words
        boolean hasOutgoing = current.getOutgoingRelations().size() > 0;
        if (hasOutgoing && !ParseTreeProperties.isPreTerminal(current)) {
            matches.add(current);
        }
    }
    return matches;
}
use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.
the class TreeView method setScoredParseTree.
/**
 * Set the parse tree of the {@code sentenceId}th sentence, attaching a score to each node.
 * <p>
 * The tree is stored for the sentence, converted to a span-labeled tree, and a root
 * constituent is created from the root label's span shifted by the sentence start. The rest
 * of the tree is then added via {@code addScoredParseTree}.
 *
 * @param sentenceId index of the sentence whose tree is being set
 * @param tree the parse tree for that sentence
 * @param scores tree of scores; its root label is used as the score of the root constituent
 * @throws IllegalStateException if this view's name does not start with {@code "PARSE"}
 */
public void setScoredParseTree(int sentenceId, Tree<String> tree, Tree<Double> scores) {
    safeInitializeTrees();

    boolean isParseView = this.getViewName().startsWith("PARSE");
    if (!isParseView) {
        throw new IllegalStateException(
                "Cannot set a Tree<String> object as the dependency tree. Need a Tree<String, Integer> to recover dependency token information. ");
    }

    this.trees.set(sentenceId, tree);

    Tree<Pair<String, IntPair>> labeledTree = ParseUtils.getSpanLabeledTree(tree);
    int offset = getSentenceStart(sentenceId);

    // The root span is relative to the sentence; shift both ends by the sentence start.
    Pair<String, IntPair> top = labeledTree.getLabel();
    IntPair topSpan = top.getSecond();
    Constituent rootConstituent = createNewConstituent(
            topSpan.getFirst() + offset,
            topSpan.getSecond() + offset,
            top.getFirst(),
            scores.getLabel());
    this.addConstituent(rootConstituent);

    int sentenceStartSpan = this.getTextAnnotation().getSentence(sentenceId).getStartSpan();
    addScoredParseTree(labeledTree, scores, rootConstituent, sentenceStartSpan);
}
use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.
the class StringTransformation method transformString.
/**
 * Modify the current version of the transformed text (as returned by getTransformedText()) by replacing the
 * string between character offsets textStart and textEnd with newStr.
 * <p>
 * The edit is recorded, together with the net change in length it causes, so that offsets can
 * be adjusted later. The replaced substring is read before any bookkeeping is updated, so
 * out-of-range offsets fail without leaving a partially recorded offset modification.
 *
 * @param textStart character offset start of edit in transformed text
 * @param textEnd character offset end of edit in transformed text
 * @param newStr string to replace specified character span
 * @return the offsets in the current, internally transformed text corresponding to textStart and textEnd
 * @throws IllegalStateException if earlier edits in this pass map either offset to a negative value
 * @throws IndexOutOfBoundsException if the offsets do not describe a valid range of the transformed
 *         text (raised by {@code substring})
 */
public IntPair transformString(int textStart, int textEnd, String newStr) {
    int start = textStart;
    int end = textEnd;
    // need updated offsets for return value -- e.g. to use as key for transform attributes
    if (isModified) {
        start = computeCurrentOffset(textStart);
        end = computeCurrentOffset(textEnd);
        if (start < 0 || end < 0) {
            throw new IllegalStateException("ERROR: edit affects deleted span (offsets are negative). Reorder " + "edits or filter overlapping edits.");
        }
    }

    int newLen = newStr.length();
    // Read the replaced text up front: this validates the offsets before any state is mutated.
    String origStr = transformedText.substring(textStart, textEnd);

    // compute the net change in offset: negative for deletion/reduction, positive for insertion,
    // zero for same-length substitution; store with indexes in current transformed text
    int origLen = textEnd - textStart;
    int netDiff = newLen - origLen;
    EditType editType = EditType.SUBST;
    if (netDiff != 0) {
        // (when netDiff == 0 the text was just replaced, no offset changes needed)
        int putIndex;
        if (netDiff < 0) {
            // involves deleting chars: after new str, modify the offsets
            putIndex = textStart + newLen;
            editType = (newLen == 0) ? EditType.DELETE : EditType.REDUCE;
        } else {
            // expanding or inserting: add the modifier at the end of the original span
            putIndex = textStart + origLen;
            editType = (origLen == 0) ? EditType.INSERT : EditType.EXPAND;
        }
        // account for any previous modifications at this index
        if (currentOffsetModifications.containsKey(putIndex)) {
            netDiff += currentOffsetModifications.get(putIndex).getFirst();
        }
        currentOffsetModifications.put(putIndex, new Pair<>(Integer.valueOf(netDiff), editType));
    }

    IntPair transformOffsets = new IntPair(start, end);
    // edit offsets encode affected substring allowing for previous edits in current pass
    edits.add(new Edit(transformOffsets, origStr, newStr, editType));
    isModified = true;
    return transformOffsets;
}
use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.
the class TextAnnotationFromTokenizationTest method getTokenization.
/**
 * Builds a whitespace tokenization of {@code rawText}, treating every newline-separated line
 * as one sentence.
 * <p>
 * Token strings come from splitting each line on single whitespace characters; character
 * offsets come from scanning the line and closing a token at every whitespace character.
 * Sentence ends are recorded as cumulative token counts.
 * <p>
 * NOTE(review): {@code String.split} drops trailing empty strings while the offset scan does
 * not, so the number of tokens and the number of offsets can differ when a line ends with
 * whitespace.
 *
 * @param rawText text to tokenize, one sentence per line
 * @return the tokens, their character offsets in {@code rawText}, and the sentence end indexes
 */
private Tokenizer.Tokenization getTokenization(String rawText) {
    // Tokenize the argument itself; the original read the same-named field and ignored it.
    String[] sentences = rawText.split("\\n");
    String[] tokens = new String[0];
    List<IntPair> characterOffsets = new ArrayList<>();
    int[] sentenceEndArray = new int[sentences.length];
    int sentenceCharOffset = 0;
    int lastTokenCount = 0;
    for (int iSentence = 0; iSentence < sentences.length; iSentence++) {
        String sentence = sentences[iSentence];
        String[] sentenceTokens = sentence.split("\\s");
        tokens = (String[]) ArrayUtils.addAll(tokens, sentenceTokens);

        int charOffsetBegin = sentenceCharOffset;
        int charOffsetEnd;
        for (int i = 0; i < sentence.length(); i++) {
            if (Character.isWhitespace(sentence.charAt(i))) {
                // A whitespace character closes the current token; the next starts right after it.
                charOffsetEnd = sentenceCharOffset + i;
                characterOffsets.add(new IntPair(charOffsetBegin, charOffsetEnd));
                charOffsetBegin = charOffsetEnd + 1;
            }
        }
        // The last token of the line runs to the end of the line.
        charOffsetEnd = sentenceCharOffset + sentence.length();
        characterOffsets.add(new IntPair(charOffsetBegin, charOffsetEnd));

        // Skip the newline that separated this line from the next one.
        sentenceCharOffset = charOffsetEnd + 1;
        lastTokenCount += sentenceTokens.length;
        sentenceEndArray[iSentence] = lastTokenCount;
    }
    IntPair[] charOffsetArray = characterOffsets.toArray(new IntPair[0]);
    return new Tokenizer.Tokenization(tokens, charOffsetArray, sentenceEndArray);
}
use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.
the class XmlDocumentProcessor method compileAttributeValues.
/**
 * Groups xml attribute values by the character offsets at which they occur, to support
 * searching for metadata that matches entity mentions.
 * <p>
 * Attribute names are not used; values from different spans that share the same offsets end
 * up in the same set.
 *
 * @param xmlMarkup xml span information collected from source document
 * @return a map from attribute value character offsets in source text to the set of attribute
 *         values found at those offsets
 */
public static Map<IntPair, Set<String>> compileAttributeValues(List<SpanInfo> xmlMarkup) {
    Map<IntPair, Set<String>> valuesByOffset = new HashMap<>();
    for (XmlDocumentProcessor.SpanInfo span : xmlMarkup) {
        for (Pair<String, IntPair> attribute : span.attributes.values()) {
            IntPair key = attribute.getSecond();
            Set<String> bucket = valuesByOffset.get(key);
            if (bucket == null) {
                // First value seen at these offsets: start a new set for it.
                bucket = new HashSet<>();
                valuesByOffset.put(key, bucket);
            }
            bucket.add(attribute.getFirst());
        }
    }
    return valuesByOffset;
}
Aggregations