use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.
the class TextAnnotation method getSpansMatching.
/**
 * Returns the token spans whose normalized surface text has the same hash code as
 * {@code text}.
 *
 * <p>On first use this builds an index from the hash of every contiguous token span (tokens
 * joined by single spaces, with {@code ``} and {@code ''} mapped to {@code "} and PTB brackets
 * converted back) to the list of spans producing that hash. Spans use exclusive end offsets,
 * i.e. {@code (start, end + 1)}, so they agree with constituent offsets.
 *
 * <p>NOTE: matching is by hash code only; two different strings with the same
 * {@link String#hashCode()} are indistinguishable here, so callers that need exact matches
 * must verify the surface text themselves.
 *
 * @param text the surface string to look up; leading and trailing whitespace is ignored
 * @return the indexed list of matching spans (the internal list, not a copy), or a new empty
 *         list if nothing matches
 */
public List<IntPair> getSpansMatching(String text) {
    if (allSpans == null) {
        synchronized (this) {
            if (allSpans == null) {
                // Build the index in a local map and publish it only once it is complete, so
                // a caller that observes a non-null allSpans never queries a half-built index.
                // NOTE(review): fully safe publication also depends on how the allSpans field
                // is declared, which is not visible from this method.
                TIntObjectHashMap<ArrayList<IntPair>> index = new TIntObjectHashMap<ArrayList<IntPair>>();
                int tokenCount = this.size();
                // start must run over every token (including the last one); otherwise the
                // single-token span covering the final token is never indexed.
                for (int start = 0; start < tokenCount; start++) {
                    StringBuilder sb = new StringBuilder();
                    for (int end = start; end < tokenCount; end++) {
                        String token = tokens[end];
                        token = token.replace("``", "\"").replace("''", "\"");
                        token = SentenceUtils.convertFromPTBBrackets(token);
                        sb.append(token).append(" ");
                        int hash = sb.toString().trim().hashCode();
                        ArrayList<IntPair> spans = index.get(hash);
                        if (spans == null) {
                            spans = new ArrayList<IntPair>();
                            index.put(hash, spans);
                        }
                        spans.add(new IntPair(start, end + 1));
                    }
                }
                this.allSpans = TCollections.synchronizedMap(index);
            }
        }
    }
    List<IntPair> list = allSpans.get(text.trim().hashCode());
    if (list == null) {
        list = new ArrayList<>();
    }
    return list;
}
use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.
the class DummyTextAnnotationGenerator method addSrlFrame.
/**
 * Adds a single predicate with its arguments to {@code srlView}.
 *
 * <p>The predicate constituent is labelled {@code "predicate"} and carries the lemma found in
 * {@code lemmasAll} at the predicate's start token plus the given sense. Each entry of
 * {@code srlArgs} becomes an {@code "argument"} constituent whose relation label is the
 * entry's value. All scores are left at 0.0.
 *
 * @param srlView the view receiving the frame
 * @param viewName the view name recorded on the created constituents
 * @param ta the text annotation the constituents belong to
 * @param verbSRLPredicate token span of the predicate
 * @param sense the sense attribute stored on the predicate
 * @param srlArgs argument token spans mapped to their labels
 */
private static void addSrlFrame(PredicateArgumentView srlView, String viewName, TextAnnotation ta, IntPair verbSRLPredicate, String sense, Map<IntPair, String> srlArgs) {
    int predicateStart = verbSRLPredicate.getFirst();
    int predicateEnd = verbSRLPredicate.getSecond();
    Constituent predicate = new Constituent("predicate", viewName, ta, predicateStart, predicateEnd);
    predicate.addAttribute(PredicateArgumentView.LemmaIdentifier, lemmasAll[predicateStart]);
    predicate.addAttribute(PredicateArgumentView.SenseIdentifer, sense);

    int argCount = srlArgs.size();
    List<Constituent> arguments = new ArrayList<>(argCount);
    String[] relationLabels = new String[argCount];
    int next = 0;
    for (Map.Entry<IntPair, String> labelledSpan : srlArgs.entrySet()) {
        IntPair argSpan = labelledSpan.getKey();
        arguments.add(new Constituent("argument", viewName, ta, argSpan.getFirst(), argSpan.getSecond()));
        relationLabels[next++] = labelledSpan.getValue();
    }

    // One score slot per argument; every slot keeps the default value 0.0.
    double[] argScores = new double[argCount];
    srlView.addPredicateArguments(predicate, arguments, relationLabels, argScores);
}
use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.
the class DummyTextAnnotationGenerator method generateAnnotatedTextAnnotation.
/**
 * Builds a {@link TextAnnotation} over the first {@code sentenceNum} canned sentences and
 * attaches the requested views, filled from the canned (optionally noisy) annotations.
 *
 * <p>View names this method does not know are reported through the logger and skipped.
 * {@code ViewNames.NER_ONTONOTES} is supported only as an empty view.
 *
 * @param viewsToAdd names of the views to attach
 * @param withNoisyLabels if true, use the {@code *_noisy} annotation sources instead of the
 *        regular ones
 * @param sentenceNum number of sentences to include; must be between 1 and 3 inclusive
 * @return the annotated text annotation
 * @throws IllegalArgumentException (a {@link RuntimeException}) if {@code sentenceNum} is
 *         outside the range 1..3
 */
public static TextAnnotation generateAnnotatedTextAnnotation(String[] viewsToAdd, boolean withNoisyLabels, int sentenceNum) {
    // we can do at-most 3 sentences, for now
    if (sentenceNum > 3) {
        String message = "Currently this function supports at most 3 sentences per TextAnnotation. If you need more, " + "you have to augment this function";
        logger.error(message);
        throw new IllegalArgumentException(message);
    }
    // at least one sentence
    if (sentenceNum < 1) {
        String message = "The requested TextAnnotation has to have at least one sentence. ";
        logger.error(message);
        throw new IllegalArgumentException(message);
    }
    List<String[]> annotatedTokenizedStringArrayAll = new ArrayList<>();
    annotatedTokenizedStringArrayAll.addAll(annotatedTokenizedStringArray1);
    if (sentenceNum > 1)
        annotatedTokenizedStringArrayAll.addAll(annotatedTokenizedStringArray2);
    if (sentenceNum > 2)
        annotatedTokenizedStringArrayAll.addAll(annotatedTokenizedStringArray3);
    TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(annotatedTokenizedStringArrayAll);
    for (String viewName : viewsToAdd) {
        switch (viewName) {
            case ViewNames.SENTENCE:
                SpanLabelView sentView = new SpanLabelView(ViewNames.SENTENCE, ta);
                sentView.addConstituent(new Constituent("sent1", ViewNames.SENTENCE, ta, 0, pos1.length));
                if (sentenceNum > 1)
                    sentView.addConstituent(new Constituent("sent2", ViewNames.SENTENCE, ta, pos1.length, pos1.length + pos2.length));
                if (sentenceNum > 2)
                    sentView.addConstituent(new Constituent("sent3", ViewNames.SENTENCE, ta, pos1.length + pos2.length, pos1.length + pos2.length + pos3.length));
                ta.addView(ViewNames.SENTENCE, sentView);
                ta.setSentences();
                break;
            case ViewNames.POS:
                TokenLabelView posView = new TokenLabelView(viewName, ta);
                String[] pos1Overall = withNoisyLabels ? pos_noisy1 : pos1;
                String[] pos2Overall = withNoisyLabels ? pos_noisy2 : pos2;
                String[] pos3Overall = withNoisyLabels ? pos_noisy3 : pos3;
                addTokenLabels(posView, 0, pos1Overall);
                if (sentenceNum > 1)
                    addTokenLabels(posView, pos1Overall.length, pos2Overall);
                if (sentenceNum > 2)
                    addTokenLabels(posView, pos1Overall.length + pos2Overall.length, pos3Overall);
                ta.addView(viewName, posView);
                break;
            case ViewNames.LEMMA:
                TokenLabelView lemmaView = new TokenLabelView(ViewNames.LEMMA, ta);
                String[] lemmaOveral1 = withNoisyLabels ? lemmas_noisy1 : lemmas1;
                String[] lemmaOveral2 = withNoisyLabels ? lemmas_noisy2 : lemmas2;
                String[] lemmaOveral3 = withNoisyLabels ? lemmas_noisy3 : lemmas3;
                addTokenLabels(lemmaView, 0, lemmaOveral1);
                if (sentenceNum > 1)
                    addTokenLabels(lemmaView, lemmaOveral1.length, lemmaOveral2);
                if (sentenceNum > 2)
                    addTokenLabels(lemmaView, lemmaOveral1.length + lemmaOveral2.length, lemmaOveral3);
                ta.addView(viewName, lemmaView);
                break;
            case ViewNames.SHALLOW_PARSE:
                SpanLabelView chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, ta);
                addSpanLabels(chunkView, withNoisyLabels ? chunks_noisy1 : chunks1);
                if (sentenceNum > 1)
                    addSpanLabels(chunkView, withNoisyLabels ? chunks_noisy2 : chunks2);
                if (sentenceNum > 2)
                    addSpanLabels(chunkView, withNoisyLabels ? chunks_noisy3 : chunks3);
                ta.addView(ViewNames.SHALLOW_PARSE, chunkView);
                break;
            case ViewNames.NER_CONLL:
                SpanLabelView nerView = new SpanLabelView(ViewNames.NER_CONLL, ta);
                addSpanLabels(nerView, withNoisyLabels ? ner_noisy1 : ner1);
                if (sentenceNum > 1)
                    addSpanLabels(nerView, withNoisyLabels ? ner_noisy2 : ner2);
                if (sentenceNum > 2)
                    addSpanLabels(nerView, withNoisyLabels ? ner_noisy3 : ner3);
                ta.addView(ViewNames.NER_CONLL, nerView);
                break;
            case ViewNames.PARSE_GOLD:
            case ViewNames.PARSE_STANFORD:
            case ViewNames.PARSE_BERKELEY:
            case ViewNames.PSEUDO_PARSE_STANFORD:
            case ViewNames.PARSE_CHARNIAK:
                // All parse views share the same canned trees; only the view name differs.
                TreeView parseView = new TreeView(viewName, ta);
                String treeOveral1 = withNoisyLabels ? tree_noisy1 : tree1;
                String treeOveral2 = withNoisyLabels ? tree_noisy2 : tree2;
                String treeOveral3 = withNoisyLabels ? tree_noisy3 : tree3;
                parseView.setParseTree(0, TreeParserFactory.getStringTreeParser().parse(treeOveral1));
                if (sentenceNum > 1)
                    parseView.setParseTree(1, TreeParserFactory.getStringTreeParser().parse(treeOveral2));
                if (sentenceNum > 2)
                    parseView.setParseTree(2, TreeParserFactory.getStringTreeParser().parse(treeOveral3));
                ta.addView(viewName, parseView);
                break;
            case ViewNames.SRL_VERB:
                PredicateArgumentView verbSRLView = new PredicateArgumentView(viewName, ta);
                String srlSense = withNoisyLabels ? verbSRLPredicateSense_noisy : verbSRLPredicateSense;
                addSrlFrame(verbSRLView, viewName, ta, verbSRLPredicate1, srlSense, (withNoisyLabels ? verbSRLArgs_noisy1 : verbSRLArgs1));
                if (sentenceNum > 1) {
                    addSrlFrame(verbSRLView, viewName, ta, verbSRLPredicate2, srlSense, (withNoisyLabels ? verbSRLArgs_noisy2 : verbSRLArgs2));
                }
                if (sentenceNum > 2) {
                    // The third sentence contributes two frames.
                    addSrlFrame(verbSRLView, viewName, ta, verbSRLPredicate3, srlSense, (withNoisyLabels ? verbSRLArgs_noisy3 : verbSRLArgs3));
                    addSrlFrame(verbSRLView, viewName, ta, verbSRLPredicate4, srlSense, (withNoisyLabels ? verbSRLArgs_noisy4 : verbSRLArgs4));
                }
                ta.addView(viewName, verbSRLView);
                break;
            case ViewNames.NER_ONTONOTES:
                // For now the NER views are going to be empty
                ta.addView(viewName, new SpanLabelView(viewName, ta));
                // Without this break the case fell through to default and logged a bogus
                // "Cannot provide annotation" error for a view that had just been added.
                break;
            default:
                logger.error("Cannot provide annotation for {}", viewName);
        }
    }
    return ta;
}

/** Labels consecutive tokens, starting at token index {@code offset}, with {@code labels} (score 1.0 each). */
private static void addTokenLabels(TokenLabelView view, int offset, String[] labels) {
    for (int i = 0; i < labels.length; i++) {
        view.addTokenLabel(offset + i, labels[i], 1.0);
    }
}

/** Adds every span of {@code labeledSpans} to {@code view} with its mapped label and score 1.0. */
private static void addSpanLabels(SpanLabelView view, Map<IntPair, String> labeledSpans) {
    for (Map.Entry<IntPair, String> labeledSpan : labeledSpans.entrySet()) {
        IntPair span = labeledSpan.getKey();
        view.addSpanLabel(span.getFirst(), span.getSecond(), labeledSpan.getValue(), 1.0);
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.
the class BrownClusterViewGenerator method addView.
/**
 * Creates the span-label view for this generator and attaches it to {@code ta} under
 * {@link #getViewName()}.
 *
 * <p>For every label returned by {@code getMatchingSpans(ta)}, its spans are added in
 * iteration order. A span is skipped when a span already added for the same label covers it
 * (starts at or before it and ends at or after it), which also drops exact duplicates. The
 * check is order-dependent: a span is compared only against spans accepted before it.
 *
 * @param ta the text annotation to which the view is added
 */
@Override
public void addView(TextAnnotation ta) {
    lazyLoadClusters();
    SpanLabelView view = new SpanLabelView(getViewName(), "BrownClusters", ta, 1.0, true);
    Map<String, List<IntPair>> m = getMatchingSpans(ta);
    for (Entry<String, List<IntPair>> entry : m.entrySet()) {
        String label = entry.getKey();
        Set<IntPair> added = new LinkedHashSet<>();
        for (IntPair candidate : entry.getValue()) {
            // don't add nested constituents of the same type
            // (No identity shortcut here: the candidate is never in 'added' yet unless the same
            // instance occurs twice in the input, and in that case it must be skipped rather
            // than labelled a second time.)
            boolean foundContainer = false;
            for (IntPair kept : added) {
                if (kept.getFirst() <= candidate.getFirst() && kept.getSecond() >= candidate.getSecond()) {
                    foundContainer = true;
                    break;
                }
            }
            if (!foundContainer) {
                view.addSpanLabel(candidate.getFirst(), candidate.getSecond(), label, 1.0);
                added.add(candidate);
            }
        }
    }
    ta.addView(getViewName(), view);
}
use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.
the class GazetteerViewGenerator method addView.
/**
 * Adds to {@code view} a span labelled {@code label} for every span in {@code allSpans} that
 * matches one of the patterns stored for {@code labelId}.
 *
 * <p>A span matches pattern {@code i} when it is listed in {@code allSpans} under the key
 * {@code patterns.get(labelId)[i]} and its width (second minus first) equals
 * {@code lengths.get(labelId)[i]}. Matches are added in order; a match is skipped when a span
 * already added covers it (starts at or before it and ends at or after it), which also drops
 * exact duplicates. The check is order-dependent: a match is compared only against spans
 * accepted before it.
 *
 * @param label the label given to every added span
 * @param labelId index of this label's entries in {@code patterns} and {@code lengths}
 * @param view the view receiving the span labels
 * @param allSpans spans of the text, keyed by an int hash
 */
private void addView(String label, int labelId, SpanLabelView view, TIntObjectHashMap<ArrayList<IntPair>> allSpans) {
    logger.debug("Adding gazetteer {}", label);
    List<IntPair> matches = new ArrayList<>();
    int[] pattern = this.patterns.get(labelId);
    int[] len = this.lengths.get(labelId);
    for (int i = 0; i < pattern.length; i++) {
        int length = len[i];
        // A single lookup suffices: get() yields null when the key is absent.
        List<IntPair> candidates = allSpans.get(pattern[i]);
        if (candidates == null) {
            continue;
        }
        for (IntPair pair : candidates) {
            // The key is only a hash, so the span width is checked as well.
            if (pair.getSecond() - pair.getFirst() == length) {
                matches.add(pair);
            }
        }
    }
    Set<IntPair> added = new LinkedHashSet<>();
    for (IntPair candidate : matches) {
        // don't add nested constituents of the same type
        // (No identity shortcut here: the same IntPair instance can be matched more than once,
        // e.g. by repeated pattern entries, and must then be skipped rather than labelled
        // a second time.)
        boolean foundContainer = false;
        for (IntPair kept : added) {
            if (kept.getFirst() <= candidate.getFirst() && kept.getSecond() >= candidate.getSecond()) {
                foundContainer = true;
                break;
            }
        }
        if (!foundContainer) {
            view.addSpanLabel(candidate.getFirst(), candidate.getSecond(), label, 1.0);
            added.add(candidate);
        }
    }
}
Aggregations