use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.
the class SimpleGazetteerAnnotator method addView.
/**
 * Builds the gazetteer view for the given text annotation and attaches it.
 *
 * <p>Each token position is offered, in order, to every case-sensitive dictionary and to the
 * case-insensitive dictionary stored at the same index. The matches are collected in a single
 * span-label view, so the resulting constituents may overlap and a token may be covered by
 * entries from several gazetteers.
 *
 * @param ta the text annotation to annotate; its {@code TOKENS} view is read
 * @throws AnnotatorException declared by the overridden method
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    List<Constituent> tokens = ta.getView(ViewNames.TOKENS).getConstituents();
    SpanLabelView gazetteerView =
            new SpanLabelView(this.getViewName(), this.getClass().getName(), ta, 1d, true);

    int tokenPos = 0;
    while (tokenPos < tokens.size()) {
        int dictPos = 0;
        while (dictPos < dictionaries.size()) {
            // Case-sensitive lookup first, then its case-insensitive counterpart.
            dictionaries.get(dictPos).match(tokens, tokenPos, gazetteerView);
            dictionariesIgnoreCase.get(dictPos).match(tokens, tokenPos, gazetteerView);
            dictPos++;
        }
        tokenPos++;
    }

    ta.addView(gazetteerView.getViewName(), gazetteerView);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.
the class TreebankChunkReader method addChunkAnnotation.
/**
 * Reads the chunk lines of one sentence and attaches them to {@code textAnnotation} as a
 * {@code SHALLOW_PARSE} view.
 *
 * <p>Lines are consumed from {@code chunkLines} starting at the {@code currentChunkLineId}
 * cursor until a blank line or the end of the list is reached. Lines starting with
 * {@code "# Sentence"} are checked against the current file name and tree id; other
 * {@code #} lines are skipped. Every remaining line supplies a token id, a chunk label
 * (B-X / I-X / O) and a word, and the word must equal the corresponding token of
 * {@code textAnnotation}.
 *
 * @param textAnnotation the sentence to annotate
 * @param chunkLineId not read by this method; the reading position comes from
 *        {@code currentChunkLineId}
 * @return the same {@code textAnnotation}, with the chunk view added
 * @throws IllegalStateException if a sentence header does not match the current file or tree,
 *         a word does not match the expected token, or a chunk starts with a label other
 *         than B
 */
private TextAnnotation addChunkAnnotation(TextAnnotation textAnnotation, int chunkLineId) {
    SpanLabelView chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, "Gold", textAnnotation, 1.0);
    // Label of the most recent B or O line; "" until the first such line is seen.
    String currentChunkLabel = "";
    // First token of the chunk that is currently open, or -1 when no chunk is open.
    int start = -1;
    // Id of the last token line that was processed, needed to close a trailing chunk.
    int lastTokenId = -1;
    while (currentChunkLineId < chunkLines.size()) {
        String line = chunkLines.get(currentChunkLineId++);
        // A blank line terminates the sentence.
        if (line.trim().length() == 0)
            break;
        if (line.startsWith("#")) {
            if (!line.startsWith("# Sentence"))
                continue;
            // Header of the form "# Sentence <fileId>/<treeId>": verify we are in sync.
            line = line.replaceAll("# Sentence ", "");
            String[] parts = line.split("/");
            String fId = parts[0];
            int tId = Integer.parseInt(parts[1]);
            if (!currentSectionFiles[currentFileId - 1].equals("wsj_" + fId + ".mrg"))
                throw new IllegalStateException(currentSectionFiles[currentFileId - 1] + " does not match " + "wsj_" + fId + ".mrg");
            if (tId != this.treeInFile)
                throw new IllegalStateException("Expected tree id: " + tId + ", found: " + (this.treeInFile));
            continue;
        }
        String[] parts = line.split(" +");
        int id = Integer.parseInt(parts[1]);
        String chunkLabel = parts[2];
        String word = parts[4];
        word = SentenceUtils.makeSentencePresentable(word);
        String expectedWord = textAnnotation.getToken(id);
        if (!word.equals(expectedWord))
            throw new IllegalStateException("Expected word: " + expectedWord + ", found " + word);
        lastTokenId = id;
        if (currentChunkLabel.equals("")) {
            // Nothing seen yet: only B (open a chunk) or O (stay outside) are legal.
            if (chunkLabel.startsWith("B")) {
                start = id;
                currentChunkLabel = chunkLabel;
            } else if (!chunkLabel.startsWith("O")) {
                throw new IllegalStateException("Expected B, found " + chunkLabel);
            }
        } else if (currentChunkLabel.startsWith("B")) {
            // A chunk is open. An "I-" line simply extends it, so there is nothing to do;
            // currentChunkLabel keeps the opening B label for the whole chunk.
            if (chunkLabel.startsWith("B")) {
                // A new chunk begins: close the open one (end is exclusive) and reopen.
                addChunkSpan(chunkView, start, id, currentChunkLabel);
                currentChunkLabel = chunkLabel;
                start = id;
            } else if (chunkLabel.startsWith("O")) {
                addChunkSpan(chunkView, start, id, currentChunkLabel);
                start = -1;
                currentChunkLabel = chunkLabel;
            }
        } else if (currentChunkLabel.startsWith("O")) {
            // Outside any chunk.
            if (chunkLabel.startsWith("B")) {
                currentChunkLabel = chunkLabel;
                start = id;
            } else if (chunkLabel.startsWith("I-")) {
                throw new IllegalStateException("Expected B, found " + chunkLabel);
            } else if (chunkLabel.startsWith("O")) {
                currentChunkLabel = chunkLabel;
            }
        }
    }
    // A chunk that runs up to the last token of the sentence has no following B/O line to
    // close it, so it must be emitted here or it would be lost.
    if (start >= 0)
        addChunkSpan(chunkView, start, lastTokenId + 1, currentChunkLabel);
    textAnnotation.addView(ViewNames.SHALLOW_PARSE, chunkView);
    return textAnnotation;
}

/** Adds the chunk covering tokens [start, end) to the view, dropping the "B-" label prefix. */
private void addChunkSpan(SpanLabelView chunkView, int start, int end, String beginLabel) {
    if (start < 0)
        throw new IllegalStateException("Start <0");
    chunkView.addSpanLabel(start, end, beginLabel.replaceAll("B-", ""), 1d);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.
the class BrownClusterViewGenerator method addView.
/**
 * Adds the Brown-cluster view to the given text annotation.
 *
 * <p>For every label returned by {@code getMatchingSpans}, its spans are visited in order and
 * a span is added to the view unless a span already added for the same label fully contains
 * it.
 *
 * @param ta the text annotation to annotate
 */
@Override
public void addView(TextAnnotation ta) {
    lazyLoadClusters();
    SpanLabelView clusterView = new SpanLabelView(getViewName(), "BrownClusters", ta, 1.0, true);
    Map<String, List<IntPair>> spansByLabel = getMatchingSpans(ta);

    for (Entry<String, List<IntPair>> labelled : spansByLabel.entrySet()) {
        String clusterLabel = labelled.getKey();
        // Spans accepted so far for this label, in insertion order.
        Set<IntPair> accepted = new LinkedHashSet<>();

        candidates:
        for (IntPair candidate : labelled.getValue()) {
            for (IntPair kept : accepted) {
                // Skip nested constituents of the same type; the identical object is
                // never treated as its own container.
                if (kept != candidate
                        && kept.getFirst() <= candidate.getFirst()
                        && kept.getSecond() >= candidate.getSecond()) {
                    continue candidates;
                }
            }
            clusterView.addSpanLabel(candidate.getFirst(), candidate.getSecond(), clusterLabel, 1.0);
            accepted.add(candidate);
        }
    }

    ta.addView(getViewName(), clusterView);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.
the class CurrencyIndicator method getFeatures.
/**
 * Returns the {@code CURRENCY} feature when the constituent contains a currency mention that
 * is adjacent to a number, and an empty set otherwise.
 *
 * <p>The currency list is loaded on first use, and the currency view is added to the
 * constituent's text annotation if it is not there yet. A currency span inside {@code c}
 * qualifies in one of two ways:
 * <ul>
 * <li>it ends where {@code c} ends and the token just before it, still inside {@code c}, is
 * listed in {@code WordLists.NUMBERS}; or</li>
 * <li>it ends before {@code c} ends and the number normalizer produces at least one feature
 * for the token that follows it.</li>
 * </ul>
 *
 * @param c the constituent to extract features for
 * @return a set holding {@code CURRENCY}, or an empty set
 * @throws EdisonException if loading the currency list or adding the currency view fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    try {
        if (!loaded) {
            synchronized (this) {
                // Re-check under the lock so the list is loaded once. Per the original
                // note, it is now loaded from the datastore.
                if (!loaded)
                    loadCurrency(gzip, true);
            }
        }
    } catch (Exception ex) {
        throw new EdisonException(ex);
    }
    TextAnnotation ta = c.getTextAnnotation();
    if (!ta.hasView(VIEW_NAME)) {
        try {
            addCurrencyView(ta);
        } catch (Exception e) {
            // Previously this was only printed and execution continued to the view lookup
            // below without the view having been added; report the real cause instead.
            throw new EdisonException(e);
        }
    }
    SpanLabelView view = (SpanLabelView) ta.getView(VIEW_NAME);
    Set<Feature> features = new LinkedHashSet<>();
    for (Constituent cc : view.where(Queries.containedInConstituent(c))) {
        if (cc.getEndSpan() == c.getEndSpan()) {
            // Currency at the end of c: look at the token before it, provided that token
            // is still part of c. (The earlier guard compared against c's end span and
            // could never be satisfied, so this check was never performed.)
            int previousToken = cc.getStartSpan() - 1;
            if (previousToken >= c.getStartSpan()
                    && WordLists.NUMBERS.contains(ta.getToken(previousToken).toLowerCase())) {
                features.add(CURRENCY);
                break;
            }
        } else if (WordFeatureExtractorFactory.numberNormalizer.getWordFeatures(ta, cc.getEndSpan()).size() > 0) {
            // Currency followed by another token of c: that token must look like a number.
            features.add(CURRENCY);
            break;
        }
    }
    return features;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.
the class SimpleGazetteerAnnotatorTest method testMultiThreading.
/**
 * Test method for
 * {@link edu.illinois.cs.cogcomp.edison.annotators.SimpleGazetteerAnnotator#addView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)}
 * .
 *
 * <p>Twenty threads share one annotator instance and each repeatedly annotates the same
 * sentence for about ten seconds, checking the text and label of the first five gazetteer
 * constituents on every pass. Any failure raised in a worker, of any type, fails the test.
 *
 * @throws URISyntaxException
 * @throws IOException
 * @throws AnnotatorException
 */
@Test
public void testMultiThreading() throws IOException, URISyntaxException, AnnotatorException {
    final SimpleGazetteerAnnotator sga = new SimpleGazetteerAnnotator(defaultRm);
    // How long each worker keeps annotating, in milliseconds (ten seconds).
    final long runMillis = 10000L;
    // Expected {surface text, label} of the first five constituents, in view order.
    final String[][] expected = {
            {"university of illinois", "organizations(IC)"},
            {"university of illinois at champaign urbana", "organizations(IC)"},
            {"illinois", "places(IC)"},
            {"champaign", "places(IC)"},
            {"urbana", "places(IC)"}};
    class TestThread extends Thread {
        /** First failure seen by this worker, or null if it finished cleanly. */
        Throwable throwable;

        @Override
        public void run() {
            long start = System.currentTimeMillis();
            try {
                do {
                    final TextAnnotation ta = tab.createTextAnnotation("I hail from the university of illinois at champaign urbana.");
                    sga.addView(ta);
                    SpanLabelView view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
                    List<Constituent> entities = view.getConstituents();
                    for (int i = 0; i < expected.length; i++) {
                        assertEquals(expected[i][0], entities.get(i).toString());
                    }
                    for (int i = 0; i < expected.length; i++) {
                        assertEquals(expected[i][1], entities.get(i).getLabel());
                    }
                } while ((System.currentTimeMillis() - start) <= runMillis);
            } catch (Throwable t) {
                // Catch everything, not just assertion failures: an unchecked exception
                // (for example a too-short constituent list) would otherwise end the thread
                // with 'throwable' still null and the test would pass.
                throwable = t;
            }
        }
    }
    final int numthreads = 20;
    TestThread[] threads = new TestThread[numthreads];
    logger.info("Begin multithreaded test.");
    for (int i = 0; i < numthreads; i++) {
        threads[i] = new TestThread();
        threads[i].start();
    }
    for (int i = 0; i < numthreads; i++) {
        try {
            threads[i].join();
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the test runner instead of retrying forever.
            Thread.currentThread().interrupt();
            throw new AssertionError("Interrupted while waiting for worker thread " + i, e);
        }
        Throwable failure = threads[i].throwable;
        if (failure != null) {
            // Attach the worker's failure as the cause so its stack trace is reported.
            throw new AssertionError("Exception during multithreading test : " + failure, failure);
        }
    }
}
Aggregations