use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class PrepSRLAnnotator method addView.
/**
 * Builds a {@link SpanLabelView} named {@code viewName} and adds it to {@code ta}.
 *
 * <p>Candidates are gathered by walking the TOKENS view: a token accepted by
 * {@code PrepSRLDataReader.isPrep} is cloned into this view with the
 * {@code DataReader.CANDIDATE} label, and any non-null constituent returned by
 * {@code isBigramPrep} / {@code isTrigramPrep} for the same start token is added as well.
 * Every candidate is then labelled by {@code classifier}; candidates whose predicted label
 * equals {@code DataReader.CANDIDATE} are left out of the view.
 *
 * @param ta the text annotation to read tokens from and to attach the new view to
 * @throws AnnotatorException declared by the overridden method
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    final List<Constituent> prepCandidates = new ArrayList<>();

    for (Constituent token : ta.getView(ViewNames.TOKENS).getConstituents()) {
        final int start = token.getStartSpan();

        // Single-token candidate.
        if (PrepSRLDataReader.isPrep(ta, start)) {
            prepCandidates.add(
                    token.cloneForNewViewWithDestinationLabel(viewName, DataReader.CANDIDATE));
        }

        // Multi-word candidates starting at the same token: bigram first, then trigram.
        final Constituent bigram = PrepSRLDataReader.isBigramPrep(ta, start, viewName);
        if (bigram != null) {
            prepCandidates.add(bigram);
        }
        final Constituent trigram = PrepSRLDataReader.isTrigramPrep(ta, start, viewName);
        if (trigram != null) {
            prepCandidates.add(trigram);
        }
    }

    final SpanLabelView roleView =
            new SpanLabelView(viewName, viewName + "-annotator", ta, 1.0, true);

    for (Constituent candidate : prepCandidates) {
        final String predicted = classifier.discreteValue(candidate);
        if (predicted.equals(DataReader.CANDIDATE)) {
            // The classifier left the placeholder label in place: nothing to record.
            continue;
        }
        roleView.addSpanLabel(candidate.getStartSpan(), candidate.getEndSpan(), predicted, 1.0);
    }

    ta.addView(viewName, roleView);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class SentencePipelineTest method testFailingPosFile.
/**
 * Reads {@code POS_FILE}, annotates its text with {@code sentenceProcessor}, and checks that
 * within the character span of the sentence at index 3 the POS view has at least one
 * constituent and exactly as many constituents as the TOKENS view.
 */
@Test
public void testFailingPosFile() {
    String fileContents = null;
    try {
        fileContents = LineIO.slurp(POS_FILE);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    TextAnnotation annotated = null;
    try {
        annotated = sentenceProcessor.createAnnotatedTextAnnotation("testPos", "tesPos", fileContents);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    // Sentence under test and its character span.
    final Constituent sentence = annotated.getView(ViewNames.SENTENCE).getConstituents().get(3);
    final int spanStart = sentence.getStartCharOffset();
    final int spanEnd = sentence.getEndCharOffset();

    final List<Constituent> posInSentence =
            annotated.getView(ViewNames.POS).getConstituentsOverlappingCharSpan(spanStart, spanEnd);
    final List<Constituent> tokensInSentence =
            annotated.getView(ViewNames.TOKENS).getConstituentsOverlappingCharSpan(spanStart, spanEnd);

    assertTrue(posInSentence.size() > 0);
    assertEquals(tokensInSentence.size(), posInSentence.size());
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class LBJavaUtils method recordToLBJTokens.
/**
 * Turns the tokens of a {@link TextAnnotation} into a flat list of LBJ {@code Token}s.
 *
 * <p>Tokens are grouped by sentence via {@link #tokensAsStrings}; within each sentence every
 * {@code Word}/{@code Token} is linked to its predecessor, and the chain restarts at each
 * sentence boundary. Double quotes are rewritten alternately to {@code ``} and {@code ''}
 * (starting with {@code ``} in every sentence) and brackets to their {@code -LRB-}-style names.
 * When the record has a non-empty POS view, the label of the next POS constituent is copied to
 * each word's {@code partOfSpeech}.
 *
 * @param record the annotation providing the TOKENS and SENTENCE views (POS is optional)
 * @return the LBJ tokens of all sentences, in order
 */
public static List<Token> recordToLBJTokens(TextAnnotation record) {
    List<Token> result = new LinkedList<>();
    List<List<String>> sentences = tokensAsStrings(
            record.getView(ViewNames.TOKENS).getConstituents(),
            record.getView(ViewNames.SENTENCE).getConstituents(),
            record.getText());

    List<Constituent> posTags =
            record.hasView(ViewNames.POS) ? record.getView(ViewNames.POS).getConstituents() : null;
    final boolean havePos = posTags != null && !posTags.isEmpty();
    int nextTag = 0; // runs across sentences, one POS constituent per token

    for (List<String> sentence : sentences) {
        boolean nextQuoteOpens = true;
        Word prevWord = null;
        Token prevToken = null;

        for (String raw : sentence) {
            final String text;
            switch (raw) {
                case "\"":
                    text = nextQuoteOpens ? "``" : "''";
                    nextQuoteOpens = !nextQuoteOpens;
                    break;
                case "(":
                    text = "-LRB-";
                    break;
                case ")":
                    text = "-RRB-";
                    break;
                case "{":
                    text = "-LCB-";
                    break;
                case "}":
                    text = "-RCB-";
                    break;
                case "[":
                    text = "-LSB-";
                    break;
                case "]":
                    text = "-RSB-";
                    break;
                default:
                    text = raw;
                    break;
            }

            Word word = new Word(text, prevWord);
            if (havePos) {
                word.partOfSpeech = posTags.get(nextTag++).getLabel();
            }

            Token lbjToken = new Token(word, prevToken, "");
            result.add(lbjToken);
            if (prevToken != null) {
                prevToken.next = lbjToken;
            }

            prevWord = word;
            prevToken = lbjToken;
        }
    }
    return result;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class LBJavaUtils method tokensAsStrings.
/**
 * Groups token surface strings by sentence.
 *
 * <p>Tokens are visited in list order. Whenever a token's start span is at or past the end
 * span of the current sentence, a new group is opened and the next sentence becomes current
 * (one step per token). Each token's string is the substring of {@code rawText} between its
 * character offsets.
 *
 * @param tokens token constituents, in order
 * @param sentences sentence constituents, in order; the first element is read unconditionally,
 *        so an empty list makes {@code sentences.get(0)} fail
 * @param rawText the text the character offsets refer to
 * @return one list of token strings per sentence encountered
 */
public static List<List<String>> tokensAsStrings(List<Constituent> tokens, List<Constituent> sentences, String rawText) {
    List<List<String>> grouped = new ArrayList<>();
    int sentenceIndex = 0;
    Constituent currentSentence = sentences.get(sentenceIndex);

    List<String> bucket = new ArrayList<String>();
    grouped.add(bucket);

    for (Constituent token : tokens) {
        boolean pastCurrentSentence = token.getStartSpan() >= currentSentence.getEndSpan();
        if (pastCurrentSentence) {
            // Open the group for the following sentence before advancing to it.
            bucket = new ArrayList<String>();
            grouped.add(bucket);
            sentenceIndex++;
            currentSentence = sentences.get(sentenceIndex);
        }
        String surface = rawText.substring(token.getStartCharOffset(), token.getEndCharOffset());
        bucket.add(surface);
    }
    return grouped;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
the class NERAnnotatorTest method evaluatePerformance.
/**
 * Timing check for the annotator (not run automatically: the {@code @Test} annotation below is
 * commented out).
 *
 * <p>The view is produced twice up front so that lazy loading does not count towards the
 * measurement, then 100 more times under a timer. Each timed run also verifies that every
 * constituent's string form is contained in {@code entities}. The average time per run must
 * not exceed the value returned by {@code measureMachinePerformance()}.
 */
// @Test
public void evaluatePerformance() {
    final int runs = 100;

    // First warm-up: trigger any lazy loading before timing starts.
    TextAnnotation warmUp = tab.createTextAnnotation(TEST_INPUT);
    try {
        getView(warmUp);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    long expectedPerformance = this.measureMachinePerformance();
    logger.info("Expect " + expectedPerformance);

    // Second warm-up: make sure a view is actually produced.
    TextAnnotation probe = tab.createTextAnnotation(TEST_INPUT);
    View probeView = null;
    try {
        probeView = getView(probe);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertTrue(probeView != null);

    // Timed section.
    final long startedAt = System.currentTimeMillis();
    for (int run = 0; run < runs; run++) {
        TextAnnotation ta = tab.createTextAnnotation(TEST_INPUT);
        View nerView = null;
        try {
            nerView = getView(ta);
        } catch (AnnotatorException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
        assertTrue(nerView != null);
        for (Constituent entity : nerView.getConstituents()) {
            String rendered = entity.toString();
            assertTrue("No entity named \"" + rendered + "\"", entities.contains(entity.toString()));
        }
    }
    final long averageMillis = (System.currentTimeMillis() - startedAt) / runs;

    System.out.printf("For text size = %d, average NER runtime = %d, normalized = %f", TEST_INPUT.length(), averageMillis, (double) averageMillis / (double) expectedPerformance);
    assertTrue(averageMillis <= expectedPerformance);
}
Aggregations