Use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in the cogcomp-nlp project by CogComp.
Class CoNLL2000Parser, method next.
/**
 * Reads the next sentence from the input and returns it as a <code>LinkedVector</code> of
 * <code>Token</code>s, or <code>null</code> once the underlying parser is exhausted.
 *
 * <p>Each row delivered by <code>super.next()</code> is treated as a <code>String[]</code> whose
 * element 0 is the word form, element 1 the part-of-speech tag (a lone <code>"-"</code> is stored
 * as <code>null</code>) and element 2 the token's label. Zero-length rows before a sentence are
 * skipped; a zero-length row (or end of input) after at least one token ends the sentence.
 *
 * @return the sentence as a <code>LinkedVector</code>, or <code>null</code> when no rows remain
 **/
public Object next() {
    String[] columns = (String[]) super.next();
    // Skip any empty rows that precede the sentence.
    while (columns != null && columns.length == 0) {
        columns = (String[]) super.next();
    }
    if (columns == null) {
        return null;
    }

    Token last = null;
    do {
        String tag = columns[1];
        if (tag.equals("-")) {
            tag = null;
        }
        // Chain the new token behind the previous one (no predecessor for the first token).
        Token current = new Token(new Word(columns[0], tag), last, columns[2]);
        if (last != null) {
            last.next = current;
        }
        last = current;
        columns = (String[]) super.next();
    } while (columns != null && columns.length > 0);

    // The vector is built from the final token of the chain, as in the original code.
    return new LinkedVector(last);
}
Use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in the cogcomp-nlp project by CogComp.
Class TestDiff, method testDiff.
/**
 * Runs the chunker over the tokens parsed from <code>testFile</code>, renders every sentence in
 * bracketed form (<code>[TYPE (POS word) ... ]</code>) and fails on the first sentence whose
 * trimmed rendering differs from the trimmed entry at the same position in
 * <code>refSentences</code>.
 */
@Test
public void testDiff() {
    Chunker tagger = new Chunker();
    Parser parser = new PlainToTokenParser(new WordSplitter(new SentenceSplitter(testFile)));
    String previous = "";
    StringBuilder sentence = new StringBuilder();
    int sentenceCounter = 0;
    for (Token w = (Token) parser.next(); w != null; w = (Token) parser.next()) {
        String prediction = tagger.discreteValue(w);

        // A chunk opens on every "B-" tag, and on an "I-" tag whose type does not continue the
        // previous prediction. Parentheses spell out the grouping the original relied on
        // implicitly (&& binds tighter than ||).
        boolean opensChunk = prediction.startsWith("B-")
                || (prediction.startsWith("I-") && !previous.endsWith(prediction.substring(2)));
        if (opensChunk) {
            sentence.append("[").append(prediction.substring(2)).append(" ");
        }

        sentence.append("(").append(w.partOfSpeech).append(" ").append(w.form).append(") ");

        if (!prediction.equals("O")) {
            // A chunk closes at the end of the sentence, or when the next token is outside any
            // chunk, starts a new chunk, or carries a different chunk type. The next token is
            // classified once, and only when it is actually needed.
            boolean closesChunk = w.next == null;
            if (!closesChunk) {
                String nextPrediction = tagger.discreteValue(w.next);
                closesChunk = nextPrediction.equals("O")
                        || nextPrediction.startsWith("B-")
                        || !nextPrediction.endsWith(prediction.substring(2));
            }
            if (closesChunk) {
                sentence.append("] ");
            }
        }

        if (w.next == null) {
            // Last token of the sentence: compare with the reference and start a new sentence.
            String produced = sentence.toString().trim();
            String refSentence = refSentences.get(sentenceCounter).trim();
            if (!produced.equals(refSentence))
                fail("Produced output doesn't match reference: " + "\nProduced: " + produced + "\nExpected: " + refSentence);
            sentence.setLength(0);
            sentenceCounter++;
        }
        previous = prediction;
    }
}
Use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in the cogcomp-nlp project by CogComp.
Class TestChunkerModels, method testAccuracy.
/**
 * Classifies every token produced from <code>labeledData</code> and logs the percentage of
 * tokens whose predicted value equals the label text extracted from the token's
 * <code>toString()</code> output.
 */
public void testAccuracy() {
    Parser parser = new ChildrenFromVectors(new CoNLL2000Parser(labeledData));
    int total = 0;
    int correct = 0;

    Token current = (Token) parser.next();
    while (current != null) {
        String predicted = tagger.discreteValue(current);

        // The expected label is the text between the first '(' and the first ' ' of the
        // token's string form (presumably "(label word)" -- confirm against Token.toString()).
        String rendered = current.toString();
        int labelStart = rendered.indexOf('(') + 1;
        int labelEnd = rendered.indexOf(' ');
        String expected = rendered.substring(labelStart, labelEnd);

        if (predicted.equals(expected)) {
            correct++;
        }
        total++;
        current = (Token) parser.next();
    }

    double percent = 100.0 * (double) correct / (double) total;
    logger.info("Total accuracy over " + total + " items: " + String.format("%.2f", percent) + "%");
}
Use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in the cogcomp-nlp project by CogComp.
Class Chunker, method cachedFeatureValue.
/**
 * Returns the feature for the given example, using the token's <code>type</code> field as a
 * cache of the feature's string value.
 *
 * <p>When <code>type</code> is <code>null</code> the feature is obtained from
 * <code>super.featureValue</code> and its string value is stored in <code>type</code>;
 * otherwise a new <code>DiscretePrimitiveStringFeature</code> is built from the stored value.
 *
 * @param __example the example to classify; cast to <code>Token</code>
 * @return the computed or reconstructed feature
 */
private Feature cachedFeatureValue(Object __example) {
    Token token = (Token) __example;
    String cached = token.type;

    if (cached == null) {
        // Cache miss: compute the feature and remember its string value on the token.
        Feature computed = super.featureValue(__example);
        token.type = computed.getStringValue();
        return computed;
    }

    // Cache hit: rebuild the feature from the stored string value.
    return new DiscretePrimitiveStringFeature(containingPackage, name, "", cached,
            valueIndexOf(cached), (short) allowableValues().length);
}
Aggregations