Usage of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in the project cogcomp-nlp by CogComp.
Class PreviousTags, method classify.
/**
 * Emits one discrete feature per preceding token, looking back at most two tokens.
 *
 * <p>Feature identifiers are the relative offsets of the preceding tokens (counted from the
 * furthest one reached, e.g. "-2" then "-1"). The feature value is the token's {@code label}
 * field while {@code Chunker.isTraining} is set, and the value returned by
 * {@code __Chunker.discreteValue} otherwise.
 *
 * @param __example the example to extract features from; it is cast to {@code Token}
 * @return the features for the preceding tokens; empty when there is no preceding token
 */
public FeatureVector classify(Object __example) {
    final Token target = (Token) __example;
    final FeatureVector features = new FeatureVector();

    // Step back over at most two predecessors, tracking the (negative) offset reached.
    Token cursor = target;
    int offset = 0;
    while (offset > -2 && cursor.previous != null) {
        cursor = (Token) cursor.previous;
        offset--;
    }

    // Walk forward again up to, but not including, the target token.
    while (cursor != target) {
        final String id = String.valueOf(offset++);
        final String value = Chunker.isTraining ? cursor.label : __Chunker.discreteValue(cursor);
        features.addFeature(new DiscretePrimitiveStringFeature(
                this.containingPackage, this.name, id, value, valueIndexOf(value), (short) 0));
        cursor = (Token) cursor.next;
    }
    return features;
}
Usage of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in the project cogcomp-nlp by CogComp.
Class SOPrevious, method classify.
/**
 * Emits three conjunctive features built from the chunk labels and POS tags of the (at most
 * two) tokens preceding the example and of the example itself.
 *
 * <ul>
 *   <li>{@code "ll"}: first collected label, "_", second collected label</li>
 *   <li>{@code "lt1"}: first collected label, "_", second collected POS tag</li>
 *   <li>{@code "lt2"}: second collected label, "_", third collected POS tag</li>
 * </ul>
 *
 * Labels come from the token's {@code label} field while {@code Chunker.isTraining} is set and
 * from {@code __Chunker.discreteValue} otherwise. Slots that were never filled (fewer than two
 * preceding tokens) remain {@code null} and therefore appear as the text "null" in the value.
 *
 * @param __example the example to extract features from; it is cast to {@code Token}
 * @return a vector holding exactly the three features described above
 */
public FeatureVector classify(Object __example) {
    final Token target = (Token) __example;
    final FeatureVector features = new FeatureVector();

    // Rewind over at most two predecessors.
    Token cursor = target;
    for (int steps = 0; steps < 2 && cursor.previous != null; steps++) {
        cursor = (Token) cursor.previous;
    }

    final String[] posTags = new String[3];
    final String[] chunkLabels = new String[2];
    int slot = 0;
    while (cursor != target) {
        posTags[slot] = __POSTagger.discreteValue(cursor);
        chunkLabels[slot] = Chunker.isTraining ? cursor.label : __Chunker.discreteValue(cursor);
        slot++;
        cursor = (Token) cursor.next;
    }
    // The cursor now rests on the target token: record its POS tag in the next free slot.
    posTags[slot] = __POSTagger.discreteValue(cursor);

    final String[] ids = {"ll", "lt1", "lt2"};
    final String[] values = {
        chunkLabels[0] + "_" + chunkLabels[1],
        chunkLabels[0] + "_" + posTags[1],
        chunkLabels[1] + "_" + posTags[2]
    };
    for (int k = 0; k < ids.length; k++) {
        features.addFeature(new DiscretePrimitiveStringFeature(
                this.containingPackage, this.name, ids[k], values[k], valueIndexOf(values[k]), (short) 0));
    }
    return features;
}
Usage of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in the project cogcomp-nlp by CogComp.
Class Reuters2003Parser, method next.
/**
 * Reads the next sentence from the input and returns it as a <code>LinkedVector</code> built
 * from the chain of <code>Token</code>s created for its rows.
 *
 * <p>Rows are the <code>String[]</code> values produced by <code>super.next()</code>. Leading
 * rows with fewer than two columns, or whose column 4 equals "-X-", are skipped. The sentence
 * ends at the first row with zero columns or at end of input. For every row, column 5 and
 * column 4 are passed to <code>Word</code> and column 3 is used as the token label. An "I"
 * label is rewritten (in the row array itself) to start with "B" when it opens the sentence or
 * when the previous label does not end with the same suffix.
 *
 * @return the sentence as a <code>LinkedVector</code>, or <code>null</code> when the input is
 *         exhausted
 **/
public Object next() {
    // Skip rows that cannot start a sentence.
    String[] fields;
    do {
        fields = (String[]) super.next();
    } while (fields != null && (fields.length < 2 || fields[4].equals("-X-")));

    if (fields == null) {
        return null;
    }

    // A sentence-initial "I" label always opens a chunk.
    if (fields[3].charAt(0) == 'I') {
        fields[3] = "B" + fields[3].substring(1);
    }
    Token tail = new Token(new Word(fields[5], fields[4]), null, fields[3]);
    String previousLabel = fields[3];

    while (true) {
        fields = (String[]) super.next();
        if (fields == null || fields.length == 0) {
            break;
        }
        if (fields[3].charAt(0) == 'I') {
            final String chunkType = fields[3].substring(2);
            if (!previousLabel.endsWith(chunkType)) {
                fields[3] = "B" + fields[3].substring(1);
            }
        }
        final Token appended = new Token(new Word(fields[5], fields[4]), tail, fields[3]);
        tail.next = appended;
        tail = appended;
        previousLabel = fields[3];
    }

    return new LinkedVector(tail);
}
Usage of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in the project cogcomp-nlp by CogComp.
Class ChunkerAnnotator, method addView.
/**
 * Runs the chunker over every token of {@code record} and adds the resulting chunks as a
 * {@code SpanLabelView} named {@code ViewNames.SHALLOW_PARSE}.
 *
 * <p>Each token's chunk tag is read from {@code lbjtoken.type} after calling
 * {@code tagger.discreteValue(lbjtoken)}. Tags are interpreted by their first character:
 * 'B' opens a chunk whose label is the tag text from index 2 onward, 'I' continues the open
 * chunk, and 'O' is outside any chunk. An 'I' tag with no chunk open is rewritten to a 'B' tag
 * so that it opens a chunk itself. Chunk spans run from the start span of the opening token's
 * POS constituent to the end span of the last token's POS constituent.
 *
 * @param record the text annotation to annotate; must already contain the token, sentence and
 *        POS views
 * @throws AnnotatorException if a required view is missing, or if a token has a null or empty
 *         chunk tag after tagging
 * @throws IllegalArgumentException if an 'I' tag is shorter than three characters
 */
@Override
public void addView(TextAnnotation record) throws AnnotatorException {
    if (!record.hasView(tokensfield) || !record.hasView(sentencesfield) || !record.hasView(posfield)) {
        String msg = "Record must be tokenized, sentence split, and POS-tagged first.";
        logger.error(msg);
        throw new AnnotatorException(msg);
    }
    List<Constituent> tags = record.getView(posfield).getConstituents();
    List<Token> lbjTokens = LBJavaUtils.recordToLBJTokens(record);
    View chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, this.NAME, record, 1.0);

    int currentChunkStart = 0;
    // Label of the chunk currently being built; null means "no chunk is open". It must start
    // out as null: a non-null initial value would be mistaken for an open chunk and emitted as
    // an empty-label constituent as soon as the second token is reached.
    String clabel = null;
    Constituent previous = null;
    int tcounter = 0;
    for (Token lbjtoken : lbjTokens) {
        Constituent current = tags.get(tcounter);
        tagger.discreteValue(lbjtoken);
        logger.debug("{} {}", lbjtoken.toString(), (null == lbjtoken.type) ? "NULL" : lbjtoken.type);

        // Fail with a descriptive error instead of a bare NullPointerException /
        // StringIndexOutOfBoundsException from the charAt(0) calls below.
        if (null == lbjtoken.type || lbjtoken.type.isEmpty()) {
            String msg = "Chunker produced no label for token '" + lbjtoken + "'.";
            logger.error(msg);
            throw new AnnotatorException(msg);
        }

        // what happens if we see an Inside tag -- even if it doesn't follow a Before tag
        if (lbjtoken.type.charAt(0) == 'I') {
            if (lbjtoken.type.length() < 3)
                throw new IllegalArgumentException("Chunker word label '" + lbjtoken.type + "' is too short!");
            if (null == clabel) {
                // No chunk is open (start of document, or we just saw an Outside tag):
                // modify lbjtoken.type so the ifs below treat this token as a chunk start.
                lbjtoken.type = "B" + lbjtoken.type.substring(1);
            } else if (clabel.length() >= 3 && !clabel.equals(lbjtoken.type.substring(2))) {
                // The open chunk has a different label: start a new chunk here.
                // NOTE(review): the length() >= 3 guard means labels shorter than three
                // characters (e.g. "NP") never trigger this relabelling -- confirm whether
                // that is intended.
                lbjtoken.type = "B" + lbjtoken.type.substring(1);
            }
        }

        char tagKind = lbjtoken.type.charAt(0);

        // A Before or Outside tag closes the chunk in progress, if any. When clabel is
        // non-null a 'B' token has already been processed, so previous is non-null too.
        if ((tagKind == 'B' || tagKind == 'O') && clabel != null) {
            int currentChunkEnd = previous.getEndSpan();
            Constituent label = new Constituent(clabel, ViewNames.SHALLOW_PARSE, record, currentChunkStart, currentChunkEnd);
            chunkView.addConstituent(label);
            clabel = null;
        }

        if (tagKind == 'B') {
            currentChunkStart = current.getStartSpan();
            clabel = lbjtoken.type.substring(2);
        }
        previous = current;
        tcounter++;
    }

    // Flush a chunk that is still open at the end of the document.
    if (clabel != null) {
        int currentChunkEnd = previous.getEndSpan();
        Constituent label = new Constituent(clabel, ViewNames.SHALLOW_PARSE, record, currentChunkStart, currentChunkEnd);
        chunkView.addConstituent(label);
    }
    record.addView(ViewNames.SHALLOW_PARSE, chunkView);
}
Usage of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in the project cogcomp-nlp by CogComp.
Class suffixFeatures, method classify.
/**
 * Emits lower-cased suffix features (lengths 1, 2 and 3) for words treated as unknown.
 *
 * <p>A word is treated as unknown when, during training
 * ({@code POSTaggerUnknown.isTraining}), its observed count in {@code __baselineTarget} is at
 * most {@code POSLabeledUnknownWordParser.threshold}, or, outside training, when
 * {@code __baselineTarget.discreteValue} returns "UNKNOWN". Features are produced only for
 * unknown words longer than three characters. Suffixes are added shortest first and stop at
 * the first suffix whose leading character is not a letter; the three-character suffix
 * additionally requires the word to be longer than four characters. Every feature has the
 * value "true" and the suffix as its identifier.
 *
 * @param __example the example to extract features from; it is cast to {@code Token}
 * @return the suffix features, possibly empty
 */
public FeatureVector classify(Object __example) {
    final Token token = (Token) __example;
    final FeatureVector features = new FeatureVector();
    final int length = token.form.length();

    final boolean unknown;
    if (POSTaggerUnknown.isTraining) {
        unknown = __baselineTarget.observedCount(token.form) <= POSLabeledUnknownWordParser.threshold;
    } else {
        unknown = __baselineTarget.discreteValue(token).equals("UNKNOWN");
    }

    if (!unknown || length <= 3) {
        return features;
    }

    final String value = "true";
    for (int suffixLength = 1; suffixLength <= 3; suffixLength++) {
        // The longest suffix is only used for words of more than four characters.
        if (suffixLength == 3 && length <= 4) {
            break;
        }
        // Stop growing the suffix as soon as it would start with a non-letter.
        if (!Character.isLetter(token.form.charAt(length - suffixLength))) {
            break;
        }
        final String id = token.form.substring(length - suffixLength).toLowerCase();
        features.addFeature(new DiscretePrimitiveStringFeature(
                this.containingPackage, this.name, id, value, valueIndexOf(value), (short) 0));
    }
    return features;
}
Aggregations