Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
From the class LabelTwoAfter, method getFeatures.
/**
 * Extracts part-of-speech features for the window obtained by shifting the span of {@code c}
 * two tokens to the right, i.e. token indices {@code [startSpan + 2, endSpan + 2)}.
 *
 * <p>For every index in the window that lies inside the text, one feature named
 * {@code LabelTwoAfter_<source>:<tag>_<token>} is added. For every index at or beyond the
 * number of tokens, a feature named {@code LabelTwoAfter_<source>:} (empty value) is added.
 * {@code <source>} is {@code POS} when {@code isPOSFromCounting} is false (tags are read from
 * the {@code ViewNames.POS} view); otherwise it is {@code BaselinePOS} when
 * {@code isBaseLineCounting} is true and {@code MikheevPOS} when it is false (tags are
 * produced by {@code counter.tag}).
 *
 * @param c the constituent whose right-hand context is featurized
 * @return the generated features, held in a {@link LinkedHashSet}
 * @throws EdisonException as declared by the method signature
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    // The tag source, and therefore the classifier name, does not depend on the token index,
    // so it is decided once instead of on every iteration.
    final boolean tagsFromView = !isPOSFromCounting;
    final String source;
    if (tagsFromView) {
        source = "POS";
    } else if (isBaseLineCounting) {
        source = "BaselinePOS";
    } else {
        source = "MikheevPOS";
    }
    final String classifier = "LabelTwoAfter" + "_" + source;

    TextAnnotation ta = c.getTextAnnotation();
    int lenOfTokens = ta.getTokens().length;
    int start = c.getStartSpan() + 2;
    int end = c.getEndSpan() + 2;

    Set<Feature> features = new LinkedHashSet<>();
    // Fetched lazily and only once: the view is requested only if at least one in-range
    // token actually needs a tag from it, exactly as in the per-iteration lookup it replaces.
    TokenLabelView posView = null;
    for (int i = start; i < end; i++) {
        if (i >= lenOfTokens) {
            // The window runs past the end of the text: emit the empty-valued feature.
            features.add(new DiscreteFeature(classifier + ":" + ""));
            continue;
        }
        if (tagsFromView && posView == null) {
            posView = (TokenLabelView) ta.getView(ViewNames.POS);
        }
        String form = ta.getToken(i);
        String tag = tagsFromView ? posView.getLabel(i) : counter.tag(i, ta);
        features.add(new DiscreteFeature(classifier + ":" + tag + "_" + form));
    }
    return features;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
From the class LabelTwoBefore, method getFeatures.
/**
 * Extracts part-of-speech features for the window obtained by shifting the span of {@code c}
 * two tokens to the left, i.e. token indices {@code [startSpan - 2, endSpan - 2)}.
 *
 * <p>For every non-negative index in the window, one feature named
 * {@code LabelTwoBefore_<source>:<tag>_<token>} is added. For every negative index (the
 * window starts before the first token), a feature named {@code LabelTwoBefore_<source>:}
 * (empty value) is added. {@code <source>} is {@code POS} when {@code isPOSFromCounting} is
 * false (tags are read from the {@code ViewNames.POS} view); otherwise it is
 * {@code BaselinePOS} when {@code isBaseLineCounting} is true and {@code MikheevPOS} when it
 * is false (tags are produced by {@code counter.tag}).
 *
 * @param c the constituent whose left-hand context is featurized
 * @return the generated features, held in a {@link LinkedHashSet}
 * @throws EdisonException as declared by the method signature
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    // The tag source, and therefore the classifier name, does not depend on the token index,
    // so it is decided once instead of on every iteration.
    final boolean tagsFromView = !isPOSFromCounting;
    final String source;
    if (tagsFromView) {
        source = "POS";
    } else if (isBaseLineCounting) {
        source = "BaselinePOS";
    } else {
        source = "MikheevPOS";
    }
    final String classifier = "LabelTwoBefore" + "_" + source;

    TextAnnotation ta = c.getTextAnnotation();
    int start = c.getStartSpan() - 2;
    int end = c.getEndSpan() - 2;

    Set<Feature> features = new LinkedHashSet<>();
    // Fetched lazily and only once: the view is requested only if at least one in-range
    // token actually needs a tag from it, exactly as in the per-iteration lookup it replaces.
    TokenLabelView posView = null;
    for (int i = start; i < end; i++) {
        if (i < 0) {
            // The window starts before the first token: emit the empty-valued feature.
            features.add(new DiscreteFeature(classifier + ":" + ""));
            continue;
        }
        if (tagsFromView && posView == null) {
            posView = (TokenLabelView) ta.getView(ViewNames.POS);
        }
        String form = ta.getToken(i);
        String tag = tagsFromView ? posView.getLabel(i) : counter.tag(i, ta);
        features.add(new DiscreteFeature(classifier + ":" + tag + "_" + form));
    }
    return features;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
From the class StanfordTrueCaseHandler, method addView.
/**
 * Runs the Stanford pipeline over the raw text of {@code ta} and stores the resulting
 * true-case strings in a new {@link TokenLabelView} registered under {@code viewName}.
 *
 * <p>For each Stanford token, the constituents of the {@code ViewNames.TOKENS} view that
 * overlap the token's character span are collected. The largest end span among them
 * ({@code endIndex}) determines the single-token constituent {@code [endIndex - 1, endIndex)}
 * that receives the token's true-case text as its label.
 *
 * @param ta the text annotation to annotate; the new view is added to it
 * @throws AnnotatorException if a Stanford token's character span overlaps no constituent of
 *         the {@code ViewNames.TOKENS} view, so it cannot be aligned to a token index
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    Annotation document = new Annotation(ta.text);
    pipeline.annotate(document);
    TokenLabelView vu = new TokenLabelView(viewName, ta);
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String trueCase = token.get(CoreAnnotations.TrueCaseTextAnnotation.class);
            int beginCharOffsetS = token.beginPosition();
            // The end offset passed to the overlap query is one less than endPosition().
            int endCharOffset = token.endPosition() - 1;
            List<Constituent> overlappingCons = ta.getView(ViewNames.TOKENS)
                    .getConstituentsOverlappingCharSpan(beginCharOffsetS, endCharOffset);
            // An empty overlap list used to surface as a bare NoSuchElementException from
            // Optional.get(); report it as the declared checked exception with the offsets.
            int endIndex = overlappingCons.stream()
                    .mapToInt(Constituent::getEndSpan)
                    .max()
                    .orElseThrow(() -> new AnnotatorException(
                            "No constituent in view '" + ViewNames.TOKENS
                                    + "' overlaps the character span [" + beginCharOffsetS
                                    + ", " + endCharOffset + "] of a Stanford token."));
            Constituent c = new Constituent(trueCase, viewName, ta, endIndex - 1, endIndex);
            vu.addConstituent(c);
        }
    }
    ta.addView(viewName, vu);
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
From the class IllinoisLemmatizer, method createLemmaView.
/**
 * Builds a {@code ViewNames.LEMMA} view for the given TextAnnotation, attaches it to that
 * TextAnnotation, and returns it.
 *
 * <p>One single-token constituent is added per token index; its label is the value returned
 * by {@code getLemma} for that index.
 *
 * @param inputTa the TextAnnotation to lemmatize; the new view is added to it
 * @return the newly created lemma view
 * @throws IOException as declared by the method signature
 */
public View createLemmaView(TextAnnotation inputTa) throws IOException {
    final int tokenCount = inputTa.getTokens().length;
    final TokenLabelView view = new TokenLabelView(ViewNames.LEMMA, NAME, inputTa, 1.0);

    int position = 0;
    while (position < tokenCount) {
        final int next = position + 1;
        // Each lemma labels the one-token span [position, next).
        view.addConstituent(
                new Constituent(getLemma(inputTa, position), ViewNames.LEMMA, inputTa, position, next));
        position = next;
    }

    inputTa.addView(ViewNames.LEMMA, view);
    return view;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
From the class TestDiff, method testAnnotatorDiff.
/**
 * Runs {@code POSAnnotator} over a TextAnnotation built from {@code refTokens} and checks the
 * resulting {@code ViewNames.POS} view against {@code refTags}.
 *
 * <p>The test fails when the annotator throws, when no reference tags are available, when the
 * number of tagged constituents differs from the number of reference tags, or when the
 * fraction of matching labels is below {@code thresholdAcc}.
 */
@Test
public void testAnnotatorDiff() {
    POSAnnotator annotator = new POSAnnotator();
    TextAnnotation record = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(refTokens);
    try {
        annotator.getView(record);
    } catch (AnnotatorException e) {
        // Same message as before, but with the cause attached so the stack trace is kept.
        throw new AssertionError("AnnotatorException thrown!\n" + e.getMessage(), e);
    }
    TokenLabelView view = (TokenLabelView) record.getView(ViewNames.POS);

    int expectedCount = refTags.size();
    if (expectedCount == 0) {
        // Without this guard the accuracy below is 0/0 = NaN, "NaN < thresholdAcc" is false,
        // and the test would pass without having checked anything.
        fail("No reference tags available; tagger accuracy cannot be computed.");
    }
    if (expectedCount != view.getNumberOfConstituents()) {
        fail("Number of tokens tagged in annotator does not match actual number of tokens!");
    }

    int correctCounter = 0;
    for (int i = 0; i < expectedCount; i++) {
        if (view.getLabel(i).equals(refTags.get(i))) {
            correctCounter++;
        }
    }

    double result = ((double) correctCounter) / expectedCount;
    if (result < thresholdAcc) {
        fail("Tagger performance is insufficient: " + "\nProduced: " + result + "\nExpected: " + thresholdAcc);
    }
}
Aggregations