use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
the class LabelTwoAfter method getFeatures.
/**
 * Builds one feature per token position in the window that lies two tokens to the right of the
 * constituent, i.e. positions {@code [startSpan + 2, endSpan + 2)}.
 *
 * <p>For a position inside the text the feature value is {@code tag + "_" + token}. The tag is
 * read from the {@link ViewNames#POS} view when {@code isPOSFromCounting} is false and from
 * {@code counter.tag(...)} otherwise; {@code isBaseLineCounting} only selects the name under
 * which counter-based features are emitted. A position at or past the token count yields a
 * feature with an empty value instead.
 *
 * @param c the constituent whose right-hand context is described
 * @return the generated features, held in insertion order
 * @throws EdisonException if one of the underlying lookups raises it
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final TextAnnotation annotation = c.getTextAnnotation();
    final int tokenCount = annotation.getTokens().length;
    final int windowBegin = c.getStartSpan() + 2;
    final int windowEnd = c.getEndSpan() + 2;

    // The tag source does not change during a call, so the feature-name head is resolved once.
    final boolean tagsFromView = !isPOSFromCounting;
    final String tagSource;
    if (tagsFromView) {
        tagSource = "POS";
    } else if (isBaseLineCounting) {
        tagSource = "BaselinePOS";
    } else {
        tagSource = "MikheevPOS";
    }
    final String featureHead = "LabelTwoAfter" + "_" + tagSource + ":";

    final Set<Feature> collected = new LinkedHashSet<>();
    for (int pos = windowBegin; pos < windowEnd; pos++) {
        // Window position beyond the last token: emit the empty-valued placeholder.
        if (pos >= tokenCount) {
            collected.add(new DiscreteFeature(featureHead + ""));
            continue;
        }

        final String token;
        final String tag;
        if (tagsFromView) {
            // The view is looked up only when an in-range position actually needs it.
            TokenLabelView posLabels = (TokenLabelView) annotation.getView(ViewNames.POS);
            token = annotation.getToken(pos);
            tag = posLabels.getLabel(pos);
        } else {
            token = annotation.getToken(pos);
            tag = counter.tag(pos, annotation);
        }
        collected.add(new DiscreteFeature(featureHead + tag + "_" + token));
    }
    return collected;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
the class LabelTwoBefore method getFeatures.
/**
 * Builds one feature per token position in the window that lies two tokens to the left of the
 * constituent, i.e. positions {@code [startSpan - 2, endSpan - 2)}.
 *
 * <p>For a non-negative position the feature value is {@code tag + "_" + token}. The tag is read
 * from the {@link ViewNames#POS} view when {@code isPOSFromCounting} is false and from
 * {@code counter.tag(...)} otherwise; {@code isBaseLineCounting} only selects the name under
 * which counter-based features are emitted. A negative position yields a feature with an empty
 * value instead.
 *
 * @param c the constituent whose left-hand context is described
 * @return the generated features, held in insertion order
 * @throws EdisonException if one of the underlying lookups raises it
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final TextAnnotation annotation = c.getTextAnnotation();
    final int windowBegin = c.getStartSpan() - 2;
    final int windowEnd = c.getEndSpan() - 2;

    // The tag source does not change during a call, so the feature-name head is resolved once.
    final boolean tagsFromView = !isPOSFromCounting;
    final String tagSource;
    if (tagsFromView) {
        tagSource = "POS";
    } else if (isBaseLineCounting) {
        tagSource = "BaselinePOS";
    } else {
        tagSource = "MikheevPOS";
    }
    final String featureHead = "LabelTwoBefore" + "_" + tagSource + ":";

    final Set<Feature> collected = new LinkedHashSet<>();
    for (int pos = windowBegin; pos < windowEnd; pos++) {
        // Window position before the first token: emit the empty-valued placeholder.
        if (pos < 0) {
            collected.add(new DiscreteFeature(featureHead + ""));
            continue;
        }

        final String token;
        final String tag;
        if (tagsFromView) {
            // The view is looked up only when an in-range position actually needs it.
            TokenLabelView posLabels = (TokenLabelView) annotation.getView(ViewNames.POS);
            token = annotation.getToken(pos);
            tag = posLabels.getLabel(pos);
        } else {
            token = annotation.getToken(pos);
            tag = counter.tag(pos, annotation);
        }
        collected.add(new DiscreteFeature(featureHead + tag + "_" + token));
    }
    return collected;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
the class LabelOneBefore method getFeatures.
/**
 * Builds one feature per token position in the window that lies one token to the left of the
 * constituent, i.e. positions {@code [startSpan - 1, endSpan - 1)}.
 *
 * <p>For a non-negative position the feature value is {@code tag + "_" + token}. The tag is read
 * from the {@link ViewNames#POS} view when {@code isPOSFromCounting} is false and from
 * {@code counter.tag(...)} otherwise; {@code isBaseLineCounting} only selects the name under
 * which counter-based features are emitted. A negative position yields a feature with an empty
 * value instead.
 *
 * @param c the constituent whose left-hand context is described
 * @return the generated features, held in insertion order
 * @throws EdisonException if one of the underlying lookups raises it
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final TextAnnotation annotation = c.getTextAnnotation();
    final int windowBegin = c.getStartSpan() - 1;
    final int windowEnd = c.getEndSpan() - 1;

    // The tag source does not change during a call, so the feature-name head is resolved once.
    final boolean tagsFromView = !isPOSFromCounting;
    final String tagSource;
    if (tagsFromView) {
        tagSource = "POS";
    } else if (isBaseLineCounting) {
        tagSource = "BaselinePOS";
    } else {
        tagSource = "MikheevPOS";
    }
    final String featureHead = "LabelOneBefore" + "_" + tagSource + ":";

    final Set<Feature> collected = new LinkedHashSet<>();
    for (int pos = windowBegin; pos < windowEnd; pos++) {
        // Window position before the first token: emit the empty-valued placeholder.
        if (pos < 0) {
            collected.add(new DiscreteFeature(featureHead + ""));
            continue;
        }

        final String token;
        final String tag;
        if (tagsFromView) {
            // The view is looked up only when an in-range position actually needs it.
            TokenLabelView posLabels = (TokenLabelView) annotation.getView(ViewNames.POS);
            token = annotation.getToken(pos);
            tag = posLabels.getLabel(pos);
        } else {
            token = annotation.getToken(pos);
            tag = counter.tag(pos, annotation);
        }
        collected.add(new DiscreteFeature(featureHead + tag + "_" + token));
    }
    return collected;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
the class MascXCESReader method createTokenLabelView.
/**
 * Helper that creates a {@link TokenLabelView} from a stream of token labels and registers it on
 * the given text annotation.
 *
 * @param tokenLabels pairs whose first element is passed as the token id and whose second
 *        element is passed as the label
 * @param ta the annotation the view is built for and added to
 * @param viewName the name under which the view is created and registered
 * @return the value reported by the view's {@code count()} once every label has been added
 */
private static int createTokenLabelView(Stream<Pair<Integer, String>> tokenLabels, TextAnnotation ta, String viewName) {
    final double score = 1.0;
    final TokenLabelView labelView = new TokenLabelView(viewName, "GoldStandard", ta, score);
    tokenLabels.forEach(entry -> {
        labelView.addTokenLabel(entry.getFirst(), entry.getSecond(), score);
    });
    ta.addView(viewName, labelView);
    final int labelCount = labelView.count();
    return labelCount;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
the class PennTreebankPOSReader method createTextAnnotation.
/**
 * Create a new {@link TextAnnotation} from a single line of bracketed text.
 *
 * <p>The first and last character of the line are discarded, the remainder is cut into
 * word/POS pairs with {@code splitWordsPattern}, and each pair is cut with
 * {@code whitespacePattern}; the first piece of a pair is used as the POS tag and the second as
 * the word. All words form a single sentence.
 *
 * @param line The bracketed string to be processed
 * @param lineId The ID of the {@link TextAnnotation}
 * @return A {@link TextAnnotation} with a populated {@link ViewNames#POS} view
 */
public TextAnnotation createTextAnnotation(String line, String lineId) {
    final String inner = line.substring(1, line.length() - 1);
    final String[] pairs = splitWordsPattern.split(inner);

    final int pairCount = pairs.length;
    final String[] tokens = new String[pairCount];
    final String[] tags = new String[pairCount];
    for (int k = 0; k < pairCount; k++) {
        final String[] pieces = whitespacePattern.split(pairs[k]);
        // The tag comes first in each pair, the word second.
        tokens[k] = pieces[1];
        tags[k] = pieces[0];
    }

    // The whole line is handed over as one tokenized sentence.
    final List<String[]> sentences = Collections.singletonList(tokens);
    final TextAnnotation annotation =
            BasicTextAnnotationBuilder.createTextAnnotationFromTokens(corpusName, lineId, sentences);

    final TokenLabelView tagView = new TokenLabelView(ViewNames.POS, annotation);
    for (int k = 0; k < tags.length; k++) {
        tagView.addTokenLabel(k, tags[k], 1.0);
    }
    annotation.addView(ViewNames.POS, tagView);
    return annotation;
}
Aggregations