use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class POSMikheevFeatureExtractor method getFeatures.
/**
 * Emits one discrete feature per token covered by the constituent.
 *
 * <p>Each feature has the form {@code MikheevPOS:<tag>_<form>}, where {@code form} is the
 * token text and {@code tag} is whatever {@code counter.tag(index, ta)} returns for it.
 * Features are collected in token order; duplicates collapse because the result is a set.
 *
 * @param c the constituent whose token span {@code [startSpan, endSpan)} is scanned
 * @return the features in insertion order (empty when the span is empty)
 * @throws EdisonException declared by the extractor interface
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final TextAnnotation annotation = c.getTextAnnotation();
    final int firstToken = c.getStartSpan();
    final int pastLastToken = c.getEndSpan();

    final String featurePrefix = "MikheevPOS" + ":";
    final Set<Feature> collected = new LinkedHashSet<>();

    int tokenIndex = firstToken;
    while (tokenIndex < pastLastToken) {
        final String word = annotation.getToken(tokenIndex);
        final String posTag = counter.tag(tokenIndex, annotation);
        collected.add(new DiscreteFeature(featurePrefix + posTag + "_" + word));
        tokenIndex++;
    }
    return collected;
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class POSWindow method getFeatures.
/**
 * Emits tag/form features for a window of up to two tokens on either side of every token
 * covered by the constituent.
 *
 * <p>Each feature has the form {@code <label>:<tag>_<form>}. The label and the source of
 * the tag depend on two fields of this extractor:
 * <ul>
 *   <li>{@code isPOSFromCounting == false}: label {@code POSWindow_POS}, tag read from the
 *       {@code ViewNames.POS} view;</li>
 *   <li>otherwise, {@code isBaseLineCounting == true}: label {@code POSWindow_BaselinePOS},
 *       tag from {@code counter.tag};</li>
 *   <li>otherwise: label {@code POSWindow_MikheevPOS}, tag from {@code counter.tag}.</li>
 * </ul>
 *
 * @param c the constituent whose token span {@code [startSpan, endSpan)} is scanned
 * @return the features in insertion order (empty when the span is empty)
 * @throws EdisonException declared by the extractor interface
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String prefix = "POSWindow";
    final TextAnnotation ta = c.getTextAnnotation();
    final int lenOfTokens = ta.getTokens().length;
    final int start = c.getStartSpan();
    final int end = c.getEndSpan();
    final Set<Feature> features = new LinkedHashSet<>();
    if (start >= end) {
        return features;
    }

    // The label and the tag source do not change while iterating, so decide them once.
    final boolean usePosView = !isPOSFromCounting;
    final String classifier;
    if (usePosView) {
        classifier = prefix + "_" + "POS";
    } else if (isBaseLineCounting) {
        classifier = prefix + "_" + "BaselinePOS";
    } else {
        classifier = prefix + "_" + "MikheevPOS";
    }
    // Look the POS view up a single time instead of once per window token.
    final TokenLabelView posView =
            usePosView ? (TokenLabelView) ta.getView(ViewNames.POS) : null;

    for (int i = start; i < end; i++) {
        // Step at most two tokens left/right, never past index 0 or the last token.
        // An index that already lies outside that range is left where it is.
        final int windowStart = i > 0 ? Math.max(i - 2, 0) : i;
        final int windowEnd = i < lenOfTokens - 1 ? Math.min(i + 2, lenOfTokens - 1) : i;

        for (int pos = windowStart; pos <= windowEnd; pos++) {
            final String form = ta.getToken(pos);
            final String tag = usePosView ? posView.getLabel(pos) : counter.tag(pos, ta);
            features.add(new DiscreteFeature(classifier + ":" + tag + "_" + form));
        }
    }
    return features;
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class PosWordConjunctionSizeTwoWindowSizeTwo method getFeatures.
/**
 * Builds conjunctions of POS tag and word form over the window around the constituent.
 *
 * <p>The extractor reads the {@code TOKENS} and {@code POS} views of the constituent's
 * {@code TextAnnotation} and hands them to {@code getWindowK} / {@code getWindowKTags}.
 * For every position in the returned tag array and every n-gram size from 1 to 2 it emits
 * {@code PosWordConjunctionSizeTwoWindowSizeTwo:<offset>_<size>(<tag>-<form>[_<tag>-<form>])},
 * where {@code offset} is the array position minus 2. An n-gram that would run past the end
 * of the tag array is truncated rather than skipped.
 *
 * <p>If a view lookup throws, the stack trace is printed and the views not yet assigned
 * stay {@code null}; they are passed to the window helpers as they are.
 *
 * @param c the constituent to describe
 * @return the features in insertion order
 * @throws EdisonException declared by the extractor interface
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String classifier = "PosWordConjunctionSizeTwoWindowSizeTwo";
    // Used both as the argument to the window helpers and as the largest n-gram size.
    final int width = 2;
    // Subtracted from the array position to obtain the offset written into the id.
    final int centerOffset = 2;

    TextAnnotation annotation = c.getTextAnnotation();
    View tokenView = null;
    View posView = null;
    try {
        tokenView = annotation.getView(ViewNames.TOKENS);
        posView = annotation.getView(ViewNames.POS);
    } catch (Exception e) {
        e.printStackTrace();
    }

    int spanStart = c.getStartSpan();
    int spanEnd = c.getEndSpan();
    String[] forms = getWindowK(tokenView, spanStart, spanEnd, width);
    String[] tags = getWindowKTags(posView, spanStart, spanEnd, width);

    Set<Feature> result = new LinkedHashSet<>();
    for (int size = 1; size <= width; size++) {
        for (int pos = 0; pos < tags.length; pos++) {
            // Clip the n-gram at the end of the tag array.
            int stop = Math.min(pos + size, tags.length);
            StringBuilder ngram = new StringBuilder();
            for (int idx = pos; idx < stop; idx++) {
                if (idx > pos) {
                    ngram.append("_");
                }
                ngram.append(tags[idx]).append("-").append(forms[idx]);
            }
            String id = classifier + ":" + (pos - centerOffset) + "_" + size;
            result.add(new DiscreteFeature(id + "(" + ngram + ")"));
        }
    }
    return result;
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class HyphenTagFeature method getFeatures.
/**
 * Extracts features from the hyphen-separated parts of a single-token constituent.
 *
 * <p>Nothing is emitted unless the surface form contains {@code '-'} and the constituent
 * has length 1. Otherwise the source of the first incoming relation is taken as the
 * predicate, and its {@code PredicateArgumentView.LemmaIdentifier} attribute as the lemma:
 * <ul>
 *   <li>{@code pred-token} when predicate and constituent share the same span;</li>
 *   <li>for each part at index {@code i} of the surface form split on {@code '-'}:
 *       {@code i:pred} if the part contains the lemma, otherwise the lower-cased part
 *       both on its own and as {@code i:<part>}.</li>
 * </ul>
 *
 * @param c the candidate constituent
 * @return the extracted features (possibly empty)
 * @throws EdisonException if the constituent has no incoming relation, or the predicate
 *         carries no lemma attribute
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> features = new HashSet<>();
    String surfaceString = c.getSurfaceForm();
    if (!surfaceString.contains("-") || c.length() != 1) {
        return features;
    }

    // Fail with a descriptive error instead of an IndexOutOfBoundsException from get(0).
    if (c.getIncomingRelations().isEmpty()) {
        throw new EdisonException("HyphenTagFeature: constituent '" + surfaceString
                + "' has no incoming relation, cannot locate its predicate");
    }
    Constituent predicate = c.getIncomingRelations().get(0).getSource();

    // Checked explicitly: an assert is skipped unless the JVM runs with -ea, and a null
    // lemma would then only show up as a NullPointerException inside String.contains.
    String lemma = predicate.getAttribute(PredicateArgumentView.LemmaIdentifier);
    if (lemma == null) {
        throw new EdisonException("HyphenTagFeature: predicate of constituent '"
                + surfaceString + "' has no lemma attribute");
    }

    if (predicate.getSpan().equals(c.getSpan())) {
        features.add(DiscreteFeature.create("pred-token"));
    }

    String[] parts = surfaceString.split("-");
    for (int i = 0; i < parts.length; i++) {
        String part = parts[i];
        if (part.contains(lemma)) {
            features.add(DiscreteFeature.create(i + ":pred"));
        } else {
            // Locale.ROOT keeps the feature strings independent of the JVM default locale.
            String lowerCase = part.toLowerCase(java.util.Locale.ROOT);
            features.add(DiscreteFeature.create(lowerCase));
            features.add(DiscreteFeature.create(i + ":" + lowerCase));
        }
    }
    return features;
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class LabelOneAfter method getFeatures.
/**
 * Emits tag/form features for the constituent's token span shifted one position to the
 * right, i.e. indices {@code [startSpan + 1, endSpan + 1)}.
 *
 * <p>For an index inside the sentence the feature is {@code <label>:<tag>_<form>}; for an
 * index at or beyond the number of tokens it is {@code <label>:} with nothing after the
 * colon. The label and the source of the tag depend on two fields of this extractor:
 * <ul>
 *   <li>{@code isPOSFromCounting == false}: label {@code LabelOneAfter_POS}, tag read from
 *       the {@code ViewNames.POS} view;</li>
 *   <li>otherwise, {@code isBaseLineCounting == true}: label
 *       {@code LabelOneAfter_BaselinePOS}, tag from {@code counter.tag};</li>
 *   <li>otherwise: label {@code LabelOneAfter_MikheevPOS}, tag from {@code counter.tag}.</li>
 * </ul>
 *
 * @param c the constituent whose following tokens are described
 * @return the features in insertion order (empty when the span is empty)
 * @throws EdisonException declared by the extractor interface
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String prefix = "LabelOneAfter";
    final TextAnnotation ta = c.getTextAnnotation();
    final int lenOfTokens = ta.getTokens().length;
    final int start = c.getStartSpan() + 1;
    final int end = c.getEndSpan() + 1;
    final Set<Feature> features = new LinkedHashSet<>();

    // The label and the tag source do not change while iterating, so decide them once.
    final boolean usePosView = !isPOSFromCounting;
    final String classifier;
    if (usePosView) {
        classifier = prefix + "_" + "POS";
    } else if (isBaseLineCounting) {
        classifier = prefix + "_" + "BaselinePOS";
    } else {
        classifier = prefix + "_" + "MikheevPOS";
    }

    // Fetch the POS view once, and only if at least one index falls inside the sentence,
    // so the lookup happens under the same condition as a per-token lookup would.
    final boolean anyTokenInRange = start < end && start < lenOfTokens;
    final TokenLabelView posView = (usePosView && anyTokenInRange)
            ? (TokenLabelView) ta.getView(ViewNames.POS)
            : null;

    for (int i = start; i < end; i++) {
        if (i >= lenOfTokens) {
            // No token after the end of the sentence: emit the label with an empty value.
            features.add(new DiscreteFeature(classifier + ":" + ""));
            continue;
        }
        final String form = ta.getToken(i);
        final String tag = usePosView ? posView.getLabel(i) : counter.tag(i, ta);
        features.add(new DiscreteFeature(classifier + ":" + tag + "_" + form));
    }
    return features;
}
Aggregations