use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.
the class LabeledDepFeatureGenerator method ChunkConj.
/**
 * Builds conjunction features over the chunk and POS labels of a head/dependent pair.
 *
 * <p>Four features are produced: the chunk labels of both tokens, and the chunk+POS labels of
 * both tokens, each combined once with the arc direction ({@code head < dep}) and once with the
 * signed arc length ({@code head - dep}). Every feature string ends with {@code deprel}.
 *
 * @param head index of the head token in {@code sent}
 * @param dep index of the dependent token in {@code sent}
 * @param sent sentence instance providing the {@code strChunk} and {@code strPos} arrays
 * @param deprel dependency relation label appended to every feature
 * @return the set of generated features
 */
private Set<Feature> ChunkConj(int head, int dep, DepInst sent, String deprel) {
    String headChunk = sent.strChunk[head];
    String headPos = sent.strPos[head];
    String depChunk = sent.strChunk[dep];
    String depPos = sent.strPos[dep];

    // Two views of the token pair: chunk labels only, and chunk labels paired with POS tags.
    String chunkPair = "POSChunk: " + headChunk + " " + depChunk + " ";
    String chunkPosPair = "POSChunk: " + headChunk + " " + headPos + " " + depChunk + " " + depPos + " ";

    // Two views of the arc: its direction and its signed length.
    String[] arcDescriptors = {
        "Arc-dir: " + (head < dep) + " ",
        "Arc-length " + (head - dep) + " "
    };

    Set<Feature> conjunctions = new HashSet<>();
    for (String pair : new String[] {chunkPair, chunkPosPair}) {
        for (String arc : arcDescriptors) {
            conjunctions.add(new DiscreteFeature(pair + arc + deprel));
        }
    }
    return conjunctions;
}
use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.
the class Affixes method getFeatures.
/**
 * Generates prefix and suffix features from the surface form of the constituent.
 *
 * <p>Prefixes of length 3 and 4 and suffixes of length 1 through 4 are emitted, but only when
 * the surface form is strictly longer than the affix. As a side effect the {@code TOKENS} field
 * is set to the TOKENS view of the constituent's text annotation.
 *
 * @param c the constituent whose surface form is inspected
 * @return features of the form {@code Affixes:p|(prefix)} and {@code Affixes:s|(suffix)}, in
 *         insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TOKENS = ta.getView(ViewNames.TOKENS);

    String surface = c.getSurfaceForm();
    int length = surface.length();
    Set<Feature> affixes = new LinkedHashSet<Feature>();

    // Prefixes: an affix is only emitted when it is shorter than the whole word.
    for (int size = 3; size <= 4 && size < length; size++) {
        String prefix = surface.substring(0, size);
        affixes.add(new DiscreteFeature("Affixes:p|(" + prefix + ")"));
    }

    // Suffixes: same length restriction, starting from a single trailing character.
    for (int size = 1; size <= 4 && size < length; size++) {
        String suffix = surface.substring(length - size);
        affixes.add(new DiscreteFeature("Affixes:s|(" + suffix + ")"));
    }

    return affixes;
}
use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.
the class POSWindow method getFeatures.
/**
 * Generates tag/form features for a window of up to two tokens on either side of each token
 * covered by the constituent.
 *
 * <p>The window is clamped to the token range of the text annotation. The source of the tag
 * depends on the extractor's flags: when {@code isPOSFromCounting} is false the label is read
 * from the POS view; otherwise it is obtained from {@code counter}, and the feature is labelled
 * {@code BaselinePOS} or {@code MikheevPOS} depending on {@code isBaseLineCounting}.
 *
 * @param c the constituent whose tokens are the window centers
 * @return features of the form {@code POSWindow_<source>:<tag>_<form>}, in insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    int tokenCount = ta.getTokens().length;
    int spanStart = c.getStartSpan();
    int spanEnd = c.getEndSpan();
    Set<Feature> features = new LinkedHashSet<>();

    for (int center = spanStart; center < spanEnd; center++) {
        // Walk at most two tokens to the left without passing the first token.
        int left = center;
        int steps = 0;
        while (steps < 2 && left > 0) {
            left--;
            steps++;
        }

        // Walk at most two tokens to the right without passing the last token.
        int right = center;
        steps = 0;
        while (steps < 2 && right < tokenCount - 1) {
            right++;
            steps++;
        }

        if (!isPOSFromCounting) {
            for (int pos = left; pos <= right; pos++) {
                TokenLabelView posView = (TokenLabelView) ta.getView(ViewNames.POS);
                String form = ta.getToken(pos);
                String tag = posView.getLabel(pos);
                features.add(new DiscreteFeature("POSWindow_POS:" + tag + "_" + form));
            }
        } else {
            // Both counting modes take the tag from the counter; only the label differs.
            String label = isBaseLineCounting ? "POSWindow_BaselinePOS" : "POSWindow_MikheevPOS";
            for (int pos = left; pos <= right; pos++) {
                String form = ta.getToken(pos);
                String tag = counter.tag(pos, ta);
                features.add(new DiscreteFeature(label + ":" + tag + "_" + form));
            }
        }
    }
    return features;
}
use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.
the class POSMikheevFeatureExtractor method getFeatures.
/**
 * Generates one tag/form feature per token covered by the constituent, with the tag supplied by
 * {@code counter}.
 *
 * @param c the constituent whose tokens are tagged
 * @return features of the form {@code MikheevPOS:<tag>_<form>}, in insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    int tokenId = c.getStartSpan();
    final int stop = c.getEndSpan();

    Set<Feature> tagged = new LinkedHashSet<>();
    while (tokenId < stop) {
        String surface = ta.getToken(tokenId);
        String mikheevTag = counter.tag(tokenId, ta);
        tagged.add(new DiscreteFeature("MikheevPOS:" + mikheevTag + "_" + surface));
        tokenId++;
    }
    return tagged;
}
use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.
the class WordTypeInformation method getFeatures.
/**
 * Generates word-type features for the forms in a window around the constituent.
 *
 * <p>This feature extractor assumes that the TOKENS view has been generated in the constituent's
 * TextAnnotation. The window of forms is obtained from
 * {@code getwindowkfrom(TOKENS, startspan, endspan, k)} with {@code k = 2}; {@code null} entries
 * in the returned array are skipped. For every remaining form at window index {@code i}, three
 * boolean features are emitted:
 * <ul>
 *   <li>{@code c<i>} - every character is upper case</li>
 *   <li>{@code d<i>} - every character is a digit</li>
 *   <li>{@code p<i>} - no character is a letter</li>
 * </ul>
 * An empty form yields {@code true} for all three, since no character violates any condition.
 *
 * <p>As a side effect the {@code TOKENS} field is set to the TOKENS view of the annotation.
 *
 * @param c the constituent at the center of the window
 * @return features of the form {@code WordTypeInformation:<id><i>(<true|false>)}, in insertion
 *         order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TOKENS = ta.getView(ViewNames.TOKENS);

    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();
    int k = 2;

    // All our constituents are words(tokens)
    String[] forms = getwindowkfrom(TOKENS, startspan, endspan, k);

    String classifier = "WordTypeInformation";
    Set<Feature> result = new LinkedHashSet<Feature>();
    for (int i = 0; i < forms.length; i++) {
        String form = forms[i];
        if (form == null) {
            continue;
        }

        boolean allCapitalized = true, allDigits = true, allNonLetters = true;
        for (int j = 0; j < form.length(); ++j) {
            char ch = form.charAt(j);
            allCapitalized &= Character.isUpperCase(ch);
            allDigits &= Character.isDigit(ch);
            allNonLetters &= !Character.isLetter(ch);
        }

        result.add(new DiscreteFeature(classifier + ":" + ("c" + i) + "(" + allCapitalized + ")"));
        result.add(new DiscreteFeature(classifier + ":" + ("d" + i) + "(" + allDigits + ")"));
        // The non-letter feature needs its own id: sharing "c" with the capitalization feature
        // makes the two collide in the set whenever both flags have the same value, and makes
        // them contradict each other under one id when the flags differ.
        result.add(new DiscreteFeature(classifier + ":" + ("p" + i) + "(" + allNonLetters + ")"));
    }
    return result;
}
Aggregations