Use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.
The getFeatures method of the class LabelTwoAfter.
/**
 * Emits one feature for every index in the window {@code [startSpan + 2, endSpan + 2)}, i.e. the
 * constituent's span shifted two tokens to the right.
 *
 * <p>The tag source and the feature name depend on two flags of the enclosing class:
 * when {@code isPOSFromCounting} is false the tag is read from the POS view and the name ends in
 * {@code _POS}; otherwise the tag comes from {@code counter.tag} and the name ends in
 * {@code _BaselinePOS} or {@code _MikheevPOS} depending on {@code isBaseLineCounting}.
 * An index at or beyond the number of tokens yields a feature with an empty value.
 *
 * @param c the constituent whose right-hand context is described
 * @return the features, in window order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String prefix = "LabelTwoAfter";
    TextAnnotation ta = c.getTextAnnotation();
    final int tokenCount = ta.getTokens().length;
    final int first = c.getStartSpan() + 2;
    final int limit = c.getEndSpan() + 2;
    Set<Feature> collected = new LinkedHashSet<>();

    for (int pos = first; pos < limit; pos++) {
        final boolean fromCounter = isPOSFromCounting;

        // Pick the feature name for this position.
        final String name;
        if (!fromCounter) {
            name = prefix + "_" + "POS";
        } else if (isBaseLineCounting) {
            name = prefix + "_" + "BaselinePOS";
        } else {
            name = prefix + "_" + "MikheevPOS";
        }

        // Past the end of the text: emit the empty-valued placeholder.
        if (pos >= tokenCount) {
            collected.add(new DiscreteFeature(name + ":" + ""));
            continue;
        }

        final String word;
        final String label;
        if (fromCounter) {
            word = ta.getToken(pos);
            label = counter.tag(pos, ta);
        } else {
            // The view is looked up only when an in-range position actually needs it.
            TokenLabelView posView = (TokenLabelView) ta.getView(ViewNames.POS);
            word = ta.getToken(pos);
            label = posView.getLabel(pos);
        }
        collected.add(new DiscreteFeature(name + ":" + label + "_" + word));
    }
    return collected;
}
Use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.
The getFeatures method of the class MixedChunkWindowTwoBeforePOSWindowThreeBefore.
/**
 * This feature extractor assumes that the TOKENS view, the POS view and the SHALLOW_PARSE view
 * have been generated in the constituent's TextAnnotation. It combines the constituent's own POS
 * tag with the POS tags and the SHALLOW_PARSE (chunk) labels of the two tokens returned by
 * {@code getwordskfrom(TOKENS, start, end, -2)} and returns them as discrete features.
 *
 * <p>Three features are produced, named {@code ll} (both chunk labels), {@code lt1} (first chunk
 * label with the second POS tag) and {@code lt2} (second chunk label with the constituent's own
 * POS tag). Each feature string is also logged at info level.
 *
 * @param c the constituent to describe; expected to be a single word (token)
 * @return the three features in insertion order, or {@code null} when {@code getwordskfrom} does
 *         not return exactly two tokens
 * @throws EdisonException if one of the required views cannot be read, or if a token has no POS
 *         tag or no chunk label
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    try {
        TOKENS = ta.getView(ViewNames.TOKENS);
        POS = ta.getView(ViewNames.POS);
        SHALLOW_PARSE = ta.getView(ViewNames.SHALLOW_PARSE);
    } catch (Exception e) {
        // Fail fast. The views are stored outside this method, so carrying on after a failed
        // lookup would use references that are either null or left over from an earlier call.
        EdisonException failure = new EdisonException(
                "Unable to read the TOKENS, POS or SHALLOW_PARSE view: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }

    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition.
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();

    // The two words before the constituent.
    List<Constituent> wordstwobefore = getwordskfrom(TOKENS, startspan, endspan, -2);
    if (wordstwobefore.size() != 2) {
        // NOTE(review): null (not an empty set) is kept because callers may rely on it.
        return null;
    }

    String[] tags = new String[3];
    String[] labels = new String[2];
    int i = 0;
    for (Constituent token : wordstwobefore) {
        int tokenStart = token.getStartSpan();
        int tokenEnd = token.getEndSpan();
        List<String> posTags = POS.getLabelsCoveringSpan(tokenStart, tokenEnd);
        List<String> chunkLabels = SHALLOW_PARSE.getLabelsCoveringSpan(tokenStart, tokenEnd);
        if (posTags.isEmpty() || chunkLabels.isEmpty()) {
            // Previously this case only logged a warning and then failed on get(0).
            throw new EdisonException("Token [" + tokenStart + ", " + tokenEnd
                    + ") has no POS tag or no chunk label.");
        }
        // There should be exactly one POS tag and one chunk label per token; the first is used.
        if (posTags.size() != 1 || chunkLabels.size() != 1) {
            logger.warn("Error token has more than one POS tag or Chunk Label.");
        }
        labels[i] = chunkLabels.get(0);
        tags[i] = posTags.get(0);
        i++;
    }

    // The last tag slot holds the constituent's own POS tag.
    List<String> ownTags = POS.getLabelsCoveringSpan(startspan, endspan);
    if (ownTags.isEmpty()) {
        throw new EdisonException("Constituent [" + startspan + ", " + endspan
                + ") has no POS tag.");
    }
    tags[i] = ownTags.get(0);

    Set<Feature> result = new LinkedHashSet<Feature>();
    String classifier = "MixedChunkWindowTwoBeforePOSWindowThreeBefore";

    String id = classifier + ":" + "ll";
    String value = "(" + (labels[0] + "_" + labels[1]) + ")";
    logger.info(id + value);
    result.add(new DiscreteFeature(id + value));

    id = classifier + ":" + "lt1";
    value = "(" + (labels[0] + "_" + tags[1]) + ")";
    logger.info(id + value);
    result.add(new DiscreteFeature(id + value));

    id = classifier + ":" + "lt2";
    // NOTE(review): unlike "ll" and "lt1" this value has no surrounding parentheses. It is
    // reproduced as-is so that the emitted feature strings do not change.
    value = "" + (labels[1] + "_" + tags[2]);
    logger.info(id + value);
    result.add(new DiscreteFeature(id + value));

    return result;
}
Use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.
The getFeatures method of the class LabelTwoBefore.
/**
 * Emits one feature for every index in the window {@code [startSpan - 2, endSpan - 2)}, i.e. the
 * constituent's span shifted two tokens to the left.
 *
 * <p>The tag source and the feature name depend on two flags of the enclosing class:
 * when {@code isPOSFromCounting} is false the tag is read from the POS view and the name ends in
 * {@code _POS}; otherwise the tag comes from {@code counter.tag} and the name ends in
 * {@code _BaselinePOS} or {@code _MikheevPOS} depending on {@code isBaseLineCounting}.
 * A negative index (before the first token) yields a feature with an empty value.
 *
 * @param c the constituent whose left-hand context is described
 * @return the features, in window order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String prefix = "LabelTwoBefore";
    TextAnnotation ta = c.getTextAnnotation();
    final int first = c.getStartSpan() - 2;
    final int limit = c.getEndSpan() - 2;
    Set<Feature> collected = new LinkedHashSet<>();

    for (int pos = first; pos < limit; pos++) {
        final boolean fromCounter = isPOSFromCounting;

        // Pick the feature name for this position.
        final String name;
        if (!fromCounter) {
            name = prefix + "_" + "POS";
        } else if (isBaseLineCounting) {
            name = prefix + "_" + "BaselinePOS";
        } else {
            name = prefix + "_" + "MikheevPOS";
        }

        // Before the start of the text: emit the empty-valued placeholder.
        if (pos < 0) {
            collected.add(new DiscreteFeature(name + ":" + ""));
            continue;
        }

        final String word;
        final String label;
        if (fromCounter) {
            word = ta.getToken(pos);
            label = counter.tag(pos, ta);
        } else {
            // The view is looked up only when an in-range position actually needs it.
            TokenLabelView posView = (TokenLabelView) ta.getView(ViewNames.POS);
            word = ta.getToken(pos);
            label = posView.getLabel(pos);
        }
        collected.add(new DiscreteFeature(name + ":" + label + "_" + word));
    }
    return collected;
}
Use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.
The getFeatures method of the class POSBaseLineFeatureExtractor.
/**
 * Emits one {@code BaseLinePOS} feature per token index in the constituent's span, pairing the
 * tag returned by {@code counter.tag} with the token's surface form.
 *
 * @param c the constituent whose tokens are described
 * @return the features, in token order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final TextAnnotation ta = c.getTextAnnotation();
    final int first = c.getStartSpan();
    final int limit = c.getEndSpan();
    final Set<Feature> collected = new LinkedHashSet<>();

    int pos = first;
    while (pos < limit) {
        String word = ta.getToken(pos);
        String label = counter.tag(pos, ta);
        collected.add(new DiscreteFeature("BaseLinePOS" + ":" + label + "_" + word));
        pos++;
    }
    return collected;
}
Use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.
The getFeatures method of the class POSWindowTwo.
/**
 * This feature extractor assumes that the TOKENS view and the POS view have been generated in
 * the constituent's TextAnnotation. It asks {@code getwindowtagskfrom} for the POS tags in a
 * window of size two around the constituent and emits one discrete feature per non-null tag.
 *
 * <p>Each feature has the form {@code POSWindowTwo:<index>(<tag>)}, where {@code index} is the
 * tag's position in the array returned by {@code getwindowtagskfrom}. Each feature string is
 * also logged at info level.
 *
 * @param c the constituent to describe; expected to be a single word (token)
 * @return the features in array order; null entries in the tag array are skipped
 * @throws EdisonException if the TOKENS or POS view cannot be read
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    View tokensView;
    View posView;
    try {
        tokensView = ta.getView(ViewNames.TOKENS);
        posView = ta.getView(ViewNames.POS);
    } catch (Exception e) {
        // Fail fast. Previously the stack trace was printed and null views were handed on to
        // getwindowtagskfrom.
        EdisonException failure = new EdisonException(
                "Unable to read the TOKENS or POS view: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }

    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition.
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();

    // Tags of the words two before and two after.
    String[] tags = getwindowtagskfrom(tokensView, posView, startspan, endspan, 2);

    String classifier = "POSWindowTwo";
    Set<Feature> result = new LinkedHashSet<Feature>();
    for (int i = 0; i < tags.length; i++) {
        if (tags[i] == null) {
            continue;
        }
        String feature = classifier + ":" + i + "(" + tags[i] + ")";
        logger.info(feature);
        result.add(new DiscreteFeature(feature));
    }
    return result;
}
Aggregations