Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
The getFeatures method of class LabelTwoBefore.
/**
 * Builds one feature for every position in the window
 * {@code [c.getStartSpan() - 2, c.getEndSpan() - 2)}, i.e. the constituent's span shifted two
 * positions to the left.
 *
 * <p>The feature name is {@code LabelTwoBefore_POS}, {@code LabelTwoBefore_BaselinePOS} or
 * {@code LabelTwoBefore_MikheevPOS}, chosen from the {@code isPOSFromCounting} and
 * {@code isBaseLineCounting} flags. For a non-negative position the feature value is
 * {@code tag + "_" + token}; the tag is read from the POS view when {@code isPOSFromCounting}
 * is false and from {@code counter.tag(position, ta)} otherwise. A negative position (the window
 * starts before the first token) produces a feature with an empty value.
 *
 * @param c the constituent whose left context is described
 * @return the features in insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final TextAnnotation annotation = c.getTextAnnotation();
    final int windowStart = c.getStartSpan() - 2;
    final int windowEnd = c.getEndSpan() - 2;

    // The tag source decides the feature name; it does not change between positions.
    final boolean readFromPosView = !isPOSFromCounting;
    final String tagSource;
    if (readFromPosView) {
        tagSource = "POS";
    } else if (isBaseLineCounting) {
        tagSource = "BaselinePOS";
    } else {
        tagSource = "MikheevPOS";
    }
    final String featureName = "LabelTwoBefore" + "_" + tagSource;

    final Set<Feature> collected = new LinkedHashSet<>();
    int position = windowStart;
    while (position < windowEnd) {
        if (position < 0) {
            // No token exists at this position: emit the name with an empty value.
            collected.add(new DiscreteFeature(featureName + ":"));
        } else {
            final String token;
            final String tag;
            if (readFromPosView) {
                TokenLabelView posView = (TokenLabelView) annotation.getView(ViewNames.POS);
                token = annotation.getToken(position);
                tag = posView.getLabel(position);
            } else {
                token = annotation.getToken(position);
                tag = counter.tag(position, annotation);
            }
            collected.add(new DiscreteFeature(featureName + ":" + tag + "_" + token));
        }
        position++;
    }
    return collected;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
The getFeatures method of class POSBaseLineFeatureExtractor.
/**
 * Builds one {@code BaseLinePOS} feature for every token position covered by the constituent.
 *
 * <p>Each feature value is {@code tag + "_" + token}, where the tag is whatever
 * {@code counter.tag(position, ta)} returns for that position.
 *
 * @param c the constituent whose tokens are described
 * @return the features in insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String featureName = "BaseLinePOS";
    final TextAnnotation annotation = c.getTextAnnotation();
    final int firstToken = c.getStartSpan();
    final int pastLastToken = c.getEndSpan();

    final Set<Feature> collected = new LinkedHashSet<>();
    int position = firstToken;
    while (position < pastLastToken) {
        final String token = annotation.getToken(position);
        final String tag = counter.tag(position, annotation);
        collected.add(new DiscreteFeature(featureName + ":" + tag + "_" + token));
        position++;
    }
    return collected;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
The getFeatures method of class POSWindowTwo.
/**
 * Extracts POS-tag window features for a constituent.
 *
 * <p>This feature extractor assumes that the TOKENS view and the POS view have been generated in
 * the constituent's TextAnnotation. It asks {@code getwindowtagskfrom} for the tags in a window
 * of two words around the constituent's span and emits one feature per non-null entry, named
 * {@code POSWindowTwo:<index>(<tag>)} where {@code index} is the entry's position in the
 * returned array.
 *
 * <p>If either view cannot be fetched, the exception raised by {@code getView} propagates to the
 * caller rather than being printed and replaced by a null view.
 *
 * @param c the constituent to describe; by the LBJ chunk definition it is expected to be a
 *     single word (token)
 * @return the window features in insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();

    // Fetch the views without a catch-all: a lookup failure must surface here, with its own
    // message, instead of turning into a null view handed to the window helper.
    View tokens = ta.getView(ViewNames.TOKENS);
    View pos = ta.getView(ViewNames.POS);

    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();

    // All our constituents are words(tokens)
    // words two before & after
    int k = 2;
    String[] tags = getwindowtagskfrom(tokens, pos, startspan, endspan, k);

    String classifier = "POSWindowTwo";
    Set<Feature> result = new LinkedHashSet<Feature>();
    for (int i = 0; i < tags.length; i++) {
        if (tags[i] == null) {
            // The helper left this window slot unfilled; nothing to emit for it.
            continue;
        }
        String id = classifier + ":" + i;
        String value = "(" + tags[i] + ")";
        // NOTE(review): this logs at info level once per emitted feature; consider lowering it.
        logger.info(id + value);
        result.add(new DiscreteFeature(id + value));
    }
    return result;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
The getFeatures method of class PPFeatures.
/**
 * Extracts head-word features from the parse phrase that corresponds to the constituent.
 *
 * <p>The phrase is looked up in the parse view named by {@code parseViewName}. Two groups of
 * features may be added:
 * <ul>
 *   <li>{@code np-head:} and {@code np-head-pos:} when, scanning the phrase's outgoing relations
 *       from last to first, a target with a nominal label is found;</li>
 *   <li>{@code p-head:} when the source of the phrase's first incoming relation is labelled
 *       {@code PP}.</li>
 * </ul>
 * A phrase without incoming relations simply gets no {@code p-head:} feature.
 *
 * @param c the constituent to describe
 * @return the extracted features; possibly empty
 * @throws EdisonException declared by the overridden method; an EdisonException raised while
 *     extracting is rethrown wrapped in a {@link RuntimeException}
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parse = (TreeView) ta.getView(parseViewName);
    Set<Feature> feats = new HashSet<>();
    try {
        Constituent phrase = parse.getParsePhrase(c);

        // if the phrase is a PP, then the head word of its
        // rightmost NP child.
        // NOTE(review): the phrase label is not checked here, and the head position below is
        // computed for `phrase`, not for the nominal `target` -- confirm which is intended
        // before changing it, since existing models may rely on the current values.
        List<Relation> rels = phrase.getOutgoingRelations();
        for (int i = rels.size() - 1; i >= 0; i--) {
            Relation relation = rels.get(i);
            if (relation == null) {
                continue;
            }
            Constituent target = relation.getTarget();
            if (ParseTreeProperties.isNominal(target.getLabel())) {
                int head = CollinsHeadFinder.getInstance().getHeadWordPosition(phrase);
                feats.add(DiscreteFeature.create("np-head:" + ta.getToken(head).toLowerCase()));
                feats.add(DiscreteFeature.create("np-head-pos:" + WordHelpers.getPOS(ta, head)));
                // Only the last nominal target in the relation list contributes.
                break;
            }
        }

        // if the phrase's parent is a PP, then the head of that PP.
        // A phrase with no incoming relation has no parent; check for that explicitly instead
        // of letting get(0) throw and relying on the catch-all below.
        // NOTE(review): the head position is again computed for `phrase`, not for `parent`.
        List<Relation> incoming = phrase.getIncomingRelations();
        if (!incoming.isEmpty()) {
            Constituent parent = incoming.get(0).getSource();
            if (parent.getLabel().equals("PP")) {
                int head = CollinsHeadFinder.getInstance().getHeadWordPosition(phrase);
                feats.add(DiscreteFeature.create("p-head:" + ta.getToken(head).toLowerCase()));
            }
        }
    } catch (EdisonException e) {
        throw new RuntimeException(e);
    } catch (Exception e) {
        // Best effort: any other failure is reported and the features gathered so far are
        // returned.
        e.printStackTrace();
    }
    return feats;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
The getFeatures method of class ProjectedPath.
/**
 * Extracts a parse-path feature for constituents whose parse phrase is a {@code VP} nested
 * inside at least one other {@code VP}.
 *
 * <p>The phrase for {@code c} is looked up in the parse view named by {@code parseViewName}. If
 * its label is not {@code VP} an empty set is returned. Otherwise the method climbs through the
 * first incoming relation of each node for as long as the parent is also labelled {@code VP}.
 * If at least one such parent exists, the path string computed by
 * {@code PathFeatureHelper.getFullParsePathString} between the parse phrase of the source of
 * {@code c}'s first incoming relation and the topmost {@code VP} reached is added as the single
 * feature.
 *
 * @param c the constituent to describe
 * @return a set holding the path feature, or an empty set when the conditions above do not hold
 * @throws EdisonException declared by the overridden method
 * @throws IllegalStateException if a required parse phrase cannot be obtained
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parse = (TreeView) ta.getView(parseViewName);
    Set<Feature> feats = new HashSet<>();

    // Clone this to avoid concurrency problems
    Constituent c2;
    try {
        c2 = parse.getParsePhrase(c).cloneForNewView("");
    } catch (Exception e) {
        // Fail with the real cause attached. An assert would be skipped when assertions are
        // disabled, leaving only a context-free NullPointerException further down.
        throw new IllegalStateException("Could not obtain the parse phrase for the constituent", e);
    }
    if (c2 == null) {
        throw new IllegalStateException("Parse phrase clone for the constituent is null");
    }

    if (!c2.getLabel().equals("VP")) {
        return feats;
    }

    // Climb while the parent (source of the first incoming relation) is also a VP.
    boolean found = false;
    while (true) {
        List<Relation> rels = c2.getIncomingRelations();
        if (rels.isEmpty()) {
            break;
        }
        Constituent parent = rels.get(0).getSource();
        if (!parent.getLabel().equals("VP")) {
            break;
        }
        found = true;
        c2 = parent;
    }

    if (found) {
        // Clone this to avoid concurrency problems
        Constituent c1;
        try {
            c1 = parse.getParsePhrase(c.getIncomingRelations().get(0).getSource()).cloneForNewView("");
        } catch (Exception e) {
            throw new IllegalStateException(
                    "Could not obtain the parse phrase for the source of the constituent's first incoming relation",
                    e);
        }
        if (c1 == null) {
            throw new IllegalStateException(
                    "Parse phrase clone for the source of the constituent's first incoming relation is null");
        }
        // NOTE(review): 400 is passed through unchanged as the third argument; presumably a
        // maximum path length -- confirm against PathFeatureHelper.
        String path = PathFeatureHelper.getFullParsePathString(c1, c2, 400);
        feats.add(DiscreteFeature.create(path));
    }
    return feats;
}
Aggregations