Use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
Class PPFeatures, method getFeatures.
/**
 * Extracts head-word features for the parse phrase that covers the given constituent.
 *
 * <p>Two groups of features may be produced:
 * <ul>
 * <li>{@code np-head:} and {@code np-head-pos:} when, scanning the phrase's outgoing relations
 * from last to first, a target with a nominal label is found (only the first such match is
 * used);</li>
 * <li>{@code p-head:} when the source of the phrase's first incoming relation is labeled
 * {@code PP}.</li>
 * </ul>
 *
 * @param c the constituent whose covering parse phrase is inspected
 * @return the (possibly empty) set of features collected
 * @throws EdisonException if one is raised while looking up the phrase or its head word
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parse = (TreeView) ta.getView(parseViewName);
    Set<Feature> feats = new HashSet<>();
    try {
        Constituent phrase = parse.getParsePhrase(c);

        // Walk the children from the rightmost one and stop at the first nominal child.
        // NOTE(review): the head position is computed from `phrase`, not from the nominal
        // `target`, although the feature is described as the head of the rightmost NP child.
        // Left unchanged so the emitted feature values stay the same; confirm the intent.
        List<Relation> rels = phrase.getOutgoingRelations();
        for (int i = rels.size() - 1; i >= 0; i--) {
            Relation relation = rels.get(i);
            if (relation == null) {
                continue;
            }
            Constituent target = relation.getTarget();
            if (ParseTreeProperties.isNominal(target.getLabel())) {
                int head = CollinsHeadFinder.getInstance().getHeadWordPosition(phrase);
                feats.add(DiscreteFeature.create("np-head:" + ta.getToken(head).toLowerCase()));
                feats.add(DiscreteFeature.create("np-head-pos:" + WordHelpers.getPOS(ta, head)));
                break;
            }
        }

        // A phrase without an incoming relation has no parent to inspect. Guard explicitly
        // instead of letting get(0) throw and relying on the broad catch below.
        List<Relation> incoming = phrase.getIncomingRelations();
        if (!incoming.isEmpty()) {
            Constituent parent = incoming.get(0).getSource();
            if (parent.getLabel().equals("PP")) {
                // NOTE(review): as above, the head is taken from `phrase`, not from `parent`.
                int head = CollinsHeadFinder.getInstance().getHeadWordPosition(phrase);
                feats.add(DiscreteFeature.create("p-head:" + ta.getToken(head).toLowerCase()));
            }
        }
    } catch (EdisonException e) {
        // The method declares EdisonException, so propagate it as-is rather than wrapping it
        // in an unchecked exception.
        throw e;
    } catch (Exception e) {
        // Best effort: any other failure is reported and the features gathered so far are
        // still returned.
        e.printStackTrace();
    }
    return feats;
}
Use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
Class ProjectedPath, method getFeatures.
/**
 * Produces a single path feature when the parse phrase covering the constituent is a
 * {@code VP} whose chain of first-incoming-relation sources contains at least one further
 * {@code VP}.
 *
 * <p>The path string runs from the parse phrase of the source of the constituent's first
 * incoming relation to the topmost {@code VP} reached by the climb.
 *
 * @param c the constituent to extract the feature for
 * @return a set holding the path feature, or an empty set when the phrase is not a
 *         {@code VP} or no enclosing {@code VP} is found
 * @throws EdisonException if either parse phrase cannot be obtained; the underlying
 *         exception is attached as the cause
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parse = (TreeView) ta.getView(parseViewName);
    Set<Feature> feats = new HashSet<>();

    // Clone this to avoid concurrency problems
    Constituent c2;
    try {
        c2 = parse.getParsePhrase(c).cloneForNewView("");
    } catch (Exception e) {
        // Fail with the declared exception and keep the cause, instead of printing the trace
        // and dereferencing null a few lines later.
        throw new EdisonException(e);
    }

    if (!c2.getLabel().equals("VP")) {
        return feats;
    }

    // Climb through first-incoming-relation sources for as long as they are labeled VP.
    // NOTE(review): c2 starts out as a clone created for a new view; confirm that the clone
    // still reports the incoming relations of the original parse node, otherwise this climb
    // never advances.
    boolean found = false;
    while (true) {
        List<Relation> rels = c2.getIncomingRelations();
        if (rels.isEmpty()) {
            break;
        }
        Constituent parent = rels.get(0).getSource();
        if (!parent.getLabel().equals("VP")) {
            break;
        }
        found = true;
        c2 = parent;
    }

    if (found) {
        // Clone this to avoid concurrency problems
        Constituent c1;
        try {
            c1 = parse.getParsePhrase(c.getIncomingRelations().get(0).getSource()).cloneForNewView("");
        } catch (Exception e) {
            // Also covers a constituent with no incoming relation (get(0) throws).
            throw new EdisonException(e);
        }
        String path = PathFeatureHelper.getFullParsePathString(c1, c2, 400);
        feats.add(DiscreteFeature.create(path));
    }
    return feats;
}
Use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
Class WordConjunctionOneTwoThreeGramWindowTwo, method getFeatures.
/**
 * Builds conjunctions of one, two and three consecutive word forms taken from the window that
 * {@code getWindowK} returns for a reach of 2 around the constituent.
 *
 * <p>This extractor assumes that the TOKENS view has already been generated in the
 * constituent's TextAnnotation. Each feature is named
 * {@code WordConjunctionOneTwoThreeGramWindowTwo:<offset>_<n>(<w1>_<w2>...)}, where
 * {@code <offset>} is the window index of the first word minus 2 and {@code <n>} is the
 * requested n-gram size. An n-gram that would run past the end of the window is truncated
 * rather than skipped.
 *
 * @param c the constituent (expected to be a single word/token) to extract features for
 * @return the features in generation order: all 1-grams, then 2-grams, then 3-grams
 * @throws EdisonException declared by the extractor contract
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    // TOKENS is declared outside this method; it is refreshed here and handed to getWindowK.
    TOKENS = ta.getView(ViewNames.TOKENS);

    final int maxGram = 3; // up to 3-grams
    final int window = 2;
    final String classifier = "WordConjunctionOneTwoThreeGramWindowTwo";

    // We can assume that the constituent in this case is a Word(Token)
    String[] forms = getWindowK(TOKENS, c.getStartSpan(), c.getEndSpan(), window);

    Set<Feature> result = new LinkedHashSet<>();
    for (int gramSize = 1; gramSize <= maxGram; gramSize++) {
        for (int start = 0; start < forms.length; start++) {
            // Clip the n-gram at the end of the window.
            int stop = Math.min(start + gramSize, forms.length);

            // Conjoin the words with '_' between them.
            StringBuilder joined = new StringBuilder();
            for (int pos = start; pos < stop; pos++) {
                if (pos != start) {
                    joined.append("_");
                }
                joined.append(forms[pos]);
            }

            // Offsets are reported relative to the window centre (index `window`).
            String id = classifier + ":" + (start - window) + "_" + gramSize;
            String value = "(" + joined + ")";
            result.add(new DiscreteFeature(id + value));
        }
    }
    return result;
}
Use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
Class HasVerb, method getFeatures.
/**
 * Emits one feature, named after this extractor, when at least one token inside the
 * constituent's span carries a verb part-of-speech tag.
 *
 * @param c the constituent whose tokens are checked
 * @return a set with the single feature if a verb is found, otherwise an empty set
 * @throws EdisonException declared by the extractor contract
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> feats = new HashSet<>();
    TextAnnotation ta = c.getTextAnnotation();

    int token = c.getStartSpan();
    while (token < c.getEndSpan()) {
        if (POSUtils.isPOSVerb(WordHelpers.getPOS(ta, token))) {
            // One verb is enough; record the feature and stop scanning.
            feats.add(DiscreteFeature.create(getName()));
            break;
        }
        token++;
    }
    return feats;
}
Use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
Class ParsePath, method getFeatures.
/**
 * Describes the parse-tree path between the source of the constituent's first incoming
 * relation and the constituent itself.
 *
 * <p>When the constituent has at least one incoming relation, three features are added in
 * this order: the path string from {@code PathFeatureHelper.getPathString}, the labels on the
 * upward leg (each followed by {@code PathFeatureHelper.PATH_UP_STRING}, common ancestor
 * excluded), and a real-valued feature {@code "l"} holding the number of nodes on the path.
 *
 * @param c the constituent to extract path features for
 * @return the features in insertion order, or an empty set when {@code c} has no incoming
 *         relation
 * @throws EdisonException if either parse phrase cannot be obtained
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parse = (TreeView) ta.getView(parseViewName);
    Set<Feature> features = new LinkedHashSet<>();

    List<Relation> incoming = c.getIncomingRelations();
    if (incoming.isEmpty()) {
        return features;
    }

    Constituent from;
    Constituent to;
    try {
        from = parse.getParsePhrase(incoming.get(0).getSource());
        to = parse.getParsePhrase(c);
    } catch (Exception e) {
        throw new EdisonException(e);
    }

    Pair<List<Constituent>, List<Constituent>> paths = PathFeatureHelper.getPathsToCommonAncestor(from, to, 400);
    List<Constituent> upLeg = paths.getFirst();
    List<Constituent> downLeg = paths.getSecond();
    int topIndex = upLeg.size() - 1;

    // Collect the upward leg (without its last node) and its label string in one pass.
    List<Constituent> nodesOnPath = new ArrayList<>();
    StringBuilder upLabels = new StringBuilder();
    for (int i = 0; i < topIndex; i++) {
        Constituent node = upLeg.get(i);
        nodesOnPath.add(node);
        upLabels.append(node.getLabel()).append(PathFeatureHelper.PATH_UP_STRING);
    }

    // The last node of the upward leg is counted once ...
    nodesOnPath.add(upLeg.get(topIndex));

    // ... so the second leg is walked backwards starting below its own last node.
    for (int i = downLeg.size() - 2; i >= 0; i--) {
        nodesOnPath.add(downLeg.get(i));
    }

    String pathToAncestor = upLabels.toString();
    String pathString = PathFeatureHelper.getPathString(paths, true, false);

    features.add(DiscreteFeature.create(pathString));
    features.add(DiscreteFeature.create(pathToAncestor));
    features.add(RealFeature.create("l", nodesOnPath.size()));
    return features;
}
Aggregations