use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.
the class PosWordConjunctionSizeTwoWindowSizeTwo method getFeatures.
/**
 * Builds conjunctions of POS tag and word form over the window around a constituent.
 *
 * <p>This feature extractor assumes that the TOKENS view and the POS view have already been
 * generated in the constituent's TextAnnotation. For every position of the window arrays
 * returned by {@code getWindowK} / {@code getWindowKTags}, and for every n-gram size from 1 to
 * {@code k}, it emits one feature whose value is the sequence of {@code tag-form} pairs starting
 * at that position, joined with {@code "_"}. N-grams that would run past the end of the window
 * are truncated rather than skipped.
 *
 * @param c the constituent to extract features for (the code treats it as a single word/token)
 * @return the extracted features, in insertion order
 * @throws EdisonException if the TOKENS or POS view cannot be obtained from the TextAnnotation
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    View tokens;
    View pos;
    try {
        tokens = ta.getView(ViewNames.TOKENS);
        pos = ta.getView(ViewNames.POS);
    } catch (Exception e) {
        // Propagate instead of printing and continuing with null views, which would only
        // fail later with a less informative error.
        throw new EdisonException(e);
    }
    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();
    // All our constituents are words(tokens)
    // words two before & after
    int k = 2;
    int window = 2;
    String[] forms = getWindowK(tokens, startspan, endspan, k);
    String[] tags = getWindowKTags(pos, startspan, endspan, k);
    String classifier = "PosWordConjunctionSizeTwoWindowSizeTwo";
    Set<Feature> result = new LinkedHashSet<>();
    for (int j = 0; j < k; j++) {
        for (int i = 0; i < tags.length; i++) {
            StringBuilder f = new StringBuilder();
            // Collect up to (j + 1) consecutive tag-form pairs, stopping at the array end.
            for (int context = 0; context <= j && i + context < tags.length; context++) {
                if (context != 0) {
                    f.append("_");
                }
                f.append(tags[i + context]);
                f.append("-");
                f.append(forms[i + context]);
            }
            // 2 is the center object in the array so i should go from -2 to +2 (with 0 being
            // the center)
            // j is the size of the n-gram so it goes 1 to 2
            String id = classifier + ":" + ((i - window) + "_" + (j + 1));
            String value = "(" + f + ")";
            result.add(new DiscreteFeature(id + value));
        }
    }
    return result;
}
use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.
the class FeatureManifest method processQuery.
/**
 * Builds the feature extractor for an {@code if} query, reusing a cached one when available.
 *
 * <p>The tree must have exactly three children: the condition, the extractor definition used
 * when the condition holds, and the one used when it does not. The only condition label
 * accepted here is {@code exists}. The resulting conditional extractor is wrapped in a
 * {@code CachedFeatureCollection} and stored in {@code cf} under the tree's unique label.
 *
 * @param tree the parsed query
 * @param cf cache of already-built extractors, keyed by unique label; updated by this call
 * @return the cached or newly built extractor for {@code tree}
 * @throws EdisonException if the tree does not have three children or the condition label is
 *         not {@code exists}
 */
public FeatureExtractor processQuery(Tree<String> tree, Map<String, FeatureExtractor> cf) throws EdisonException {
    final String cacheKey = uniquify(tree);
    if (cf.containsKey(cacheKey)) {
        return cf.get(cacheKey);
    }

    if (tree.getNumberOfChildren() != 3) {
        throw new EdisonException("Invalid query. Expecting (if <query> <if-true> <if-false>).\n" + tree);
    }

    // Reject anything but an "exists" condition up front.
    final Tree<String> condition = tree.getChild(0);
    if (!condition.getLabel().equals("exists")) {
        throw new EdisonException("Unknown query '" + condition.getLabel() + "'");
    }
    final Predicate<Constituent> predicate = processExists(condition);

    final FeatureExtractor whenTrue = createFex(tree.getChild(1), cf);
    final FeatureExtractor whenFalse = createFex(tree.getChild(2), cf);
    final ConditionalFeatureExtractor conditional =
            new ConditionalFeatureExtractor(predicate, whenTrue, whenFalse);

    final CachedFeatureCollection cached = new CachedFeatureCollection("", conditional);
    cf.put(cacheKey, cached);
    return cached;
}
use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.
the class FeatureManifest method getWordNetFeatureExtractor.
/**
 * Builds a WordNet feature extractor for the given feature-type labels, reusing a cached one
 * when available.
 *
 * <p>Each label must be a key of {@code WordNetClasses.wnClasses}; the mapped feature type is
 * added to a new {@code WordNetFeatureExtractor}. The extractor is wrapped in a
 * {@code CachedFeatureCollection} and stored in {@code cf} under the labels' unique label.
 *
 * @param wnLabels names of the WordNet feature types to include
 * @param cf cache of already-built extractors, keyed by unique label; updated by this call
 * @return the cached or newly built extractor
 * @throws EdisonException if a label is unknown, or if creating or configuring the WordNet
 *         extractor fails (the underlying exception is attached as the cause)
 */
private FeatureExtractor getWordNetFeatureExtractor(List<String> wnLabels, Map<String, FeatureExtractor> cf) throws EdisonException {
    String uniqueLabel = uniquify(wnLabels);
    if (cf.containsKey(uniqueLabel)) {
        return cf.get(uniqueLabel);
    }
    try {
        WordNetFeatureExtractor wn = new WordNetFeatureExtractor();
        for (String label : wnLabels) {
            if (!WordNetClasses.wnClasses.containsKey(label)) {
                throw new EdisonException("Unknown wordnet feature extractor '" + label + "', expecting one of " + WordNetClasses.wnClasses.keySet());
            }
            wn.addFeatureType(WordNetClasses.wnClasses.get(label));
        }
        CachedFeatureCollection f = new CachedFeatureCollection("", wn);
        cf.put(uniqueLabel, f);
        return f;
    } catch (EdisonException e) {
        // Already the declared exception type: rethrow as-is so the "unknown label" message
        // is not buried inside a second EdisonException wrapper.
        throw e;
    } catch (Exception e) {
        throw new EdisonException(e);
    }
}
use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.
the class FeatureManifest method processIncludeWithPrefix.
/**
 * Builds a collection containing every child extractor of {@code tree} plus, when there is more
 * than one child, the conjunction of all of them. Reuses a cached extractor when available.
 *
 * <p>The conjunction is built left to right with {@code FeatureUtilities.conjoin}, starting from
 * a collection holding only the first child. The result is wrapped in a
 * {@code CachedFeatureCollection} and stored in {@code cf} under the tree's unique label.
 *
 * @param tree the parsed declaration; must have at least one child
 * @param cf cache of already-built extractors, keyed by unique label; updated by this call
 * @return the cached or newly built extractor for {@code tree}
 * @throws EdisonException if {@code tree} has no children
 */
private FeatureExtractor processIncludeWithPrefix(Tree<String> tree, Map<String, FeatureExtractor> cf) throws EdisonException {
    String uniqueLabel = uniquify(tree);
    if (cf.containsKey(uniqueLabel)) {
        return cf.get(uniqueLabel);
    }

    // Validate before allocating anything.
    int numChildren = tree.getNumberOfChildren();
    if (numChildren == 0) {
        throw new EdisonException("Invalid declaration for conjoin-and-include\n" + tree);
    }

    FeatureCollection fex = new FeatureCollection("");
    FeatureExtractor firstChild = createFex(tree.getChild(0), cf);
    fex.addFeatureExtractor(firstChild);

    if (numChildren > 1) {
        FeatureExtractor conjoin = new FeatureCollection("", firstChild);
        for (int childId = 1; childId < numChildren; childId++) {
            FeatureExtractor ff = createFex(tree.getChild(childId), cf);
            conjoin = FeatureUtilities.conjoin(conjoin, ff);
            fex.addFeatureExtractor(ff);
        }
        fex.addFeatureExtractor(conjoin);
    }

    CachedFeatureCollection cfx = new CachedFeatureCollection("", fex);
    // Store under the same key that was used for the lookup above.
    cf.put(uniqueLabel, cfx);
    return cfx;
}
use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.
the class SubcategorizationFrame method getFeatures.
/**
 * Produces a single subcategorization-frame feature for the parse phrase of a constituent.
 *
 * <p>The phrase is looked up in the tree view named by {@code parseViewName}. If the phrase has
 * no incoming relation, the feature is {@code "root"}. Otherwise the feature is the parent's
 * label, followed by {@code ">"}, followed by the labels of the targets of all of the parent's
 * outgoing relations in order, with the phrase's own label wrapped in parentheses.
 *
 * @param c the constituent to extract the feature for
 * @return a set containing the single feature described above
 * @throws EdisonException if the parse phrase for {@code c} cannot be obtained from the view
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> features = new LinkedHashSet<>();
    TreeView view = (TreeView) c.getTextAnnotation().getView(parseViewName);
    Constituent phrase;
    try {
        phrase = view.getParsePhrase(c);
    } catch (Exception e) {
        throw new EdisonException(e);
    }
    List<Relation> incomingRelations = phrase.getIncomingRelations();
    // Treat an empty list the same as null: either way there is no parent, and calling
    // get(0) on an empty list would throw IndexOutOfBoundsException.
    if (incomingRelations == null || incomingRelations.isEmpty()) {
        features.add(DiscreteFeature.create("root"));
    } else {
        Constituent parent = incomingRelations.get(0).getSource();
        StringBuilder subcat = new StringBuilder();
        subcat.append(parent.getLabel()).append(">");
        for (Relation r : parent.getOutgoingRelations()) {
            Constituent child = r.getTarget();
            // Identity comparison: mark the child that is this very phrase object.
            if (child == phrase) {
                subcat.append("(").append(child.getLabel()).append(")");
            } else {
                subcat.append(child.getLabel());
            }
        }
        features.add(DiscreteFeature.create(subcat.toString()));
    }
    return features;
}
Aggregations