Use of edu.illinois.cs.cogcomp.edison.features.Feature in the cogcomp-nlp project by CogComp.
Class DependencyPathNgrams, method getFeatures.
/**
 * Extracts n-gram features over the dependency path that links the governor of {@code c}
 * to {@code c} itself.
 *
 * <p>The constituent is first mapped to the node of the dependency view that covers its first
 * token. If that node has no incoming relation, an empty set is returned. Otherwise two parallel
 * sequences are built from the paths to the common ancestor: one holding only relation labels and
 * direction markers, the other additionally prefixed with the POS tag of each node. Both are
 * handed to {@code getNgrams}; the second one uses the prefix {@code "pos"}.
 *
 * @param c the constituent to extract features for
 * @return the collected features, in insertion order; empty when the dependency node of
 *         {@code c} has no incoming relation
 * @throws EdisonException declared by the extractor contract; presumably raised by the helpers
 *         used here
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    Set<Feature> features = new LinkedHashSet<>();
    TreeView parse = (TreeView) ta.getView(dependencyViewName);

    // Node of the dependency view covering the first token of c.
    Constituent dependent = parse.getConstituentsCoveringToken(c.getStartSpan()).get(0);
    List<Relation> incoming = dependent.getIncomingRelations();
    if (incoming.isEmpty()) {
        return features;
    }

    // Node of the dependency view covering the first token of the governor.
    int governorToken = incoming.get(0).getSource().getStartSpan();
    Constituent governor = parse.getConstituentsCoveringToken(governorToken).get(0);

    // 400 is passed through to the helper unchanged (presumably a depth limit - confirm).
    Pair<List<Constituent>, List<Constituent>> paths =
            PathFeatureHelper.getPathsToCommonAncestor(governor, dependent, 400);
    List<Constituent> upward = paths.getFirst();
    List<Constituent> downward = paths.getSecond();

    List<String> labelPath = new ArrayList<>();
    List<String> posPath = new ArrayList<>();

    // Upward leg: every node except the last one contributes its incoming relation label.
    for (int idx = 0; idx < upward.size() - 1; idx++) {
        Constituent node = upward.get(idx);
        String upStep =
                node.getIncomingRelations().get(0).getRelationName()
                        + PathFeatureHelper.PATH_UP_STRING;
        labelPath.add(upStep);
        posPath.add(WordHelpers.getPOS(ta, node.getStartSpan()) + ":" + upStep);
    }

    // The last node of the upward leg is marked with "*".
    Constituent apex = upward.get(upward.size() - 1);
    posPath.add(WordHelpers.getPOS(ta, apex.getStartSpan()) + ":*");
    labelPath.add("*");

    // Downward leg, walked from just below the last element back to index 0.
    // When the list has fewer than two elements the loop body never runs.
    for (int idx = downward.size() - 2; idx >= 0; idx--) {
        Constituent node = downward.get(idx);
        posPath.add(
                WordHelpers.getPOS(ta, node.getStartSpan())
                        + ":"
                        + PathFeatureHelper.PATH_DOWN_STRING);
        labelPath.add(PathFeatureHelper.PATH_DOWN_STRING);
    }

    features.addAll(getNgrams(labelPath, ""));
    features.addAll(getNgrams(posPath, "pos"));
    return features;
}
Use of edu.illinois.cs.cogcomp.edison.features.Feature in the cogcomp-nlp project by CogComp.
Class LinearPosition, method getFeatures.
/**
 * Emits a single feature describing where {@code c} lies relative to the source of its first
 * incoming relation (called the predicate here).
 *
 * <ul>
 *   <li>{@code BEFORE} when the predicate starts at or after the end of {@code c};</li>
 *   <li>{@code AFTER} when {@code c} starts at or after the end of the predicate;</li>
 *   <li>{@code CONTAINS} otherwise, i.e. when the two spans overlap.</li>
 * </ul>
 *
 * @param c the constituent to extract the feature for
 * @return a set with exactly one feature, or an empty set when {@code c} has no incoming relation
 * @throws EdisonException declared by the extractor contract
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    List<Relation> incoming = c.getIncomingRelations();
    Set<Feature> features = new LinkedHashSet<>();
    if (incoming.isEmpty()) {
        return features;
    }

    Constituent predicate = incoming.get(0).getSource();
    Feature position;
    if (predicate.getStartSpan() >= c.getEndSpan()) {
        position = BEFORE;
    } else if (c.getStartSpan() >= predicate.getEndSpan()) {
        position = AFTER;
    } else {
        position = CONTAINS;
    }
    features.add(position);
    return features;
}
Use of edu.illinois.cs.cogcomp.edison.features.Feature in the cogcomp-nlp project by CogComp.
Class ParseHeadWordPOS, method getFeatures.
/**
 * Produces two features for the head word of the parse phrase that corresponds to {@code c}:
 * {@code "hw:"} followed by the lower-cased, trimmed head token, and {@code "h-pos:"} followed
 * by the head token's POS tag.
 *
 * @param c the constituent whose parse phrase is looked up in the view named by
 *        {@code parseViewName}
 * @return the two features, head word first
 * @throws EdisonException wrapping any exception raised while looking up the parse phrase
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parseTree = (TreeView) ta.getView(parseViewName);

    Constituent parsePhrase;
    try {
        parsePhrase = parseTree.getParsePhrase(c);
    } catch (Exception cause) {
        // Surface any lookup failure through the extractor's own exception type.
        throw new EdisonException(cause);
    }

    Set<Feature> features = new LinkedHashSet<>();
    int headPosition = CollinsHeadFinder.getInstance().getHeadWordPosition(parsePhrase);

    String headWord = ta.getToken(headPosition).toLowerCase().trim();
    features.add(DiscreteFeature.create("hw:" + headWord));
    features.add(DiscreteFeature.create("h-pos:" + WordHelpers.getPOS(ta, headPosition)));
    return features;
}
Use of edu.illinois.cs.cogcomp.edison.features.Feature in the cogcomp-nlp project by CogComp.
Class TestWordTypeInformation, method test.
/**
 * Verifies the feature names produced by {@code WordTypeInformation} for one token constituent.
 *
 * <p>Every extracted feature name must pass two checks: it has to appear in a hard-coded list of
 * expected names, and it has to appear in a set recomputed here from the surface forms of the
 * constituents at indices 1 to 5 of the test list (the target at index 3 plus two neighbours on
 * each side).
 *
 * @throws EdisonException declared for the call to the extractor; presumably raised when feature
 *         extraction fails
 */
@Test
public final void test() throws EdisonException {
    log.debug("WordTypeInformation");
    // Test data: the TOKENS constituents covering span 0 - 20 of the text annotation at index 1.
    TextAnnotation ta = tas.get(1);
    View TOKENS = ta.getView("TOKENS");
    log.debug("GOT TOKENS FROM TEXTAnn");
    List<Constituent> testlist = TOKENS.getConstituentsCoveringSpan(0, 20);

    // Remember the surface forms at list indices [start, end); slot 0 holds index `start`.
    String[] teststrings = new String[5];
    int i = 0, start = 1, end = 6;
    for (Constituent c : testlist) {
        log.debug(c.getSurfaceForm());
        if (i >= start && i < end) {
            teststrings[i - start] = c.getSurfaceForm();
        }
        i++;
    }
    log.debug("Testlist size is " + testlist.size());

    Constituent test = testlist.get(3);
    log.debug("The constituent we are extracting features from in this test is: " + test.getSurfaceForm());
    WordTypeInformation wti = new WordTypeInformation("WordTypeInformation");
    log.debug("Startspan is " + test.getStartSpan() + " and Endspan is " + test.getEndSpan());
    Set<Feature> feats = wti.getFeatures(test);
    String[] expected_outputs = { "WordTypeInformation:c0(false)", "WordTypeInformation:d0(false)", "WordTypeInformation:c1(false)", "WordTypeInformation:d1(false)", "WordTypeInformation:c2(false)", "WordTypeInformation:d2(false)", "WordTypeInformation:c2(true)", "WordTypeInformation:c3(false)", "WordTypeInformation:d3(false)", "WordTypeInformation:c4(false)", "WordTypeInformation:d4(false)", "WordTypeInformation:c4(true)" };

    if (feats == null) {
        log.debug("Feats are returning NULL.");
    }
    assertFalse(feats == null);

    log.debug("Printing Set of Features");
    for (Feature f : feats) {
        log.debug(f.getName());
        // Use a JUnit assertion rather than the `assert` keyword: the keyword is skipped
        // entirely when the JVM runs without -ea, which would turn this check into a no-op.
        boolean unexpected = !ArrayUtils.contains(expected_outputs, f.getName());
        assertFalse(unexpected);
    }

    // Recompute the expected feature names from the remembered surface forms.
    String classifier = "WordTypeInformation";
    Set<String> recomputed = new LinkedHashSet<String>();
    for (; (start < end && teststrings[start - 1] != null); start++) {
        String form = teststrings[start - 1];
        int position = start - 1;
        boolean allCapitalized = true, allDigits = true, allNonLetters = true;
        for (int j = 0; j < form.length(); ++j) {
            char ch = form.charAt(j);
            allCapitalized &= Character.isUpperCase(ch);
            allDigits &= Character.isDigit(ch);
            allNonLetters &= !Character.isLetter(ch);
        }
        recomputed.add(classifier + ":" + "c" + position + "(" + allCapitalized + ")");
        recomputed.add(classifier + ":" + "d" + position + "(" + allDigits + ")");
        // NOTE(review): the non-letters flag reuses the "c" prefix. This agrees with
        // expected_outputs, which lists both c2(false) and c2(true); presumably it mirrors
        // the extractor's own naming - confirm before changing.
        recomputed.add(classifier + ":" + "c" + position + "(" + allNonLetters + ")");
    }

    // Every extracted feature must be one of the recomputed names.
    for (Feature feat : feats) {
        boolean missing = !recomputed.contains(feat.getName());
        assertFalse(missing);
    }
}
Use of edu.illinois.cs.cogcomp.edison.features.Feature in the cogcomp-nlp project by CogComp.
Class TestPOSWindow, method test.
/**
 * Smoke test for {@code POSWindow}: extracts features for every TOKENS constituent covering
 * span 0 - 20 of the text annotation at index 2 and logs their names. This is done once for a
 * plain {@code POSWindow}, once for one backed by a {@code POSBaseLineCounter}, and once for one
 * backed by a {@code POSMikheevCounter}. The test only logs; it makes no assertions.
 *
 * @throws Exception if any of the steps above throws
 */
@Test
public final void test() throws Exception {
    logger.info("POSWindow Feature Extractor");
    View tokenView = tas.get(2).getView("TOKENS");
    logger.info("GOT TOKENS FROM TEXTAnn");

    List<Constituent> constituents = tokenView.getConstituentsCoveringSpan(0, 20);
    for (Constituent constituent : constituents) {
        logger.info(constituent.getSurfaceForm());
    }
    logger.info("Testlist size is " + constituents.size());

    // Both counters are built from the shared test corpus before any extractor is created.
    POSBaseLineCounter baseLineCounter = new POSBaseLineCounter("posBaseLine");
    baseLineCounter.buildTable(TestPosHelper.corpus);
    POSMikheevCounter mikheevCounter = new POSMikheevCounter("posMikheev");
    mikheevCounter.buildTable(TestPosHelper.corpus);

    POSWindow viewWindow = new POSWindow("posWindowPOS");
    POSWindow baseLineWindow = new POSWindow("posWindowPOSBaseLine", baseLineCounter);
    POSWindow mikheevWindow = new POSWindow("posWindowPOSMikheev", mikheevCounter);

    logFeatureSets(viewWindow, constituents, "Test when using POS View");
    logFeatureSets(baseLineWindow, constituents, "Test when using POS baseline Counting");
    logFeatureSets(mikheevWindow, constituents, "Test when using POS Mikheev Counting");

    logger.info("GOT FEATURES YES!");
}

/**
 * Runs {@code extractor} over every constituent, then logs a heading followed by the name of
 * each extracted feature, one feature set after another.
 */
private void logFeatureSets(POSWindow extractor, List<Constituent> constituents, String title)
        throws Exception {
    List<Set<Feature>> featureSets = new ArrayList<>();
    for (Constituent constituent : constituents) {
        featureSets.add(extractor.getFeatures(constituent));
    }
    if (featureSets.isEmpty()) {
        logger.info("Feats list is returning NULL.");
    }
    logger.info("\n" + title);
    logger.info("Printing list of Feature set");
    for (Set<Feature> featureSet : featureSets) {
        logger.info("\n");
        for (Feature feature : featureSet) {
            logger.info(feature.getName());
        }
    }
}
Aggregations