use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class TestCorlex method test.
/**
 * Extracts Corelex word features for the token at index 1 of the fixture annotation at index 1
 * and checks that every returned feature name is one of the expected outputs.
 *
 * @throws EdisonException propagated from the calls made in this test
 */
public final void test() throws EdisonException {
    log.debug("Corlex Feature Extractor");
    TextAnnotation ta = tas.get(1);
    // The view is not used below; the lookup is kept so the test behaves as before when the
    // fixture has no TOKENS view (presumably getView rejects a missing view -- verify).
    View TOKENS = ta.getView("TOKENS");
    log.debug("Got tokens FROM TextAnnotation");
    CorelexFeatureExtractor testInstance = new CorelexFeatureExtractor(true);
    Set<Feature> feats = testInstance.getWordFeatures(ta, 1);
    String[] expected_outputs = { "atr" };
    if (feats == null) {
        log.debug("Feats are returning NULL.");
    }
    // Fail with an assertion here instead of a NullPointerException in the loop below.
    assertTrue(feats != null);
    log.debug("Printing Set of Features");
    for (Feature f : feats) {
        log.debug(f.getName());
        // Every produced feature must be one of the expected names.
        assertTrue(ArrayUtils.contains(expected_outputs, f.getName()));
    }
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class TestLabelOneBefore method test.
/**
 * Runs three LabelOneBefore extractors (plain, backed by a POSBaseLineCounter, and backed by a
 * POSMikheevCounter) over a set of TOKENS constituents and logs the names of the features each
 * one produces. The test makes no assertions; it only exercises the extractors.
 *
 * @throws Exception propagated from the calls made in this test
 */
public final void test() throws Exception {
    logger.info("LabelOneBefore Feature Extractor");
    // Fixture annotation at index 2; constituents come from getConstituentsCoveringSpan(0, 20).
    TextAnnotation annotation = tas.get(2);
    View tokenView = annotation.getView("TOKENS");
    logger.info("GOT TOKENS FROM TEXTAnn");
    List<Constituent> tokens = tokenView.getConstituentsCoveringSpan(0, 20);
    for (Constituent token : tokens) {
        logger.info(token.getSurfaceForm());
    }
    logger.info("Testlist size is " + tokens.size());

    // Both counters are built from the shared test corpus before any extractor is created.
    POSBaseLineCounter baseLineCounter = new POSBaseLineCounter("posBaseLine");
    baseLineCounter.buildTable(TestPosHelper.corpus);
    POSMikheevCounter mikheevCounter = new POSMikheevCounter("posMikheev");
    mikheevCounter.buildTable(TestPosHelper.corpus);

    // Extractors and their log headings, in the order they are exercised.
    LabelOneBefore[] extractors = {
        new LabelOneBefore("l1bPOS"),
        new LabelOneBefore("l1bPOSBaseLine", baseLineCounter),
        new LabelOneBefore("l1bPOSMikheev", mikheevCounter)
    };
    String[] headings = {
        "Test when using POS View",
        "Test when using POS baseline Counting",
        "Test when using POS Mikheev Counting"
    };

    for (int k = 0; k < extractors.length; k++) {
        // Collect the feature set of every constituent first, then log them all.
        List<Set<Feature>> featureSets = new ArrayList<>();
        for (Constituent token : tokens) {
            featureSets.add(extractors[k].getFeatures(token));
        }
        if (featureSets.isEmpty()) {
            logger.info("Feats list is returning NULL.");
        }
        logger.info("\n" + headings[k]);
        logger.info("Printing list of Feature set");
        for (Set<Feature> featureSet : featureSets) {
            for (Feature feature : featureSet) {
                logger.info(feature.getName());
            }
        }
    }
    logger.info("GOT FEATURES YES!");
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class GazetteerWindowTwo method getFeatures.
/**
 * Builds one feature for each constituent of the GAZETTEER_NE view returned by
 * {@code getConstituentsCovering(c)}. Each feature is assembled from this extractor's name, the
 * offset of {@code c}'s start from the gazetteer constituent's start, and the gazetteer
 * constituent's label.
 *
 * @param c the constituent to extract features for
 * @return the set of generated features; empty when no gazetteer constituent covers {@code c}
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> feats = new HashSet<>();
    View gazView = c.getTextAnnotation().getView(ViewNames.GAZETTEER_NE);
    // suppose for now that gaz-ne view has a constituent for each gazetteer match -- either
    // a single constituent for each word for each match, or a single constituent for each
    // complete match
    List<Constituent> overlapCons = gazView.getConstituentsCovering(c);
    for (Constituent oc : overlapCons) {
        // Assumes we are dealing with multi-token Gazetteer constituents; otherwise, must track
        // match position of gazetteer entry in the single-token Constituent as a parameter
        // (e.g. attributes are keyed on name of matched gazetteer, and value is the position of
        // the gazetteer entry matched by the current token).
        int relativePosition = c.getStartSpan() - oc.getStartSpan();
        String[] pieces = { getName(), ":", "(", Integer.toString(relativePosition), ")", oc.getLabel() };
        feats.add(FeatureCreatorUtil.createFeatureFromArray(pieces));
    }
    return feats;
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class WordEmbeddingWindow method getFeatures.
/**
 * Emits one feature per embedding dimension for every position of the window around {@code c}.
 * Each feature is assembled from this extractor's name, the window position, the dimension
 * index and the embedding value. Positions whose word has no embedding contribute nothing.
 *
 * @param c the constituent the window is centred on
 * @return the set of generated features
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> features = new HashSet<>();
    // get allowable window given position in text
    IntPair relativeWindow = FeatureCreatorUtil.getWindowSpan(c, windowStart, windowEnd, ignoreSentenceBoundaries);
    // NOTE(review): the token index used below is c.getStartSpan() - first + i, so the first
    // iteration reads the token at c's own start; confirm this matches the offsets that
    // getWindowSpan returns.
    int absStart = c.getStartSpan() - relativeWindow.getFirst();
    View tokens = c.getTextAnnotation().getView(ViewNames.TOKENS);
    // The loop header alone advances i; an extra increment in the body used to skip every
    // other window position.
    for (int i = relativeWindow.getFirst(); i <= relativeWindow.getSecond(); ++i) {
        Constituent word = tokens.getConstituentsCoveringToken(absStart + i).get(0);
        double[] embedding = WordEmbeddings.getEmbedding(word);
        if (embedding != null) {
            for (int dim = 0; dim < embedding.length; dim++) {
                final String[] pieces = { getName(), ":", "place", Integer.toString(i), "dim", Integer.toString(dim), ":", Double.toString(embedding[dim]) };
                features.add(FeatureCreatorUtil.createFeatureFromArray(pieces));
            }
        }
    }
    return features;
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class TestPOSBaseLineFeatureExtractor method test.
/**
 * Runs a POSBaseLineFeatureExtractor built from the shared test corpus over a set of TOKENS
 * constituents and logs the name of every feature it produces. The test makes no assertions;
 * it only exercises the extractor.
 *
 * @throws Exception propagated from the calls made in this test
 */
@Test
public final void test() throws Exception {
    logger.info("POSBaseLine Feature Extractor");
    // Fixture annotation at index 2; constituents come from getConstituentsCoveringSpan(0, 20).
    TextAnnotation annotation = tas.get(2);
    View tokenView = annotation.getView("TOKENS");
    logger.info("GOT TOKENS FROM TEXTAnn");
    List<Constituent> tokens = tokenView.getConstituentsCoveringSpan(0, 20);
    for (Constituent token : tokens) {
        logger.info(token.getSurfaceForm());
    }
    logger.info("Testlist size is " + tokens.size());

    POSBaseLineFeatureExtractor extractor =
            new POSBaseLineFeatureExtractor("posBaseLine", "test_corpus", TestPosHelper.corpus);

    // Collect the feature set of every constituent first, then log them all.
    ArrayList<Set<Feature>> featureSets = new ArrayList<>();
    for (Constituent token : tokens) {
        featureSets.add(extractor.getFeatures(token));
    }
    if (featureSets.isEmpty()) {
        logger.info("Feats list is returning NULL.");
    }
    logger.info("Printing list of Feature set");
    for (Set<Feature> featureSet : featureSets) {
        for (Feature feature : featureSet) {
            logger.info(feature.getName());
        }
    }
    logger.info("GOT FEATURES YES!");
}
Aggregations