use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class TestLabelTwoBefore method test.
/**
 * Smoke test for the {@code LabelTwoBefore} feature extractor.
 *
 * <p>Extracts features for every TOKENS constituent covering the span 0-20 of the text
 * annotation at index 2, once for each of three extractor configurations (default, POS
 * baseline counter, POS Mikheev counter), and logs the resulting feature names. The test
 * makes no assertions on the extracted features; it only fails if extraction throws.
 *
 * @throws Exception if building the counters or extracting features fails
 */
@Test
public final void test() throws Exception {
    logger.info("LabelTwoBefore Feature Extractor");
    // Use the text annotation at index 2 and the TOKENS constituents covering span 0-20.
    TextAnnotation ta = tas.get(2);
    View TOKENS = ta.getView("TOKENS");
    logger.info("GOT TOKENS FROM TEXTAnn");
    List<Constituent> testlist = TOKENS.getConstituentsCoveringSpan(0, 20);
    for (Constituent c : testlist) {
        logger.info(c.getSurfaceForm());
    }
    logger.info("Testlist size is " + testlist.size());

    // Both counters are built from the same helper corpus before any extractor is created.
    POSBaseLineCounter posBaseLine = new POSBaseLineCounter("posBaseLine");
    posBaseLine.buildTable(TestPosHelper.corpus);
    POSMikheevCounter posMikheev = new POSMikheevCounter("posMikheev");
    posMikheev.buildTable(TestPosHelper.corpus);

    LabelTwoBefore l2bPOS = new LabelTwoBefore("l2bPOS");
    LabelTwoBefore l2bPOSBaseLine = new LabelTwoBefore("l2bPOSBaseLine", posBaseLine);
    LabelTwoBefore l2bPOSMikheev = new LabelTwoBefore("l2bPOSMikheev", posMikheev);

    logExtractedFeatures(l2bPOS, testlist, "Test when using POS View");
    logExtractedFeatures(l2bPOSBaseLine, testlist, "Test when using POS baseline Counting");
    logExtractedFeatures(l2bPOSMikheev, testlist, "Test when using POS Mikheev Counting");
    logger.info("GOT FEATURES YES!");
}

/**
 * Runs one extractor over every constituent and logs the names of all features it produced.
 *
 * @param extractor the extractor configuration under test
 * @param constituents the constituents to extract features from
 * @param title heading logged before the feature names
 * @throws Exception if feature extraction fails for any constituent
 */
private void logExtractedFeatures(LabelTwoBefore extractor, List<Constituent> constituents,
        String title) throws Exception {
    List<Set<Feature>> featslist = new ArrayList<>();
    for (Constituent c : constituents) {
        featslist.add(extractor.getFeatures(c));
    }
    // One entry is added per constituent, so this only fires when there were no constituents.
    if (featslist.isEmpty()) {
        logger.info("Feats list is returning NULL.");
    }
    logger.info("\n" + title);
    logger.info("Printing list of Feature set");
    for (Set<Feature> feats : featslist) {
        for (Feature f : feats) {
            logger.info(f.getName());
        }
    }
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class TestMixedChunkWindowTwoBeforePOSWindowThreeBefore method test.
/**
 * Checks that {@code MixedChunkWindowTwoBeforePOSWindowThreeBefore} only emits expected
 * feature names for one token of the text annotation at index 3.
 *
 * <p>The constituent under test is the one at index 5 among the TOKENS constituents covering
 * the span 0-20. The test fails if the extractor returns {@code null} or produces a feature
 * whose name is not in the expected list. Failures are raised as explicit
 * {@link AssertionError}s so the check does not depend on the JVM running with assertions
 * enabled.
 *
 * @throws EdisonException if feature extraction fails
 */
@Test
public final void test() throws EdisonException {
    log.debug("SOPREVIOUS");
    // Use the text annotation at index 3 and the TOKENS constituents covering span 0-20.
    TextAnnotation ta = tas.get(3);
    View TOKENS = ta.getView("TOKENS");
    log.debug("GOT TOKENS FROM TEXTAnn");
    List<Constituent> testlist = TOKENS.getConstituentsCoveringSpan(0, 20);
    for (Constituent c : testlist) {
        log.debug(c.getSurfaceForm());
    }
    log.debug("Testlist size is " + testlist.size());
    Constituent test = testlist.get(5);
    log.debug("The constituent we are extracting features from in this test is: " + test.getSurfaceForm());
    MixedChunkWindowTwoBeforePOSWindowThreeBefore SOP = new MixedChunkWindowTwoBeforePOSWindowThreeBefore("MixedChunkWindowTwoBeforePOSWindowThreeBefore");
    Set<Feature> feats = SOP.getFeatures(test);
    String[] expected_outputs = { "MixedChunkWindowTwoBeforePOSWindowThreeBefore:ll(NP_VP)", "MixedChunkWindowTwoBeforePOSWindowThreeBefore:lt1(NP_RB)", "MixedChunkWindowTwoBeforePOSWindowThreeBefore:lt2VP_VBN" };
    if (feats == null) {
        log.debug("Feats are returning NULL.");
        // Fail with a clear message instead of hitting a NullPointerException in the loop below.
        throw new AssertionError("getFeatures returned null for constituent: " + test.getSurfaceForm());
    }
    log.debug("Printing Set of Features");
    for (Feature f : feats) {
        log.debug(f.getName());
        // Explicit check: the 'assert' keyword is skipped when the JVM runs without -ea.
        if (!ArrayUtils.contains(expected_outputs, f.getName())) {
            throw new AssertionError("Unexpected feature name: " + f.getName());
        }
    }
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class WriteSVMLightFormat method writeFeatureExample.
/**
 * Builds one SVMLight-format example line from an Edison label and its active features.
 *
 * <p>The label is converted to an integer via {@code getLabel}, and every feature name is
 * converted to an integer id via {@code featureLex}, which is expected to hand back the same
 * id for a feature name it has already seen. The line has the form
 * {@code <label> <id>:1 <id>:1 ...}: features are treated as sparse booleans, so each active
 * feature is written with the value {@code 1}.
 *
 * <p>SVMLight requires feature ids in ascending order, so the ids are sorted before being
 * written. For binary classification SVMLight expects labels in {-1, 1}; for multi-class
 * problems labels are one integer per class, and for ranking they are integer ranks. A label
 * may share its integer value with a feature id.
 *
 * @param label the string label of the example
 * @param activeFeatures the features that are active for this example
 * @return the example as a single SVMLight-format line, without a trailing newline
 */
public String writeFeatureExample(String label, Collection<Feature> activeFeatures) {
    final int labelId = getLabel(label);

    // Resolve every active feature to its integer id.
    final int[] sortedIds = new int[activeFeatures.size()];
    int next = 0;
    for (Feature feature : activeFeatures) {
        sortedIds[next] = featureLex.getFeatureId(feature.getName());
        next++;
    }
    // SVMLight rejects examples whose feature ids are not ascending.
    Arrays.sort(sortedIds);

    final StringBuilder line = new StringBuilder();
    line.append(labelId);
    for (int i = 0; i < sortedIds.length; i++) {
        line.append(" ");
        line.append(sortedIds[i]);
        line.append(":1");
    }
    return line.toString();
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class TestWordFeatureFactory method testFeatureCollection.
/**
 * Compares the output of a three-extractor {@code FeatureCollection} against stored
 * reference values for every token of every text annotation in {@code tas}.
 *
 * <p>Reference values are read from the {@code feature.collection.test} resource and are
 * keyed by the hash code of {@code "<tokenized text>:<token index>"}. Tokens that produce
 * no features are not checked.
 *
 * @throws Exception if the reference resource cannot be read or feature extraction fails
 */
@Test
public final void testFeatureCollection() throws Exception {
    FeatureCollection collection = new FeatureCollection("features");
    collection.addFeatureExtractor(WordFeatureExtractorFactory.conflatedPOS);
    collection.addFeatureExtractor(WordFeatureExtractorFactory.gerundMarker);
    collection.addFeatureExtractor(WordFeatureExtractorFactory.nominalizationMarker);
    logger.info("\tTesting feature collection");
    Map<Integer, String> expectedByKey = IOUtils.readObjectAsResource(TestWordFeatureFactory.class, "feature.collection.test");
    for (TextAnnotation ta : tas) {
        int tokenId = 0;
        while (tokenId < ta.size()) {
            // Single-token constituent with empty label and view name.
            Constituent token = new Constituent("", "", ta, tokenId, tokenId + 1);
            Set<Feature> extracted = collection.getFeatures(token);
            if (!extracted.isEmpty()) {
                String id = ta.getTokenizedText() + ":" + tokenId;
                int key = id.hashCode();
                assertEquals(expectedByKey.get(key), extracted.toString());
            }
            tokenId++;
        }
    }
}
use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.
the class TextStatistics method consume.
/**
 * Counts the features of every constituent generated from the given text annotation.
 *
 * <p>Each feature returned by {@code fex} is passed to {@code count}. The constituent
 * counter is incremented only for constituents that were processed without an
 * {@code EdisonException}; a failing constituent has its stack trace printed and is skipped.
 * The text counter is incremented once per call regardless of failures.
 *
 * @param ta the text annotation to gather statistics from
 */
@Override
protected void consume(TextAnnotation ta) {
    for (Constituent candidate : constituentGenerator.transform(ta)) {
        try {
            for (Feature extracted : fex.getFeatures(candidate)) {
                count(extracted);
            }
            // Reached only when extraction and counting both succeeded for this constituent.
            constituentCounter.incrementAndGet();
        } catch (EdisonException failure) {
            failure.printStackTrace();
        }
    }
    textCounter.incrementAndGet();
}
Aggregations