Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in the cogcomp-nlp project by CogComp.
Class TestPOSMikheevFeatureExtractor, method test.
/**
 * Smoke test for {@code POSMikheevFeatureExtractor}.
 *
 * Walks every constituent of the "TOKENS" view of every text annotation in {@code tas},
 * extracts its features, and logs the names of those features whose name does not
 * contain "UNKNOWN". The test makes no assertions; it only fails if an exception is thrown.
 */
@Test
public final void test() throws Exception {
    POSMikheevFeatureExtractor extractor =
            new POSMikheevFeatureExtractor("posMikheev", "test_corpus", TestPosHelper.corpus);

    logger.info("POSMikheev Feature Extractor");
    logger.info("Only print the features with known tags");

    // Running index of the text annotation currently being processed (used for logging only).
    int annotationIndex = 0;
    for (TextAnnotation ta : tas) {
        ArrayList<String> knownFeatureNames = new ArrayList<>();
        View tokenView = ta.getView("TOKENS");

        for (Constituent token : tokenView) {
            Set<Feature> tokenFeatures = extractor.getFeatures(token);
            if (tokenFeatures.isEmpty()) {
                logger.info("Feats list is returning NULL.");
            }
            for (Feature feature : tokenFeatures) {
                String featureName = feature.getName();
                // Skip features that mention an unknown tag.
                if (featureName.contains("UNKNOWN")) {
                    continue;
                }
                knownFeatureNames.add(featureName);
            }
        }

        // Only report annotations that produced at least one known-tag feature.
        if (!knownFeatureNames.isEmpty()) {
            logger.info("-------------------------------------------------------");
            logger.info("Text Annotation: " + annotationIndex);
            logger.info("Text Features: ");
            for (String featureName : knownFeatureNames) {
                logger.info(featureName);
            }
            logger.info("-------------------------------------------------------");
        }
        annotationIndex++;
    }

    logger.info("GOT FEATURES YES!");
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in the cogcomp-nlp project by CogComp.
Class TestPOSWindowTwo, method test.
/**
 * Verifies the feature names produced by {@code POSWindowTwo}.
 *
 * Takes the text annotation at index 2 of {@code tas}, asks its "TOKENS" view for the
 * constituents returned by {@code getConstituentsCoveringSpan(0, 20)}, and extracts
 * features from the constituent at index 1 of that list. Every extracted feature name
 * must be one of the expected names.
 *
 * Failures are raised as {@link AssertionError} explicitly rather than through the
 * {@code assert} keyword, so the checks are enforced even when the JVM runs without
 * {@code -ea}.
 *
 * @throws EdisonException declared by the original test; propagated from the extractor calls
 */
@Test
public final void test() throws EdisonException {
    log.debug("POSWindowTwo Feature Extractor");

    TextAnnotation ta = tas.get(2);
    View tokenView = ta.getView("TOKENS");
    List<Constituent> candidates = tokenView.getConstituentsCoveringSpan(0, 20);
    for (Constituent candidate : candidates) {
        log.debug(candidate.getSurfaceForm());
    }

    Constituent target = candidates.get(1);
    log.debug("The constituent we are extracting features from in this test is: " + target.getSurfaceForm());
    log.debug("Startspan is " + target.getStartSpan() + " and Endspan is " + target.getEndSpan());

    POSWindowTwo extractor = new POSWindowTwo("POSWindowTwo");
    Set<Feature> feats = extractor.getFeatures(target);

    String[] expectedOutputs = {
        "POSWindowTwo:0(DT)",
        "POSWindowTwo:1(VBZ)",
        "POSWindowTwo:2(DT)",
        "POSWindowTwo:3(NN)"
    };

    // Previously a null result was only logged and the loop below then threw a
    // NullPointerException; fail with a clear message instead.
    if (feats == null) {
        log.debug("Feats are returning NULL.");
        throw new AssertionError("Feats are returning NULL.");
    }

    log.debug("Printing Set of Features");
    for (Feature f : feats) {
        String featureName = f.getName();
        // Explicit check: the `assert` keyword is a no-op unless assertions are enabled.
        if (!ArrayUtils.contains(expectedOutputs, featureName)) {
            throw new AssertionError("Unexpected feature: " + featureName);
        }
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in the cogcomp-nlp project by CogComp.
Class TestWordConjunctionOneTwoThreeGramWindowTwo, method test.
/**
 * Verifies the feature names produced by {@code WordConjunctionOneTwoThreeGramWindowTwo}
 * for the constituent at index 3 of the "TOKENS" view of {@code ta}: each extracted
 * feature name must appear in the list of expected names.
 *
 * @throws EdisonException propagated from feature extraction
 */
@Test
public final void test() throws EdisonException {
    // The constituent at list index 3 is the one under test.
    List<Constituent> tokens = ta.getView("TOKENS").getConstituents();
    Constituent target = tokens.get(3);

    WordConjunctionOneTwoThreeGramWindowTwo extractor =
            new WordConjunctionOneTwoThreeGramWindowTwo("WordConj3GramWin2");
    Set<Feature> extracted = extractor.getFeatures(target);

    String[] expectedNames = {
        // unigrams
        "WordConjunctionOneTwoThreeGramWindowTwo:-2_1(construction)",
        "WordConjunctionOneTwoThreeGramWindowTwo:-1_1(of)",
        "WordConjunctionOneTwoThreeGramWindowTwo:0_1(the)",
        "WordConjunctionOneTwoThreeGramWindowTwo:1_1(John)",
        "WordConjunctionOneTwoThreeGramWindowTwo:2_1(Smith)",
        // bigrams
        "WordConjunctionOneTwoThreeGramWindowTwo:-2_2(construction_of)",
        "WordConjunctionOneTwoThreeGramWindowTwo:-1_2(of_the)",
        "WordConjunctionOneTwoThreeGramWindowTwo:0_2(the_John)",
        "WordConjunctionOneTwoThreeGramWindowTwo:1_2(John_Smith)",
        "WordConjunctionOneTwoThreeGramWindowTwo:2_2(Smith)",
        // trigrams
        "WordConjunctionOneTwoThreeGramWindowTwo:-2_3(construction_of_the)",
        "WordConjunctionOneTwoThreeGramWindowTwo:-1_3(of_the_John)",
        "WordConjunctionOneTwoThreeGramWindowTwo:0_3(the_John_Smith)",
        "WordConjunctionOneTwoThreeGramWindowTwo:1_3(John_Smith)",
        "WordConjunctionOneTwoThreeGramWindowTwo:2_3(Smith)"
    };

    if (extracted == null) {
        fail("Feats are returning NULL.");
    }

    for (Feature feature : extracted) {
        boolean isExpected = ArrayUtils.contains(expectedNames, feature.getName());
        assertTrue(isExpected);
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in the cogcomp-nlp project by CogComp.
Class TestSrlNomIdentifier, method test.
/**
 * Compares the features produced by a manifest-built extractor (loaded from
 * {@code nom-identifier.fex}) with those of {@code SrlNomIdentifier}, using
 * {@code SRLFeaturesComparator.isEqual}, for a handful of SRL_VERB constituents.
 *
 * Only in and out relations in the SRL_VERB view are used for the purpose of testing.
 */
public final void test() throws Exception {
    logger.info("Nom_Identifier Feature Extractor");

    // Build an annotated dummy text annotation carrying every view the features read.
    String[] requiredViews = {
        ViewNames.POS,
        ViewNames.LEMMA,
        ViewNames.SHALLOW_PARSE,
        ViewNames.PARSE_GOLD,
        ViewNames.SRL_VERB,
        ViewNames.PARSE_STANFORD,
        ViewNames.NER_CONLL
    };
    TextAnnotation ta =
            DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation(requiredViews, true, 3);
    ta.addView(ClauseViewGenerator.STANFORD);
    ta.addView(PseudoParse.STANFORD);
    logger.info("This textannotation annotates the text: \n" + ta.getText());

    // Collect the constituents under test from two spans of the SRL_VERB view.
    View srlVerbView = ta.getView("SRL_VERB");
    List<Constituent> constituentsUnderTest = srlVerbView.getConstituentsCoveringSpan(10, 13);
    constituentsUnderTest.addAll(srlVerbView.getConstituentsCoveringSpan(26, 27));

    // Load the manifest, then register the custom extractors/transformers before creating the fex.
    String manifestPath = Constant.prefix + "/Nom/Identifier/nom-identifier.fex";
    FeatureManifest manifest = new FeatureManifest(new FileInputStream(manifestPath));
    FeatureManifest.setFeatureExtractor("hyphen-argument-feature", FeatureGenerators.hyphenTagFeature);
    FeatureManifest.setTransformer("parse-left-sibling",
            FeatureGenerators.getParseLeftSibling(ViewNames.PARSE_STANFORD));
    FeatureManifest.setTransformer("parse-right-sibling",
            FeatureGenerators.getParseRightSibling(ViewNames.PARSE_STANFORD));
    FeatureManifest.setFeatureExtractor("pp-features",
            FeatureGenerators.ppFeatures(ViewNames.PARSE_STANFORD));
    FeatureManifest.setFeatureExtractor("projected-path",
            new ProjectedPath(ViewNames.PARSE_STANFORD));
    manifest.useCompressedName();
    manifest.setVariable("*default-parser*", ViewNames.PARSE_STANFORD);
    FeatureExtractor manifestFex = manifest.createFex();

    SrlNomIdentifier nomIdentifier = new SrlNomIdentifier();
    for (Constituent constituent : constituentsUnderTest) {
        boolean sameFeatures = SRLFeaturesComparator.isEqual(constituent, manifestFex, nomIdentifier);
        assertTrue(sameFeatures);
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in the cogcomp-nlp project by CogComp.
Class TestContextFeatureExtractor, method testGetFeaturesIndexWithoutConstituent.
/**
 * Checks that a {@code ContextFeatureExtractor} built with arguments {@code (2, true, true)}
 * and a lower-cased word extractor yields the expected context features for two
 * constituents (token spans 2-3 and 2-4) of a tokenized sentence.
 *
 * Each check removes the extracted features from the expected set and asserts that
 * nothing is left, i.e. every expected feature was produced.
 *
 * @throws EdisonException propagated from feature extraction
 */
@Test
public void testGetFeaturesIndexWithoutConstituent() throws EdisonException {
    ContextFeatureExtractor contextFex = new ContextFeatureExtractor(2, true, true);
    contextFex.addFeatureExtractor(new WordFeatureExtractor() {
        @Override
        public Set<Feature> getWordFeatures(TextAnnotation ta, int wordPosition) throws EdisonException {
            // One discrete feature per word: its lower-cased form.
            Set<Feature> wordFeatures = new HashSet<>();
            wordFeatures.add(DiscreteFeature.create(WordHelpers.getWord(ta, wordPosition).toLowerCase()));
            return wordFeatures;
        }
    });

    TextAnnotation ta =
            TextAnnotationUtilities.createFromTokenizedString("This is a test for the feature extractor .");

    // First constituent: tokens 2-3.
    Constituent first = new Constituent("", "", ta, 2, 3);
    Set<String> firstExpectedNames = new HashSet<>(Arrays.asList(
            "context-2:#word#:this",
            "context-1:#word#:is",
            "context1:#word#:test",
            "context2:#word#:for"));
    Set<Feature> firstExpected = FeatureUtilities.getFeatures(firstExpectedNames);
    Set<Feature> firstActual = contextFex.getFeatures(first);
    firstExpected.removeAll(firstActual);
    assertEquals(0, firstExpected.size());

    // Second constituent: tokens 2-4.
    Constituent second = new Constituent("", "", ta, 2, 4);
    Set<String> secondExpectedNames = new HashSet<>(Arrays.asList(
            "context-2:#word#:this",
            "context-1:#word#:is",
            "context1:#word#:for",
            "context2:#word#:the"));
    Set<Feature> secondExpected = FeatureUtilities.getFeatures(secondExpectedNames);
    Set<Feature> secondActual = contextFex.getFeatures(second);
    secondExpected.removeAll(secondActual);
    assertEquals(0, secondExpected.size());
}
Aggregations