Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
From the class TestLabelTwoBefore, method test.
/**
 * Smoke test for {@code LabelTwoBefore}: extracts features for every TOKENS constituent
 * returned for span 0-20 of the annotation at index 2, once for each of three extractor
 * configurations (default, POS baseline counter, POS Mikheev counter), and logs the
 * resulting feature names.
 *
 * <p>The test makes no assertions about the feature values; it only fails if one of the
 * calls throws.
 *
 * @throws Exception if any of the called components throws
 */
@Test
public final void test() throws Exception {
    logger.info("LabelTwoBefore Feature Extractor");
    // Uses the TextAnnotation at index 2 and the TOKENS constituents covering span 0-20.
    TextAnnotation ta = tas.get(2);
    View TOKENS = ta.getView("TOKENS");
    logger.info("GOT TOKENS FROM TEXTAnn");
    List<Constituent> testlist = TOKENS.getConstituentsCoveringSpan(0, 20);
    for (Constituent c : testlist) {
        logger.info(c.getSurfaceForm());
    }
    logger.info("Testlist size is " + testlist.size());

    // Both counters are built from the shared helper corpus before any extractor is created.
    POSBaseLineCounter posBaseLine = new POSBaseLineCounter("posBaseLine");
    posBaseLine.buildTable(TestPosHelper.corpus);
    POSMikheevCounter posMikheev = new POSMikheevCounter("posMikheev");
    posMikheev.buildTable(TestPosHelper.corpus);

    LabelTwoBefore l2bPOS = new LabelTwoBefore("l2bPOS");
    LabelTwoBefore l2bPOSBaseLine = new LabelTwoBefore("l2bPOSBaseLine", posBaseLine);
    LabelTwoBefore l2bPOSMikheev = new LabelTwoBefore("l2bPOSMikheev", posMikheev);

    logExtractedFeatures(l2bPOS, testlist, "Test when using POS View");
    logExtractedFeatures(l2bPOSBaseLine, testlist, "Test when using POS baseline Counting");
    logExtractedFeatures(l2bPOSMikheev, testlist, "Test when using POS Mikheev Counting");

    logger.info("GOT FEATURES YES!");
}

/**
 * Runs {@code extractor} over every constituent, then logs the heading followed by the
 * name of each extracted feature. All extraction happens before anything is logged.
 *
 * @param extractor the feature extractor configuration under test
 * @param constituents the constituents to extract features from
 * @param heading the text logged (after a leading newline) before the feature names
 * @throws Exception if feature extraction throws
 */
private void logExtractedFeatures(LabelTwoBefore extractor, List<Constituent> constituents, String heading) throws Exception {
    List<Set<Feature>> featslist = new ArrayList<>();
    for (Constituent test : constituents) {
        featslist.add(extractor.getFeatures(test));
    }
    // Reached only when there were no constituents at all: the list is empty, not null.
    if (featslist.isEmpty()) {
        logger.info("Feats list is returning NULL.");
    }
    logger.info("\n" + heading);
    logger.info("Printing list of Feature set");
    for (Set<Feature> feats : featslist) {
        for (Feature f : feats) {
            logger.info(f.getName());
        }
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
From the class TestMixedChunkWindowTwoBeforePOSWindowThreeBefore, method test.
/**
 * Extracts {@code MixedChunkWindowTwoBeforePOSWindowThreeBefore} features for the
 * constituent at list index 5 (taken from the TOKENS constituents covering span 0-20 of
 * the annotation at index 3) and verifies that every produced feature name is one of the
 * expected names.
 *
 * <p>Failures are raised with an explicit {@link AssertionError} rather than the
 * {@code assert} keyword, so the check still runs when the JVM is started without
 * {@code -ea}.
 *
 * @throws EdisonException if feature extraction fails
 */
@Test
public final void test() throws EdisonException {
    log.debug("SOPREVIOUS");
    // Uses the TextAnnotation at index 3 and the TOKENS constituents covering span 0-20.
    TextAnnotation ta = tas.get(3);
    View TOKENS = ta.getView("TOKENS");
    log.debug("GOT TOKENS FROM TEXTAnn");
    List<Constituent> testlist = TOKENS.getConstituentsCoveringSpan(0, 20);
    for (Constituent c : testlist) {
        log.debug(c.getSurfaceForm());
    }
    log.debug("Testlist size is " + testlist.size());
    Constituent test = testlist.get(5);
    log.debug("The constituent we are extracting features from in this test is: " + test.getSurfaceForm());
    MixedChunkWindowTwoBeforePOSWindowThreeBefore SOP = new MixedChunkWindowTwoBeforePOSWindowThreeBefore("MixedChunkWindowTwoBeforePOSWindowThreeBefore");
    Set<Feature> feats = SOP.getFeatures(test);
    String[] expected_outputs = { "MixedChunkWindowTwoBeforePOSWindowThreeBefore:ll(NP_VP)", "MixedChunkWindowTwoBeforePOSWindowThreeBefore:lt1(NP_RB)", "MixedChunkWindowTwoBeforePOSWindowThreeBefore:lt2VP_VBN" };
    if (feats == null) {
        log.debug("Feats are returning NULL.");
        // Fail here with a clear message instead of hitting a NullPointerException in the loop below.
        throw new AssertionError("getFeatures returned null for constituent: " + test.getSurfaceForm());
    }
    log.debug("Printing Set of Features");
    for (Feature f : feats) {
        log.debug(f.getName());
        if (!ArrayUtils.contains(expected_outputs, f.getName())) {
            throw new AssertionError("Unexpected feature: " + f.getName());
        }
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
From the class SimpleGazetteerAnnotatorTest, method testAddView.
/**
 * Test method for
 * {@link edu.illinois.cs.cogcomp.edison.annotators.SimpleGazetteerAnnotator#addView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)}
 * .
 *
 * <p>Annotates the same sentence with the default resource manager and with an explicitly
 * configured one (phrase length 4, dictionaries under {@code /testgazetteers/}, eager
 * initialization), then checks the text and label of the constituents in the
 * {@code TREE_GAZETTEER} view. A final run uses a capitalized spelling of the
 * organization name.
 *
 * @throws URISyntaxException if thrown by the annotator calls
 * @throws IOException if thrown by the annotator calls
 * @throws AnnotatorException if thrown by the annotator calls
 */
@Test
public void testAddView() throws IOException, URISyntaxException, AnnotatorException {
    // --- Default configuration ---
    SimpleGazetteerAnnotator sga = new SimpleGazetteerAnnotator(defaultRm);
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionaries.size());
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionariesIgnoreCase.size());
    TextAnnotation ta = tab.createTextAnnotation("I hail from the university of illinois at champaign urbana.");
    sga.addView(ta);
    SpanLabelView view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    List<Constituent> entities = view.getConstituents();
    Constituent c1 = entities.get(0);
    assertEquals("university of illinois", c1.toString());
    Constituent c2 = entities.get(1);
    assertEquals("university of illinois at champaign urbana", c2.toString());
    Constituent c3 = entities.get(2);
    assertEquals("illinois", c3.toString());
    Constituent c4 = entities.get(3);
    assertEquals("champaign", c4.toString());
    Constituent c5 = entities.get(4);
    assertEquals("urbana", c5.toString());
    assertEquals("organizations(IC)", c1.getLabel());
    assertEquals("organizations(IC)", c2.getLabel());
    assertEquals("places(IC)", c3.getLabel());
    assertEquals("places(IC)", c4.getLabel());
    assertEquals("places(IC)", c5.getLabel());

    // --- Explicit configuration: with PHRASE_LENGTH set to 4 the six-token match above is no longer expected ---
    Properties props = new Properties();
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PHRASE_LENGTH.key, "4");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PATH_TO_DICTIONARIES.key, "/testgazetteers/");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.IS_LAZILY_INITIALIZED.key, SimpleGazetteerAnnotatorConfigurator.FALSE);
    sga = new SimpleGazetteerAnnotator(new ResourceManager(props));
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionaries.size());
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionariesIgnoreCase.size());
    ta = tab.createTextAnnotation("I hail from the university of illinois at champaign urbana.");
    sga.addView(ta);
    view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    entities = view.getConstituents();
    c1 = entities.get(0);
    assertEquals("university of illinois", c1.toString());
    c2 = entities.get(1);
    assertEquals("illinois", c2.toString());
    c3 = entities.get(2);
    assertEquals("champaign", c3.toString());
    c4 = entities.get(3);
    assertEquals("urbana", c4.toString());
    assertEquals("organizations(IC)", c1.getLabel());
    assertEquals("places(IC)", c2.getLabel());
    assertEquals("places(IC)", c3.getLabel());
    assertEquals("places(IC)", c4.getLabel());

    // --- Capitalized organization name: the first match carries the label without the "(IC)" suffix ---
    ta = tab.createTextAnnotation("I hail from the University of Illinois at champaign urbana.");
    sga.addView(ta);
    view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    entities = view.getConstituents();
    c1 = entities.get(0);
    assertEquals("University of Illinois", c1.toString());
    assertEquals("organizations", c1.getLabel());
    // The lookup below still verifies that a second constituent exists.
    c2 = entities.get(1);
    // NOTE(review): the two assertions below re-check c1 although c2 was just fetched; this
    // looks like a copy-paste slip. The expected text/label of c2 cannot be determined from
    // this test alone, so the original checks are kept -- TODO confirm and assert on c2.
    assertEquals("University of Illinois", c1.toString());
    assertEquals("organizations", c1.getLabel());
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
From the class TestContextFeatureExtractor, method testGetFeaturesNoIndexWithConstituent.
/**
 * Checks a {@code ContextFeatureExtractor} built with {@code (2, false, false)} that wraps
 * a lower-cased-word extractor. For each constituent the expected feature names are
 * converted to features, the extracted features are removed from them, and nothing may
 * remain. The check is one-directional: extra extracted features do not fail the test.
 *
 * <p>Here the expected names include the constituent's own tokens as well as the
 * surrounding ones.
 *
 * @throws EdisonException if feature extraction fails
 */
@Test
public void testGetFeaturesNoIndexWithConstituent() throws EdisonException {
    ContextFeatureExtractor fex = new ContextFeatureExtractor(2, false, false);
    // Emits the lower-cased token at the requested position as a single feature.
    WordFeatureExtractor lowerCasedWord = new WordFeatureExtractor() {
        @Override
        public Set<Feature> getWordFeatures(TextAnnotation ta, int wordPosition) throws EdisonException {
            Set<String> single = new HashSet<>();
            single.add(WordHelpers.getWord(ta, wordPosition).toLowerCase());
            return FeatureUtilities.getFeatures(single);
        }
    };
    fex.addFeatureExtractor(lowerCasedWord);

    TextAnnotation sentence = TextAnnotationUtilities.createFromTokenizedString("This is a test for the feature extractor .");

    // Constituent created with token offsets 2 and 3.
    Constituent narrow = new Constituent("", "", sentence, 2, 3);
    Set<String> narrowNames = new HashSet<>(Arrays.asList(
            "context:#word#:this",
            "context:#word#:is",
            "context:#word#:a",
            "context:#word#:test",
            "context:#word#:for"));
    Set<Feature> narrowMissing = FeatureUtilities.getFeatures(narrowNames);
    narrowMissing.removeAll(fex.getFeatures(narrow));
    assertEquals(0, narrowMissing.size());

    // Constituent created with token offsets 2 and 4.
    Constituent wide = new Constituent("", "", sentence, 2, 4);
    Set<String> wideNames = new HashSet<>(Arrays.asList(
            "context:#word#:this",
            "context:#word#:is",
            "context:#word#:a",
            "context:#word#:test",
            "context:#word#:for",
            "context:#word#:the"));
    Set<Feature> wideMissing = FeatureUtilities.getFeatures(wideNames);
    wideMissing.removeAll(fex.getFeatures(wide));
    assertEquals(0, wideMissing.size());
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
From the class TestContextFeatureExtractor, method testGetFeaturesNoIndexWithoutConstituent.
/**
 * Checks a {@code ContextFeatureExtractor} built with {@code (2, false, true)} that wraps
 * a lower-cased-word extractor. For each constituent the expected feature names are
 * converted to features, the extracted features are removed from them, and nothing may
 * remain. The check is one-directional: extra extracted features do not fail the test.
 *
 * <p>Here the expected names list only the surrounding tokens, not the constituent's own.
 *
 * @throws EdisonException if feature extraction fails
 */
@Test
public void testGetFeaturesNoIndexWithoutConstituent() throws EdisonException {
    ContextFeatureExtractor fex = new ContextFeatureExtractor(2, false, true);
    // Emits the lower-cased token at the requested position as a single feature.
    WordFeatureExtractor lowerCasedWord = new WordFeatureExtractor() {
        @Override
        public Set<Feature> getWordFeatures(TextAnnotation ta, int wordPosition) throws EdisonException {
            Set<String> single = new HashSet<>();
            single.add(WordHelpers.getWord(ta, wordPosition).toLowerCase());
            return FeatureUtilities.getFeatures(single);
        }
    };
    fex.addFeatureExtractor(lowerCasedWord);

    TextAnnotation sentence = TextAnnotationUtilities.createFromTokenizedString("This is a test for the feature extractor .");

    // Constituent created with token offsets 2 and 3.
    Constituent narrow = new Constituent("", "", sentence, 2, 3);
    Set<String> narrowNames = new HashSet<>(Arrays.asList(
            "context:#word#:this",
            "context:#word#:is",
            "context:#word#:test",
            "context:#word#:for"));
    Set<Feature> narrowMissing = FeatureUtilities.getFeatures(narrowNames);
    narrowMissing.removeAll(fex.getFeatures(narrow));
    assertEquals(0, narrowMissing.size());

    // Constituent created with token offsets 2 and 4.
    Constituent wide = new Constituent("", "", sentence, 2, 4);
    Set<String> wideNames = new HashSet<>(Arrays.asList(
            "context:#word#:this",
            "context:#word#:is",
            "context:#word#:for",
            "context:#word#:the"));
    Set<Feature> wideMissing = FeatureUtilities.getFeatures(wideNames);
    wideMissing.removeAll(fex.getFeatures(wide));
    assertEquals(0, wideMissing.size());
}
Aggregations