Search in sources :

Example 91 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class TestLabelTwoBefore method test.

/**
 * Smoke test for the {@code LabelTwoBefore} feature extractor.
 *
 * <p>Takes the TOKENS constituents covering span (0, 20) of the annotation at index 2 of
 * {@code tas} and logs the names of the features extracted for them by three extractor
 * variants: the one-argument constructor, one backed by a {@code POSBaseLineCounter} and one
 * backed by a {@code POSMikheevCounter}. Nothing is asserted; the test only fails if an
 * exception is thrown.
 *
 * @throws Exception if any of the calls made by the test throws
 */
@Test
public final void test() throws Exception {
    logger.info("LabelTwoBefore Feature Extractor");
    // Test material: TOKENS constituents covering span (0, 20) of the annotation at index 2.
    TextAnnotation annotation = tas.get(2);
    View tokenView = annotation.getView("TOKENS");
    logger.info("GOT TOKENS FROM TEXTAnn");
    List<Constituent> targets = tokenView.getConstituentsCoveringSpan(0, 20);
    for (Constituent target : targets) {
        logger.info(target.getSurfaceForm());
    }
    logger.info("Testlist size is " + targets.size());

    // Both counters have their tables built from the shared test corpus.
    POSBaseLineCounter baselineCounter = new POSBaseLineCounter("posBaseLine");
    baselineCounter.buildTable(TestPosHelper.corpus);
    POSMikheevCounter mikheevCounter = new POSMikheevCounter("posMikheev");
    mikheevCounter.buildTable(TestPosHelper.corpus);

    LabelTwoBefore posViewExtractor = new LabelTwoBefore("l2bPOS");
    LabelTwoBefore baselineExtractor = new LabelTwoBefore("l2bPOSBaseLine", baselineCounter);
    LabelTwoBefore mikheevExtractor = new LabelTwoBefore("l2bPOSMikheev", mikheevCounter);

    logExtractedFeatures("Test when using POS View", posViewExtractor, targets);
    logExtractedFeatures("Test when using POS baseline Counting", baselineExtractor, targets);
    logExtractedFeatures("Test when using POS Mikheev Counting", mikheevExtractor, targets);
    logger.info("GOT FEATURES YES!");
}

/**
 * Extracts the features of every target with the given extractor, then logs a heading followed
 * by the name of each extracted feature.
 *
 * @param heading text logged (after a leading newline) before the feature names
 * @param extractor extractor applied to each target
 * @param targets constituents to extract features from
 * @throws Exception if feature extraction throws
 */
private void logExtractedFeatures(String heading, LabelTwoBefore extractor, List<Constituent> targets) throws Exception {
    // Extraction for all targets completes before anything is logged.
    List<Set<Feature>> collected = new ArrayList<>();
    for (Constituent target : targets) {
        collected.add(extractor.getFeatures(target));
    }
    if (collected.isEmpty()) {
        logger.info("Feats list is returning NULL.");
    }
    logger.info("\n" + heading);
    logger.info("Printing list of Feature set");
    for (Set<Feature> featureSet : collected) {
        for (Feature feature : featureSet) {
            logger.info(feature.getName());
        }
    }
}
Also used : Set(java.util.Set) ArrayList(java.util.ArrayList) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) POSBaseLineCounter(edu.illinois.cs.cogcomp.edison.utilities.POSBaseLineCounter) POSMikheevCounter(edu.illinois.cs.cogcomp.edison.utilities.POSMikheevCounter) Test(org.junit.Test)

Example 92 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class TestMixedChunkWindowTwoBeforePOSWindowThreeBefore method test.

/**
 * Checks that every feature produced by {@code MixedChunkWindowTwoBeforePOSWindowThreeBefore}
 * for one token is among the expected feature names.
 *
 * <p>The token is the element at index 5 of the TOKENS constituents covering span (0, 20) of
 * the annotation at index 3 of {@code tas}. The check is one-directional: an extractor that
 * returns fewer features than expected still passes.
 *
 * @throws EdisonException if feature extraction fails
 */
@Test
public final void test() throws EdisonException {
    log.debug("SOPREVIOUS");
    // Test material: TOKENS constituents covering span (0, 20) of the annotation at index 3.
    TextAnnotation ta = tas.get(3);
    View tokens = ta.getView("TOKENS");
    log.debug("GOT TOKENS FROM TEXTAnn");
    List<Constituent> testlist = tokens.getConstituentsCoveringSpan(0, 20);
    for (Constituent c : testlist) {
        log.debug(c.getSurfaceForm());
    }
    log.debug("Testlist size is " + testlist.size());
    Constituent test = testlist.get(5);
    log.debug("The constituent we are extracting features from in this test is: " + test.getSurfaceForm());
    MixedChunkWindowTwoBeforePOSWindowThreeBefore extractor = new MixedChunkWindowTwoBeforePOSWindowThreeBefore("MixedChunkWindowTwoBeforePOSWindowThreeBefore");
    Set<Feature> feats = extractor.getFeatures(test);
    // NOTE(review): the third entry has no parentheses, unlike the first two - confirm this is
    // really the extractor's output format and not a typo.
    String[] expectedOutputs = { "MixedChunkWindowTwoBeforePOSWindowThreeBefore:ll(NP_VP)", "MixedChunkWindowTwoBeforePOSWindowThreeBefore:lt1(NP_RB)", "MixedChunkWindowTwoBeforePOSWindowThreeBefore:lt2VP_VBN" };
    if (feats == null) {
        log.debug("Feats are returning NULL.");
        // Fail with a clear message instead of a NullPointerException in the loop below.
        throw new AssertionError("Feats are returning NULL.");
    }
    log.debug("Printing Set of Features");
    for (Feature f : feats) {
        String name = f.getName();
        log.debug(name);
        // Thrown explicitly: a Java `assert` statement is skipped when the JVM runs without -ea.
        if (!ArrayUtils.contains(expectedOutputs, name)) {
            throw new AssertionError("Unexpected feature: " + name);
        }
    }
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) PredicateArgumentView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Example 93 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class SimpleGazetteerAnnotatorTest method testAddView.

/**
 * Test method for
 * {@link edu.illinois.cs.cogcomp.edison.annotators.SimpleGazetteerAnnotator#addView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)}.
 *
 * <p>Exercises the annotator in three scenarios and checks the surface text and label of the
 * constituents found in the {@code TREE_GAZETTEER} view:
 * <ol>
 * <li>an annotator built from {@code defaultRm} on a lower-case sentence;</li>
 * <li>an annotator built from explicit properties (phrase length "4", dictionaries under
 * "/testgazetteers/", lazy initialization off) on the same sentence;</li>
 * <li>that second annotator on a sentence where "University of Illinois" is capitalized.</li>
 * </ol>
 *
 * @throws URISyntaxException if thrown by the annotator calls made here
 * @throws IOException if thrown by the annotator calls made here
 * @throws AnnotatorException if thrown by the annotator calls made here
 */
@Test
public void testAddView() throws IOException, URISyntaxException, AnnotatorException {
    // Scenario 1: annotator configured from defaultRm.
    SimpleGazetteerAnnotator sga = new SimpleGazetteerAnnotator(defaultRm);
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionaries.size());
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionariesIgnoreCase.size());
    TextAnnotation ta = tab.createTextAnnotation("I hail from the university of illinois at champaign urbana.");
    sga.addView(ta);
    SpanLabelView view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    List<Constituent> entities = view.getConstituents();
    Constituent c1 = entities.get(0);
    assertEquals("university of illinois", c1.toString());
    Constituent c2 = entities.get(1);
    assertEquals("university of illinois at champaign urbana", c2.toString());
    Constituent c3 = entities.get(2);
    assertEquals("illinois", c3.toString());
    Constituent c4 = entities.get(3);
    assertEquals("champaign", c4.toString());
    Constituent c5 = entities.get(4);
    assertEquals("urbana", c5.toString());
    assertEquals("organizations(IC)", c1.getLabel());
    assertEquals("organizations(IC)", c2.getLabel());
    assertEquals("places(IC)", c3.getLabel());
    assertEquals("places(IC)", c4.getLabel());
    assertEquals("places(IC)", c5.getLabel());

    // Scenario 2: explicit configuration; the long six-word phrase is no longer expected.
    Properties props = new Properties();
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PHRASE_LENGTH.key, "4");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PATH_TO_DICTIONARIES.key, "/testgazetteers/");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.IS_LAZILY_INITIALIZED.key, SimpleGazetteerAnnotatorConfigurator.FALSE);
    sga = new SimpleGazetteerAnnotator(new ResourceManager(props));
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionaries.size());
    assertEquals("Wrong number of dictionaries loaded.", 1, sga.dictionariesIgnoreCase.size());
    ta = tab.createTextAnnotation("I hail from the university of illinois at champaign urbana.");
    sga.addView(ta);
    view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    entities = view.getConstituents();
    c1 = entities.get(0);
    assertEquals("university of illinois", c1.toString());
    c2 = entities.get(1);
    assertEquals("illinois", c2.toString());
    c3 = entities.get(2);
    assertEquals("champaign", c3.toString());
    c4 = entities.get(3);
    assertEquals("urbana", c4.toString());
    assertEquals("organizations(IC)", c1.getLabel());
    assertEquals("places(IC)", c2.getLabel());
    assertEquals("places(IC)", c3.getLabel());
    assertEquals("places(IC)", c4.getLabel());

    // Scenario 3: capitalized input; the first match is expected to carry the plain
    // "organizations" label (presumably "(IC)" marks ignore-case matches - TODO confirm).
    ta = tab.createTextAnnotation("I hail from the University of Illinois at champaign urbana.");
    sga.addView(ta);
    view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
    entities = view.getConstituents();
    c1 = entities.get(0);
    assertEquals("University of Illinois", c1.toString());
    assertEquals("organizations", c1.getLabel());
    // Fetching index 1 still verifies that at least two constituents were produced.
    c2 = entities.get(1);
    // NOTE(review): the two assertions below re-check c1, not c2 - this looks like a copy-paste
    // slip. The intended expectation for c2 is not known here, so the checks are left as they
    // were; confirm the expected text/label of the second constituent and assert on c2.
    assertEquals("University of Illinois", c1.toString());
    assertEquals("organizations", c1.getLabel());
}
Also used : ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Properties(java.util.Properties) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Example 94 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class TestContextFeatureExtractor method testGetFeaturesNoIndexWithConstituent.

/**
 * Exercises a {@code ContextFeatureExtractor} constructed with {@code (2, false, false)} that
 * wraps a word extractor emitting the lower-cased word at each position.
 *
 * <p>For two constituents of a tokenized sentence, the test checks that every expected
 * {@code context:#word#:...} feature is present in the extractor's output. Extra features in
 * the output do not cause a failure.
 *
 * @throws EdisonException if feature extraction fails
 */
@Test
public void testGetFeaturesNoIndexWithConstituent() throws EdisonException {
    ContextFeatureExtractor contextFex = new ContextFeatureExtractor(2, false, false);
    contextFex.addFeatureExtractor(new WordFeatureExtractor() {

        @Override
        public Set<Feature> getWordFeatures(TextAnnotation ta, int wordPosition) throws EdisonException {
            // One feature per position: the lower-cased word itself.
            Set<String> lowerCased = new HashSet<>();
            lowerCased.add(WordHelpers.getWord(ta, wordPosition).toLowerCase());
            return FeatureUtilities.getFeatures(lowerCased);
        }
    });
    TextAnnotation sentence = TextAnnotationUtilities.createFromTokenizedString("This is a test for the feature extractor .");

    // Constituent built with span arguments (2, 3).
    Constituent narrow = new Constituent("", "", sentence, 2, 3);
    Set<String> narrowNames = new HashSet<>(Arrays.asList("context:#word#:this", "context:#word#:is", "context:#word#:a", "context:#word#:test", "context:#word#:for"));
    Set<Feature> narrowMissing = FeatureUtilities.getFeatures(narrowNames);
    // Whatever survives the removal is an expected feature the extractor did not produce.
    narrowMissing.removeAll(contextFex.getFeatures(narrow));
    assertEquals(0, narrowMissing.size());

    // Constituent built with span arguments (2, 4).
    Constituent wide = new Constituent("", "", sentence, 2, 4);
    Set<String> wideNames = new HashSet<>(Arrays.asList("context:#word#:this", "context:#word#:is", "context:#word#:a", "context:#word#:test", "context:#word#:for", "context:#word#:the"));
    Set<Feature> wideMissing = FeatureUtilities.getFeatures(wideNames);
    wideMissing.removeAll(contextFex.getFeatures(wide));
    assertEquals(0, wideMissing.size());
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Example 95 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class TestContextFeatureExtractor method testGetFeaturesNoIndexWithoutConstituent.

/**
 * Exercises a {@code ContextFeatureExtractor} constructed with {@code (2, false, true)} that
 * wraps a word extractor emitting the lower-cased word at each position.
 *
 * <p>For two constituents of a tokenized sentence, the test checks that every expected
 * {@code context:#word#:...} feature is present in the extractor's output. The expected lists
 * here leave out the words inside the constituent ("a", and "a"/"test"). Extra features in the
 * output do not cause a failure.
 *
 * @throws EdisonException if feature extraction fails
 */
@Test
public void testGetFeaturesNoIndexWithoutConstituent() throws EdisonException {
    ContextFeatureExtractor contextFex = new ContextFeatureExtractor(2, false, true);
    contextFex.addFeatureExtractor(new WordFeatureExtractor() {

        @Override
        public Set<Feature> getWordFeatures(TextAnnotation ta, int wordPosition) throws EdisonException {
            // One feature per position: the lower-cased word itself.
            Set<String> lowerCased = new HashSet<>();
            lowerCased.add(WordHelpers.getWord(ta, wordPosition).toLowerCase());
            return FeatureUtilities.getFeatures(lowerCased);
        }
    });
    TextAnnotation sentence = TextAnnotationUtilities.createFromTokenizedString("This is a test for the feature extractor .");

    // Constituent built with span arguments (2, 3).
    Constituent narrow = new Constituent("", "", sentence, 2, 3);
    Set<String> narrowNames = new HashSet<>(Arrays.asList("context:#word#:this", "context:#word#:is", "context:#word#:test", "context:#word#:for"));
    Set<Feature> narrowMissing = FeatureUtilities.getFeatures(narrowNames);
    // Whatever survives the removal is an expected feature the extractor did not produce.
    narrowMissing.removeAll(contextFex.getFeatures(narrow));
    assertEquals(0, narrowMissing.size());

    // Constituent built with span arguments (2, 4).
    Constituent wide = new Constituent("", "", sentence, 2, 4);
    Set<String> wideNames = new HashSet<>(Arrays.asList("context:#word#:this", "context:#word#:is", "context:#word#:for", "context:#word#:the"));
    Set<Feature> wideMissing = FeatureUtilities.getFeatures(wideNames);
    wideMissing.removeAll(contextFex.getFeatures(wide));
    assertEquals(0, wideMissing.size());
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Aggregations

Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)176 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)95 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)51 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)44 Test (org.junit.Test)39 ArrayList (java.util.ArrayList)29 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)25 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)24 LinkedHashSet (java.util.LinkedHashSet)22 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)20 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)20 FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor)17 ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath)16 FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest)16 FileInputStream (java.io.FileInputStream)16 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)14 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)13 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)12 HashSet (java.util.HashSet)12 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)11