Search in sources :

Example 26 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class TestCorlex method test.

/**
 * Extracts Corelex word features for one word of a fixture text annotation and checks that
 * every returned feature name is one of the expected names.
 *
 * <p>A {@code null} feature set is reported as an assertion failure instead of surfacing as a
 * {@code NullPointerException} from the loop below.
 *
 * @throws EdisonException declared because the feature-extraction call may throw it
 */
public final void test() throws EdisonException {
    log.debug("Corlex Feature Extractor");
    // Uses the text annotation at index 1 of the fixture list; word features are requested
    // with 1 as the second argument.
    TextAnnotation ta = tas.get(1);
    // NOTE(review): this view is not used below; the lookup is kept unchanged from the
    // original in case it is meant as a sanity check that the view exists -- confirm.
    View TOKENS = ta.getView("TOKENS");
    log.debug("Got tokens FROM TextAnnotation");
    CorelexFeatureExtractor testInstance = new CorelexFeatureExtractor(true);
    Set<Feature> feats = testInstance.getWordFeatures(ta, 1);
    String[] expected_outputs = { "atr" };
    if (feats == null) {
        log.debug("Feats are returning NULL.");
    }
    // Fail explicitly here: iterating a null set would otherwise throw an NPE with no context.
    assertTrue("CorelexFeatureExtractor.getWordFeatures returned null", feats != null);
    log.debug("Printing Set of Features");
    // NOTE(review): an empty feature set passes this loop without checking anything.
    for (Feature f : feats) {
        log.debug(f.getName());
        assertTrue(ArrayUtils.contains(expected_outputs, f.getName()));
    }
}
Also used : CorelexFeatureExtractor(edu.illinois.cs.cogcomp.edison.features.factory.CorelexFeatureExtractor) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Feature(edu.illinois.cs.cogcomp.edison.features.Feature)

Example 27 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class TestLabelOneBefore method test.

/**
 * Smoke test for {@code LabelOneBefore}: runs the extractor in three configurations (no
 * counter, a POS baseline counter, a POS Mikheev counter) over the same constituents and logs
 * the names of the features produced. Nothing is asserted; the test fails only if a call
 * throws.
 *
 * @throws Exception if any fixture lookup, table build or feature extraction throws
 */
public final void test() throws Exception {
    logger.info("LabelOneBefore Feature Extractor");
    // Fixture: text annotation at index 2, token constituents covering the span 0-20.
    TextAnnotation annotation = tas.get(2);
    View tokenView = annotation.getView("TOKENS");
    logger.info("GOT TOKENS FROM TEXTAnn");
    List<Constituent> targets = tokenView.getConstituentsCoveringSpan(0, 20);
    for (Constituent target : targets) {
        logger.info(target.getSurfaceForm());
    }
    logger.info("Testlist size is " + targets.size());

    // Both counters are built from the shared test corpus before any extractor is created.
    POSBaseLineCounter baselineCounter = new POSBaseLineCounter("posBaseLine");
    baselineCounter.buildTable(TestPosHelper.corpus);
    POSMikheevCounter mikheevCounter = new POSMikheevCounter("posMikheev");
    mikheevCounter.buildTable(TestPosHelper.corpus);

    // The three configurations under test, paired by index with the heading logged for each.
    LabelOneBefore[] extractors = {
        new LabelOneBefore("l1bPOS"),
        new LabelOneBefore("l1bPOSBaseLine", baselineCounter),
        new LabelOneBefore("l1bPOSMikheev", mikheevCounter)
    };
    String[] headings = {
        "\n" + "Test when using POS View",
        "\n" + "Test when using POS baseline Counting",
        "\n" + "Test when using POS Mikheev Counting"
    };

    ArrayList<Set<Feature>> collected = new ArrayList<>();
    for (int run = 0; run < extractors.length; run++) {
        // Reuse the same list for every configuration; features are extracted before the
        // heading is logged.
        collected.clear();
        for (Constituent target : targets) {
            collected.add(extractors[run].getFeatures(target));
        }
        if (collected.isEmpty()) {
            logger.info("Feats list is returning NULL.");
        }
        logger.info(headings[run]);
        logger.info("Printing list of Feature set");
        for (Set<Feature> featureSet : collected) {
            for (Feature feature : featureSet) {
                logger.info(feature.getName());
            }
        }
    }
    logger.info("GOT FEATURES YES!");
}
Also used : Set(java.util.Set) ArrayList(java.util.ArrayList) LabelOneBefore(edu.illinois.cs.cogcomp.edison.features.lrec.LabelOneBefore) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) POSBaseLineCounter(edu.illinois.cs.cogcomp.edison.utilities.POSBaseLineCounter) POSMikheevCounter(edu.illinois.cs.cogcomp.edison.utilities.POSMikheevCounter)

Example 28 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class GazetteerWindowTwo method getFeatures.

/**
 * Creates one feature for every constituent in the {@code GAZETTEER_NE} view that covers
 * {@code c}.
 *
 * <p>Each feature is built from the pieces: this extractor's name, {@code ":"}, {@code "("},
 * the start-span offset of {@code c} relative to the covering gazetteer constituent,
 * {@code ")"}, and the covering constituent's label.
 *
 * @param c the constituent to extract features for
 * @return the features created; empty if no gazetteer constituent covers {@code c}
 * @throws EdisonException declared by the overridden method signature
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> feats = new HashSet<>();
    View gazView = c.getTextAnnotation().getView(ViewNames.GAZETTEER_NE);
    // Suppose for now that the gaz-ne view has a constituent for each gazetteer match --
    // either a single constituent for each word for each match, or a single constituent for
    // each complete match.
    List<Constituent> overlapCons = gazView.getConstituentsCovering(c);
    for (Constituent oc : overlapCons) {
        // Assumes we are dealing with multi-token gazetteer constituents; otherwise, the
        // match position of the gazetteer entry in the single-token constituent must be
        // tracked as a parameter (e.g. attributes keyed on the name of the matched
        // gazetteer, with the value being the position of the gazetteer entry matched by
        // the current token).
        int relativePosition = c.getStartSpan() - oc.getStartSpan();
        String[] pieces = { getName(), ":", "(", Integer.toString(relativePosition), ")", oc.getLabel() };
        feats.add(FeatureCreatorUtil.createFeatureFromArray(pieces));
    }
    return feats;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) HashSet(java.util.HashSet)

Example 29 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class WordEmbeddingWindow method getFeatures.

/**
 * Creates word-embedding features for every position in the window around {@code c}.
 *
 * <p>For each window position the first token-view constituent covering that token is looked
 * up, its embedding is fetched, and one feature is created per embedding dimension from the
 * pieces: this extractor's name, {@code ":"}, {@code "place"}, the window position,
 * {@code "dim"}, the dimension index, {@code ":"}, and the embedding value. Positions whose
 * word has no embedding ({@code null}) contribute no features.
 *
 * @param c the constituent the window is positioned around
 * @return the features created; empty if no word in the window has an embedding
 * @throws EdisonException declared by the overridden method signature
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> features = new HashSet<>();
    // get allowable window given position in text
    IntPair relativeWindow = FeatureCreatorUtil.getWindowSpan(c, windowStart, windowEnd, ignoreSentenceBoundaries);
    // NOTE(review): absStart subtracts the window's first offset and the loop adds i (which
    // starts at that same offset) back -- confirm against getWindowSpan's contract that
    // absStart + i addresses the intended token.
    int absStart = c.getStartSpan() - relativeWindow.getFirst();
    View tokens = c.getTextAnnotation().getView(ViewNames.TOKENS);
    // The loop header is the only place the position advances; a second increment inside the
    // body previously caused every other window position to be skipped.
    for (int i = relativeWindow.getFirst(); i <= relativeWindow.getSecond(); ++i) {
        Constituent word = tokens.getConstituentsCoveringToken(absStart + i).get(0);
        double[] embedding = WordEmbeddings.getEmbedding(word);
        if (embedding != null) {
            for (int dim = 0; dim < embedding.length; dim++) {
                final String[] pieces = { getName(), ":", "place", Integer.toString(i), "dim", Integer.toString(dim), ":", Double.toString(embedding[dim]) };
                features.add(FeatureCreatorUtil.createFeatureFromArray(pieces));
            }
        }
    }
    return features;
}
Also used : Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) HashSet(java.util.HashSet)

Example 30 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class TestPOSBaseLineFeatureExtractor method test.

/**
 * Smoke test for {@code POSBaseLineFeatureExtractor}: extracts features for a group of token
 * constituents and logs the name of every feature produced. Nothing is asserted; the test
 * fails only if a call throws.
 *
 * @throws Exception if any fixture lookup, extractor construction or extraction throws
 */
@Test
public final void test() throws Exception {
    logger.info("POSBaseLine Feature Extractor");
    // Fixture: text annotation at index 2, token constituents covering the span 0-20.
    TextAnnotation annotation = tas.get(2);
    View tokenView = annotation.getView("TOKENS");
    logger.info("GOT TOKENS FROM TEXTAnn");
    List<Constituent> targets = tokenView.getConstituentsCoveringSpan(0, 20);
    for (Constituent target : targets) {
        logger.info(target.getSurfaceForm());
    }
    logger.info("Testlist size is " + targets.size());

    // The extractor is built from the shared test corpus.
    POSBaseLineFeatureExtractor extractor = new POSBaseLineFeatureExtractor("posBaseLine", "test_corpus", TestPosHelper.corpus);

    // Collect one feature set per constituent before anything is printed.
    List<Set<Feature>> perConstituent = new ArrayList<>();
    for (Constituent target : targets) {
        perConstituent.add(extractor.getFeatures(target));
    }
    if (perConstituent.isEmpty()) {
        logger.info("Feats list is returning NULL.");
    }

    logger.info("Printing list of Feature set");
    for (Set<Feature> featureSet : perConstituent) {
        for (Feature feature : featureSet) {
            logger.info(feature.getName());
        }
    }
    logger.info("GOT FEATURES YES!");
}
Also used : Set(java.util.Set) ArrayList(java.util.ArrayList) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Aggregations

Feature (edu.illinois.cs.cogcomp.edison.features.Feature)71 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)48 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)44 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)41 LinkedHashSet (java.util.LinkedHashSet)24 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)22 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)17 Test (org.junit.Test)13 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)12 HashSet (java.util.HashSet)11 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)10 ArrayList (java.util.ArrayList)9 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)8 RealFeature (edu.illinois.cs.cogcomp.edison.features.RealFeature)8 Set (java.util.Set)6 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)5 POSBaseLineCounter (edu.illinois.cs.cogcomp.edison.utilities.POSBaseLineCounter)5 POSMikheevCounter (edu.illinois.cs.cogcomp.edison.utilities.POSMikheevCounter)5 ModelInfo (edu.illinois.cs.cogcomp.verbsense.core.ModelInfo)3 List (java.util.List)3