Search in sources :

Example 66 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class DependencyPathNgrams method getFeatures.

/**
 * Builds n-gram features over the dependency path that links the source of the first incoming
 * relation of {@code c}'s dependency node to that node itself.
 *
 * <p>Two label sequences are collected: one of relation names / direction markers and one where
 * each label is prefixed with the POS tag of the node. Both are handed to {@code getNgrams}
 * (the second with the prefix {@code "pos"}). If the dependency node has no incoming relation,
 * the returned set is empty.
 *
 * @param c the constituent to extract features for; only its start token is used to locate the
 *        matching node in the dependency view
 * @return the collected features, in insertion order
 * @throws EdisonException declared by the feature-extractor contract
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    Set<Feature> features = new LinkedHashSet<>();
    TreeView parse = (TreeView) ta.getView(dependencyViewName);

    // Node of the dependency view that covers the first token of c
    Constituent target = parse.getConstituentsCoveringToken(c.getStartSpan()).get(0);
    List<Relation> incoming = target.getIncomingRelations();
    if (incoming.isEmpty()) {
        // No governor: nothing to build a path from
        return features;
    }

    int governorToken = incoming.get(0).getSource().getStartSpan();
    Constituent governor = parse.getConstituentsCoveringToken(governorToken).get(0);
    // NOTE(review): 400 is presumably a maximum search depth -- confirm in PathFeatureHelper
    Pair<List<Constituent>, List<Constituent>> paths =
            PathFeatureHelper.getPathsToCommonAncestor(governor, target, 400);
    List<Constituent> upward = paths.getFirst();
    List<Constituent> downward = paths.getSecond();

    List<String> path = new ArrayList<>();
    List<String> pos = new ArrayList<>();

    // Every node of the first path except its last one contributes an "up" step
    int topIndex = upward.size() - 1;
    for (int step = 0; step < topIndex; step++) {
        Constituent node = upward.get(step);
        String upLabel = node.getIncomingRelations().get(0).getRelationName() + PathFeatureHelper.PATH_UP_STRING;
        path.add(upLabel);
        pos.add(WordHelpers.getPOS(ta, node.getStartSpan()) + ":" + upLabel);
    }

    // The last node of the first path is marked with "*"
    Constituent top = upward.get(topIndex);
    pos.add(WordHelpers.getPOS(ta, top.getStartSpan()) + ":*");
    path.add("*");

    // Walk the second path backwards, skipping its last node, adding "down" steps
    for (int step = downward.size() - 2; step >= 0; step--) {
        Constituent node = downward.get(step);
        pos.add(WordHelpers.getPOS(ta, node.getStartSpan()) + ":" + PathFeatureHelper.PATH_DOWN_STRING);
        path.add(PathFeatureHelper.PATH_DOWN_STRING);
    }

    features.addAll(getNgrams(path, ""));
    features.addAll(getNgrams(pos, "pos"));
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ArrayList(java.util.ArrayList) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) ArrayList(java.util.ArrayList) List(java.util.List) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 67 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class LinearPosition method getFeatures.

/**
 * Emits a single feature describing where {@code c} lies relative to the source of its first
 * incoming relation (called the predicate here).
 *
 * <ul>
 * <li>{@code BEFORE} when the predicate starts at or after the end of {@code c};</li>
 * <li>{@code AFTER} when {@code c} starts at or after the end of the predicate;</li>
 * <li>{@code CONTAINS} otherwise, i.e. when the two spans overlap.</li>
 * </ul>
 *
 * @param c the constituent to extract the feature for
 * @return a set holding exactly one feature, or an empty set when {@code c} has no incoming
 *         relation
 * @throws EdisonException declared by the feature-extractor contract
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    List<Relation> incoming = c.getIncomingRelations();
    Set<Feature> features = new LinkedHashSet<>();
    if (incoming.isEmpty()) {
        return features;
    }

    Constituent predicate = incoming.get(0).getSource();
    Feature position;
    if (predicate.getStartSpan() >= c.getEndSpan()) {
        position = BEFORE;
    } else if (c.getStartSpan() >= predicate.getEndSpan()) {
        position = AFTER;
    } else {
        position = CONTAINS;
    }
    features.add(position);
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 68 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class ParseHeadWordPOS method getFeatures.

/**
 * Produces two features for the head word of the parse phrase that corresponds to {@code c}:
 * {@code "hw:"} followed by the lower-cased, trimmed head token, and {@code "h-pos:"} followed
 * by the POS tag at the head position.
 *
 * @param c the constituent whose parse phrase is looked up in the view named by
 *        {@code parseViewName}
 * @return the head-word feature followed by the head-POS feature, in that order
 * @throws EdisonException wrapping any exception raised while looking up the parse phrase
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView tree = (TreeView) ta.getView(parseViewName);

    Constituent phrase;
    try {
        phrase = tree.getParsePhrase(c);
    } catch (Exception e) {
        // Surface any lookup failure through the extractor's own exception type
        throw new EdisonException(e);
    }

    Set<Feature> features = new LinkedHashSet<>();
    int head = CollinsHeadFinder.getInstance().getHeadWordPosition(phrase);

    String headWord = ta.getToken(head).toLowerCase().trim();
    Feature headWordFeature = DiscreteFeature.create("hw:" + headWord);
    features.add(headWordFeature);

    Feature headPosFeature = DiscreteFeature.create("h-pos:" + WordHelpers.getPOS(ta, head));
    features.add(headPosFeature);

    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 69 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class TestWordTypeInformation method test.

/**
 * Checks the features that {@code WordTypeInformation} extracts for the constituent at index 3
 * of the TOKENS constituents covering span 0 - 20 of {@code tas.get(1)}.
 *
 * <p>Two checks are made on every returned feature name: it must be one of the hard-coded
 * {@code expected_outputs}, and it must be one of the names recomputed here from the surface
 * forms of the constituents at indices 1 to 5.
 *
 * @throws EdisonException if feature extraction fails
 */
@Test
public final void test() throws EdisonException {
    log.debug("WordTypeInformation");
    // Test input: TOKENS constituents covering span 0 - 20 of tas.get(1)
    TextAnnotation ta = tas.get(1);
    View TOKENS = ta.getView("TOKENS");
    log.debug("GOT TOKENS FROM TEXTAnn");
    List<Constituent> testlist = TOKENS.getConstituentsCoveringSpan(0, 20);
    // Surface forms of the constituents at indices start..end-1; slot k holds index k + start
    String[] teststrings = new String[5];
    int i = 0, start = 1, end = 6;
    for (Constituent c : testlist) {
        log.debug(c.getSurfaceForm());
        if (i >= start && i < end) {
            teststrings[i - start] = c.getSurfaceForm();
        }
        i++;
    }
    log.debug("Testlist size is " + testlist.size());
    Constituent test = testlist.get(3);
    log.debug("The constituent we are extracting features from in this test is: " + test.getSurfaceForm());
    WordTypeInformation wti = new WordTypeInformation("WordTypeInformation");
    log.debug("Startspan is " + test.getStartSpan() + " and Endspan is " + test.getEndSpan());
    Set<Feature> feats = wti.getFeatures(test);
    String[] expected_outputs = { "WordTypeInformation:c0(false)", "WordTypeInformation:d0(false)", "WordTypeInformation:c1(false)", "WordTypeInformation:d1(false)", "WordTypeInformation:c2(false)", "WordTypeInformation:d2(false)", "WordTypeInformation:c2(true)", "WordTypeInformation:c3(false)", "WordTypeInformation:d3(false)", "WordTypeInformation:c4(false)", "WordTypeInformation:d4(false)", "WordTypeInformation:c4(true)" };
    Set<String> __result = new LinkedHashSet<String>();
    String __id;
    String __value;
    String classifier = "WordTypeInformation";
    if (feats == null) {
        log.debug("Feats are returning NULL.");
    }
    assertFalse("getFeatures returned null", feats == null);
    log.debug("Printing Set of Features");
    for (Feature f : feats) {
        log.debug(f.getName());
        // Use a JUnit assertion rather than the `assert` keyword: the keyword is skipped
        // entirely when the JVM runs without -ea, which would let a bad name pass unnoticed.
        assertFalse("Unexpected feature name: " + f.getName(), !ArrayUtils.contains(expected_outputs, f.getName()));
    }
    // Recompute the feature names from the collected surface forms. Stops at the first
    // empty slot, i.e. when fewer than five constituents were collected.
    for (; (start < end && teststrings[start - 1] != null); start++) {
        String token = teststrings[start - 1];
        boolean allCapitalized = true, allDigits = true, allNonLetters = true;
        for (int j = 0; j < token.length(); ++j) {
            char ch = token.charAt(j);
            allCapitalized &= Character.isUpperCase(ch);
            allDigits &= Character.isDigit(ch);
            allNonLetters &= !Character.isLetter(ch);
        }
        __id = classifier + ":" + ("c" + (start - 1));
        __value = "(" + (allCapitalized) + ")";
        __result.add(__id + __value);
        __id = classifier + ":" + ("d" + (start - 1));
        __value = "(" + (allDigits) + ")";
        __result.add(__id + __value);
        // NOTE(review): the non-letter feature reuses the "c" prefix; expected_outputs lists
        // both c2(true) and c2(false), so this appears to mirror the extractor -- confirm.
        __id = classifier + ":" + ("c" + (start - 1));
        __value = "(" + (allNonLetters) + ")";
        __result.add(__id + __value);
    }
    for (Feature feat : feats) {
        assertFalse("Feature not among the recomputed names: " + feat.getName(), !__result.contains(feat.getName()));
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) PredicateArgumentView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Example 70 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class TestPOSWindow method test.

/**
 * Runs three {@code POSWindow} extractors (plain, backed by a {@code POSBaseLineCounter}, and
 * backed by a {@code POSMikheevCounter}) over the TOKENS constituents covering span 0 - 20 of
 * {@code tas.get(2)} and logs every feature name produced.
 *
 * <p>The method contains no assertions; it only fails if one of the calls throws.
 *
 * @throws Exception if building a counter table or extracting features fails
 */
@Test
public final void test() throws Exception {
    logger.info("POSWindow Feature Extractor");
    // Test input: TOKENS constituents covering span 0 - 20 of tas.get(2)
    TextAnnotation ta = tas.get(2);
    View tokensView = ta.getView("TOKENS");
    logger.info("GOT TOKENS FROM TEXTAnn");
    List<Constituent> testlist = tokensView.getConstituentsCoveringSpan(0, 20);
    for (Constituent token : testlist) {
        logger.info(token.getSurfaceForm());
    }
    logger.info("Testlist size is " + testlist.size());

    // Both counters are built from the same helper corpus
    POSBaseLineCounter posBaseLine = new POSBaseLineCounter("posBaseLine");
    posBaseLine.buildTable(TestPosHelper.corpus);
    POSMikheevCounter posMikheev = new POSMikheevCounter("posMikheev");
    posMikheev.buildTable(TestPosHelper.corpus);

    POSWindow[] extractors = {
        new POSWindow("posWindowPOS"),
        new POSWindow("posWindowPOSBaseLine", posBaseLine),
        new POSWindow("posWindowPOSMikheev", posMikheev)
    };
    String[] banners = {
        "\n" + "Test when using POS View",
        "\n" + "Test when using POS baseline Counting",
        "\n" + "Test when using POS Mikheev Counting"
    };

    // One pass per extractor: collect a feature set per constituent, then log them all
    List<Set<Feature>> featslist = new ArrayList<>();
    for (int run = 0; run < extractors.length; run++) {
        featslist.clear();
        for (Constituent candidate : testlist) {
            featslist.add(extractors[run].getFeatures(candidate));
        }
        if (featslist.isEmpty()) {
            logger.info("Feats list is returning NULL.");
        }
        logger.info(banners[run]);
        logger.info("Printing list of Feature set");
        for (Set<Feature> feats : featslist) {
            logger.info("\n");
            for (Feature f : feats) {
                logger.info(f.getName());
            }
        }
    }
    logger.info("GOT FEATURES YES!");
}
Also used : Set(java.util.Set) ArrayList(java.util.ArrayList) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) POSBaseLineCounter(edu.illinois.cs.cogcomp.edison.utilities.POSBaseLineCounter) POSMikheevCounter(edu.illinois.cs.cogcomp.edison.utilities.POSMikheevCounter) Test(org.junit.Test)

Aggregations

Feature (edu.illinois.cs.cogcomp.edison.features.Feature)71 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)48 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)44 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)41 LinkedHashSet (java.util.LinkedHashSet)24 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)22 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)17 Test (org.junit.Test)13 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)12 HashSet (java.util.HashSet)11 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)10 ArrayList (java.util.ArrayList)9 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)8 RealFeature (edu.illinois.cs.cogcomp.edison.features.RealFeature)8 Set (java.util.Set)6 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)5 POSBaseLineCounter (edu.illinois.cs.cogcomp.edison.utilities.POSBaseLineCounter)5 POSMikheevCounter (edu.illinois.cs.cogcomp.edison.utilities.POSMikheevCounter)5 ModelInfo (edu.illinois.cs.cogcomp.verbsense.core.ModelInfo)3 List (java.util.List)3