Search in sources :

Example 71 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class PronounRatioFeatureExtractor method extract.

/**
 * Computes, for each tracked pronoun, its share among all pronouns covered by the target.
 *
 * <p>Every {@code POS_PRON} annotation covered by {@code aTarget} counts towards the
 * denominator. Only the surface forms "he", "she", "i", "we", "they", "us" and "you"
 * (compared after lower-casing) count towards one of the numerators.
 *
 * @param jcas the CAS from which the covered pronoun annotations are selected
 * @param aTarget the span whose covered pronouns are counted
 * @return seven numeric ratio features, or an empty set when the target covers no pronoun
 * @throws TextClassificationException declared in the signature; this body does not throw it itself
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget) throws TextClassificationException {
    int heCount = 0;
    int sheCount = 0;
    int iCount = 0;
    int weCount = 0;
    int theyCount = 0;
    int usCount = 0;
    int youCount = 0;
    int n = 0;
    for (POS_PRON pronoun : JCasUtil.selectCovered(jcas, POS_PRON.class, aTarget)) {
        n++;
        // Lower-case with a fixed locale: with the JVM default locale set to Turkish or Azeri,
        // "I" would become a dotless "ı" and would never match "i" below.
        String text = pronoun.getCoveredText().toLowerCase(java.util.Locale.ROOT);
        if ("he".equals(text)) {
            heCount++;
        } else if ("she".equals(text)) {
            sheCount++;
        } else if ("i".equals(text)) {
            iCount++;
        } else if ("we".equals(text)) {
            weCount++;
        } else if ("they".equals(text)) {
            theyCount++;
        } else if ("us".equals(text)) {
            usCount++;
        } else if ("you".equals(text)) {
            youCount++;
        }
        // Any other pronoun is not tracked individually but still counts in n.
    }
    Set<Feature> features = new HashSet<Feature>();
    // Without any pronoun the ratios are undefined, so no feature is emitted at all.
    if (n > 0) {
        features.add(new Feature(FN_HE_RATIO, (double) heCount / n, FeatureType.NUMERIC));
        features.add(new Feature(FN_SHE_RATIO, (double) sheCount / n, FeatureType.NUMERIC));
        features.add(new Feature(FN_I_RATIO, (double) iCount / n, FeatureType.NUMERIC));
        features.add(new Feature(FN_WE_RATIO, (double) weCount / n, FeatureType.NUMERIC));
        features.add(new Feature(FN_THEY_RATIO, (double) theyCount / n, FeatureType.NUMERIC));
        features.add(new Feature(FN_US_RATIO, (double) usCount / n, FeatureType.NUMERIC));
        features.add(new Feature(FN_YOU_RATIO, (double) youCount / n, FeatureType.NUMERIC));
    }
    return features;
}
Also used : POS_PRON(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_PRON) Feature(org.dkpro.tc.api.features.Feature) HashSet(java.util.HashSet)

Example 72 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class TargetSurfaceFormContextFeature method extract.

/**
 * Emits a single feature holding the text of the target located {@code shiftIdx}
 * positions away from {@code unit}.
 *
 * <p>The superclass {@code extract} is invoked first; presumably it (re)builds the
 * {@code unitBegin2Idx} lookup used below — confirm against the superclass.
 *
 * @param aView the CAS view passed on to the superclass
 * @param unit the target whose shifted neighbour is looked up
 * @return a one-element set with the feature named {@code FEATURE_NAME + toHumanReadable(shiftIdx)}
 * @throws TextClassificationException if no index is registered for the begin offset of {@code unit}
 */
@Override
public Set<Feature> extract(JCas aView, TextClassificationTarget unit) throws TextClassificationException {
    super.extract(aView, unit);
    Integer currentTargetIdx = super.unitBegin2Idx.get(unit.getBegin());
    if (currentTargetIdx == null) {
        // Fail with a descriptive, declared exception instead of a bare NPE on unboxing.
        throw new TextClassificationException(
                "No index registered for the target beginning at offset " + unit.getBegin());
    }
    Integer targetIdx = currentTargetIdx + shiftIdx;
    String featureVal = getTargetText(targetIdx);
    // NOTE(review): the value is a String but the feature is tagged NUMERIC — confirm this is intended.
    return new Feature(FEATURE_NAME + toHumanReadable(shiftIdx), featureVal, FeatureType.NUMERIC).asSet();
}
Also used : Feature(org.dkpro.tc.api.features.Feature)

Example 73 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class ContextualityTest method posContextFeatureExtractorTest.

/**
 * Runs segmentation and POS tagging on a short English sentence, extracts the
 * contextuality features for the whole document, and checks the number of features
 * as well as the value of the contextuality measure.
 *
 * @throws Exception if the pipeline cannot be created or run, or extraction fails
 */
@Test
public void posContextFeatureExtractorTest() throws Exception {
    AnalysisEngineDescription desc = createEngineDescription(createEngineDescription(BreakIteratorSegmenter.class), createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en"));
    AnalysisEngine engine = createEngine(desc);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    jcas.setDocumentText("This is a test.");
    engine.process(jcas);
    // The classification target spans the complete document text.
    TextClassificationTarget aTarget = new TextClassificationTarget(jcas, 0, jcas.getDocumentText().length());
    aTarget.addToIndexes();
    ContextualityMeasureFeatureExtractor extractor = new ContextualityMeasureFeatureExtractor();
    List<Feature> features = new ArrayList<Feature>(extractor.extract(jcas, aTarget));
    Assert.assertEquals(8, features.size());
    // Track whether the feature was seen at all; otherwise the value check below
    // would be skipped silently and the test would pass without verifying anything.
    boolean measureSeen = false;
    for (Feature feature : features) {
        if (feature.getName().equals(CONTEXTUALITY_MEASURE_FN)) {
            assertFeature(CONTEXTUALITY_MEASURE_FN, 50.2, feature);
            measureSeen = true;
        }
    }
    Assert.assertTrue("Feature " + CONTEXTUALITY_MEASURE_FN + " was not extracted", measureSeen);
}
Also used : ContextualityMeasureFeatureExtractor(org.dkpro.tc.features.style.ContextualityMeasureFeatureExtractor) BreakIteratorSegmenter(de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) OpenNlpPosTagger(de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 74 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class POSRatioTest method posContextFeatureExtractorTest.

/**
 * Runs segmentation and POS tagging on a two-sentence English news text, extracts the
 * POS ratio features for the whole document, and checks the number of features as well
 * as the noun and punctuation ratios.
 *
 * @throws Exception if the pipeline cannot be created or run, or extraction fails
 */
@Test
public void posContextFeatureExtractorTest() throws Exception {
    AnalysisEngineDescription desc = createEngineDescription(createEngineDescription(BreakIteratorSegmenter.class), createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en"));
    AnalysisEngine engine = createEngine(desc);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    jcas.setDocumentText("As the emeritus pope leaves the Vatican for the papal residence of Castel Gandolfo – and becomes the first pontiff to resign in 600 years – the operation to choose his successor begins. With the throne of St Peter declared empty and the interregnum formally begun, as many of the 208 cardinals who can make the journey will be expected to travel to the Vatican to help run the church in the absence of a pope.");
    engine.process(jcas);
    // The classification target spans the complete document text.
    TextClassificationTarget aTarget = new TextClassificationTarget(jcas, 0, jcas.getDocumentText().length());
    aTarget.addToIndexes();
    POSRatioFeatureExtractor extractor = new POSRatioFeatureExtractor();
    List<Feature> features = new ArrayList<Feature>(extractor.extract(jcas, aTarget));
    Assert.assertEquals(11, features.size());
    // Track whether each checked feature was seen; otherwise a missing feature would
    // skip its value check silently and the test would still pass.
    boolean nounRatioSeen = false;
    boolean puncRatioSeen = false;
    for (Feature feature : features) {
        if (feature.getName().equals(FN_N_RATIO)) {
            assertFeature(FN_N_RATIO, 0.2658, feature, 0.0001);
            nounRatioSeen = true;
        } else if (feature.getName().equals(FN_PUNC_RATIO)) {
            assertFeature(FN_PUNC_RATIO, 0.0380, feature, 0.0001);
            puncRatioSeen = true;
        }
    }
    Assert.assertTrue("Feature " + FN_N_RATIO + " was not extracted", nounRatioSeen);
    Assert.assertTrue("Feature " + FN_PUNC_RATIO + " was not extracted", puncRatioSeen);
}
Also used : POSRatioFeatureExtractor(org.dkpro.tc.features.syntax.POSRatioFeatureExtractor) BreakIteratorSegmenter(de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) OpenNlpPosTagger(de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 75 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class AdjectiveEndingTest method adjectiveEndingFeatureExtractorTest.

/**
 * Tags a sentence made up of adjectives plus one adverb, extracts the ending features
 * for the whole document, and verifies both the feature count and each expected value.
 *
 * @throws Exception if the pipeline cannot be created or run, or extraction fails
 */
@Test
public void adjectiveEndingFeatureExtractorTest() throws Exception {
    // Assemble the pipeline step by step: segmentation first, then English POS tagging.
    AnalysisEngineDescription segmenter = createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription tagger = createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en");
    AnalysisEngine pipeline = createEngine(createEngineDescription(segmenter, tagger));

    JCas cas = pipeline.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("Lovable phenomenal beautiful incredible fantastic gorgeous positive nice good mainly harmless.");
    pipeline.process(cas);

    // The classification target spans the complete document text.
    int documentEnd = cas.getDocumentText().length();
    TextClassificationTarget wholeDocument = new TextClassificationTarget(cas, 0, documentEnd);
    wholeDocument.addToIndexes();

    Set<Feature> extracted = new AdjectiveEndingFeatureExtractor().extract(cas, wholeDocument);
    Assert.assertEquals(9, extracted.size());

    final double tolerance = 0.001;
    final double expectedAdjectiveValue = 10.0;
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING1, expectedAdjectiveValue, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING2, expectedAdjectiveValue, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING3, expectedAdjectiveValue, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING4, expectedAdjectiveValue, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING5, expectedAdjectiveValue, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING6, expectedAdjectiveValue, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING7, expectedAdjectiveValue, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING8, expectedAdjectiveValue, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADV_ENDING9, 100.0, extracted, tolerance);
}
Also used : AdjectiveEndingFeatureExtractor(org.dkpro.tc.features.style.AdjectiveEndingFeatureExtractor) BreakIteratorSegmenter(de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) OpenNlpPosTagger(de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Aggregations

Feature (org.dkpro.tc.api.features.Feature): 94; Test (org.junit.Test): 48; Instance (org.dkpro.tc.api.features.Instance): 30; ArrayList (java.util.ArrayList): 29; HashSet (java.util.HashSet): 21; FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature): 17; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 16; TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 16; JCas (org.apache.uima.jcas.JCas): 15; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 13; File (java.io.File): 8; Attribute (weka.core.Attribute): 8; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 7; Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 6; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 5; Chunk (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk): 4; OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger): 4; BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter): 4; Instances (weka.core.Instances): 4; IOException (java.io.IOException): 3