Search in sources :

Example 51 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

In class POSRatioTest, method posContextFeatureExtractorTest.

/**
 * Runs {@link POSRatioFeatureExtractor} over an English text that has been
 * segmented and POS-tagged, with one classification target spanning the
 * whole document.
 *
 * <p>Checks that exactly 11 features are returned and that the features
 * named {@code FN_N_RATIO} and {@code FN_PUNC_RATIO} carry the expected
 * values. The test fails if either of these two features is missing, so the
 * value checks cannot be skipped silently.
 *
 * @throws Exception if building or running the pipeline, or extracting the
 *         features, fails
 */
@Test
public void posContextFeatureExtractorTest() throws Exception {
    AnalysisEngineDescription desc = createEngineDescription(createEngineDescription(BreakIteratorSegmenter.class), createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en"));
    AnalysisEngine engine = createEngine(desc);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    jcas.setDocumentText("As the emeritus pope leaves the Vatican for the papal residence of Castel Gandolfo – and becomes the first pontiff to resign in 600 years – the operation to choose his successor begins. With the throne of St Peter declared empty and the interregnum formally begun, as many of the 208 cardinals who can make the journey will be expected to travel to the Vatican to help run the church in the absence of a pope.");
    engine.process(jcas);

    // The classification target covers the complete document text.
    TextClassificationTarget aTarget = new TextClassificationTarget(jcas, 0, jcas.getDocumentText().length());
    aTarget.addToIndexes();

    POSRatioFeatureExtractor extractor = new POSRatioFeatureExtractor();
    List<Feature> features = new ArrayList<Feature>(extractor.extract(jcas, aTarget));
    Assert.assertEquals(11, features.size());

    // Track whether each feature of interest was seen; without this the
    // value assertions below would be skipped if the feature were absent.
    boolean sawNounRatio = false;
    boolean sawPunctuationRatio = false;
    for (Feature feature : features) {
        if (feature.getName().equals(FN_N_RATIO)) {
            assertFeature(FN_N_RATIO, 0.2658, feature, 0.0001);
            sawNounRatio = true;
        } else if (feature.getName().equals(FN_PUNC_RATIO)) {
            assertFeature(FN_PUNC_RATIO, 0.0380, feature, 0.0001);
            sawPunctuationRatio = true;
        }
    }
    Assert.assertTrue("No feature named " + FN_N_RATIO + " was extracted", sawNounRatio);
    Assert.assertTrue("No feature named " + FN_PUNC_RATIO + " was extracted", sawPunctuationRatio);
}
Also used : POSRatioFeatureExtractor(org.dkpro.tc.features.syntax.POSRatioFeatureExtractor) BreakIteratorSegmenter(de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) OpenNlpPosTagger(de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 52 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

In class AdjectiveEndingTest, method adjectiveEndingFeatureExtractorTest.

/**
 * Runs {@link AdjectiveEndingFeatureExtractor} over a single English
 * sentence that has been segmented and POS-tagged, with one classification
 * target spanning the whole document.
 *
 * <p>Expects nine features: {@code ADJ_ENDING1} to {@code ADJ_ENDING8} each
 * with value 10.0, and {@code ADV_ENDING9} with value 100.0.
 *
 * @throws Exception if building or running the pipeline, or extracting the
 *         features, fails
 */
@Test
public void adjectiveEndingFeatureExtractorTest() throws Exception {
    // Pipeline: tokenisation followed by English POS tagging.
    AnalysisEngineDescription segmenter = createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription posTagger = createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en");
    AnalysisEngine pipeline = createEngine(createEngineDescription(segmenter, posTagger));

    JCas cas = pipeline.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("Lovable phenomenal beautiful incredible fantastic gorgeous positive nice good mainly harmless.");
    pipeline.process(cas);

    // The classification target covers the complete document text.
    int documentEnd = cas.getDocumentText().length();
    TextClassificationTarget wholeDocument = new TextClassificationTarget(cas, 0, documentEnd);
    wholeDocument.addToIndexes();

    Set<Feature> extracted = new AdjectiveEndingFeatureExtractor().extract(cas, wholeDocument);
    Assert.assertEquals(9, extracted.size());

    // Tolerance shared by all value comparisons below.
    double tolerance = 0.001;
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING1, 10.0, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING2, 10.0, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING3, 10.0, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING4, 10.0, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING5, 10.0, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING6, 10.0, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING7, 10.0, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING8, 10.0, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADV_ENDING9, 100.0, extracted, tolerance);
}
Also used : AdjectiveEndingFeatureExtractor(org.dkpro.tc.features.style.AdjectiveEndingFeatureExtractor) BreakIteratorSegmenter(de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) OpenNlpPosTagger(de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 53 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

In class InitialCharacterUpperCaseTest, method initialLetterTest.

/**
 * Runs {@link InitialCharacterUpperCase} on the text {@code "he Loves it"}
 * with a classification target covering offsets 3 to 8, i.e. the word
 * {@code "Loves"}.
 *
 * <p>Expects exactly one feature, named
 * {@code InitialCharacterUpperCase.FEATURE_NAME}, whose value is 1.0
 * (within a tolerance of 0.1).
 *
 * @throws Exception if building or running the pipeline, or extracting the
 *         feature, fails
 */
@Test
public void initialLetterTest() throws Exception {
    AnalysisEngine segmenter = createEngine(createEngineDescription(BreakIteratorSegmenter.class));

    JCas cas = segmenter.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("he Loves it");
    segmenter.process(cas);

    // Offsets 3..8 select "Loves" inside "he Loves it".
    TextClassificationTarget target = new TextClassificationTarget(cas, 3, 8);
    target.addToIndexes();

    Set<Feature> extracted = new InitialCharacterUpperCase().extract(cas, target);
    // Copy into a list so the single feature can be accessed by index.
    List<Feature> asList = new ArrayList<>(extracted);
    Assert.assertEquals(1, extracted.size());

    Feature only = asList.get(0);
    Assert.assertEquals(InitialCharacterUpperCase.FEATURE_NAME, only.getName());
    Assert.assertEquals(1.0, (double) only.getValue(), 0.1);
}
Also used : AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 54 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

In class PronounRatioTest, method posContextFeatureExtractorTest.

/**
 * Runs {@link PronounRatioFeatureExtractor} over the text
 * {@code "He is no tester. I am a tester."} after segmentation and POS
 * tagging, with one classification target spanning the whole document.
 *
 * <p>Checks that exactly 7 features are returned, that {@code FN_HE_RATIO}
 * is 0.5 and that {@code FN_WE_RATIO} is 0.0. The test fails if either of
 * these two features is missing, so the value checks cannot be skipped
 * silently.
 *
 * @throws Exception if building or running the pipeline, or extracting the
 *         features, fails
 */
@Test
public void posContextFeatureExtractorTest() throws Exception {
    AnalysisEngineDescription desc = createEngineDescription(createEngineDescription(BreakIteratorSegmenter.class), createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en"));
    AnalysisEngine engine = createEngine(desc);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    jcas.setDocumentText("He is no tester. I am a tester.");
    engine.process(jcas);

    // The classification target covers the complete document text.
    TextClassificationTarget aTarget = new TextClassificationTarget(jcas, 0, jcas.getDocumentText().length());
    aTarget.addToIndexes();

    PronounRatioFeatureExtractor extractor = new PronounRatioFeatureExtractor();
    List<Feature> features = new ArrayList<Feature>(extractor.extract(jcas, aTarget));
    Assert.assertEquals(7, features.size());

    // Track whether each feature of interest was seen; without this the
    // value assertions below would be skipped if the feature were absent.
    boolean sawHeRatio = false;
    boolean sawWeRatio = false;
    for (Feature feature : features) {
        if (feature.getName().equals(FN_HE_RATIO)) {
            assertFeature(FN_HE_RATIO, 0.5, feature);
            sawHeRatio = true;
        } else if (feature.getName().equals(FN_WE_RATIO)) {
            assertFeature(FN_WE_RATIO, 0.0, feature);
            sawWeRatio = true;
        }
    }
    Assert.assertTrue("No feature named " + FN_HE_RATIO + " was extracted", sawHeRatio);
    Assert.assertTrue("No feature named " + FN_WE_RATIO + " was extracted", sawWeRatio);
}
Also used : BreakIteratorSegmenter(de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) PronounRatioFeatureExtractor(org.dkpro.tc.features.syntax.PronounRatioFeatureExtractor) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) OpenNlpPosTagger(de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 55 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

In class EmoticonRatioTest, method emoticonRatioFeatureExtractorTest.

/**
 * Runs {@link EmoticonRatio} over a hand-built CAS: the tokens come from a
 * {@code TokenBuilder}, and a single {@code POS_EMO} annotation is placed
 * on offsets 31 to 34, which is the trailing {@code ";-)"}.
 *
 * <p>Expects exactly one feature, named after the simple class name of
 * {@code EmoticonRatio}, with value 0.14 (within a tolerance of 0.01).
 *
 * @throws Exception if building or running the engine, or extracting the
 *         feature, fails
 */
@Test
public void emoticonRatioFeatureExtractorTest() throws Exception {
    AnalysisEngine noOpEngine = createEngine(createEngineDescription(NoOpAnnotator.class));
    TokenBuilder<Token, Sentence> tokenBuilder = TokenBuilder.create(Token.class, Sentence.class);

    JCas cas = noOpEngine.newJCas();
    cas.setDocumentLanguage("en");
    tokenBuilder.buildTokens(cas, "This is a very emotional tweet ;-)");

    // Mark the trailing ";-)" (offsets 31..34) as an emoticon by hand.
    POS_EMO emoticon = new POS_EMO(cas);
    emoticon.setBegin(31);
    emoticon.setEnd(34);
    emoticon.addToIndexes();
    noOpEngine.process(cas);

    // The classification target covers the complete document text.
    int documentEnd = cas.getDocumentText().length();
    TextClassificationTarget wholeDocument = new TextClassificationTarget(cas, 0, documentEnd);
    wholeDocument.addToIndexes();

    List<Feature> extracted = new ArrayList<Feature>(new EmoticonRatio().extract(cas, wholeDocument));
    Assert.assertEquals(1, extracted.size());

    // Exactly one feature is present at this point, so check it directly.
    assertFeature(EmoticonRatio.class.getSimpleName(), 0.14, extracted.get(0), 0.01);
}
Also used : EmoticonRatio(org.dkpro.tc.features.twitter.EmoticonRatio) POS_EMO(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.POS_EMO) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Aggregations

TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget)61 JCas (org.apache.uima.jcas.JCas)29 ArrayList (java.util.ArrayList)22 TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome)18 Feature (org.dkpro.tc.api.features.Feature)16 Test (org.junit.Test)16 AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine)12 TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence)12 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)11 JCasId (org.dkpro.tc.api.type.JCasId)11 AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)8 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)7 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)7 FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature)6 CollectionReader (org.apache.uima.collection.CollectionReader)5 FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)5 DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData)4 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)4 OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger)4 BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter)4