Search in sources :

Example 56 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

the class PronounRatioTest method posContextFeatureExtractorTest.

/**
 * Checks the features produced by {@code PronounRatioFeatureExtractor} for a short
 * English text.
 *
 * <p>The text is run through a segmenter followed by an English POS tagger, the whole
 * document is registered as the classification target, and the extractor output is
 * verified: seven features in total, with the "he" ratio at 0.5 and the "we" ratio at
 * 0.0. The two ratio values are only checked for features carrying those names.
 *
 * @throws Exception if building the pipeline, processing the text or extracting fails
 */
@Test
public void posContextFeatureExtractorTest() throws Exception {
    // Aggregate pipeline: segmentation first, then English POS tagging.
    AnalysisEngineDescription segmenter = createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription posTagger = createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en");
    AnalysisEngine pipeline = createEngine(createEngineDescription(segmenter, posTagger));

    JCas document = pipeline.newJCas();
    document.setDocumentLanguage("en");
    document.setDocumentText("He is no tester. I am a tester.");
    pipeline.process(document);

    // The classification target spans the complete document text.
    int documentEnd = document.getDocumentText().length();
    TextClassificationTarget aTarget = new TextClassificationTarget(document, 0, documentEnd);
    aTarget.addToIndexes();

    PronounRatioFeatureExtractor pronounRatio = new PronounRatioFeatureExtractor();
    List<Feature> extracted = new ArrayList<Feature>(pronounRatio.extract(document, aTarget));
    Assert.assertEquals(7, extracted.size());

    for (Feature candidate : extracted) {
        if (candidate.getName().equals(FN_HE_RATIO)) {
            assertFeature(FN_HE_RATIO, 0.5, candidate);
            continue;
        }
        if (candidate.getName().equals(FN_WE_RATIO)) {
            assertFeature(FN_WE_RATIO, 0.0, candidate);
        }
    }
}
Also used : BreakIteratorSegmenter(de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) PronounRatioFeatureExtractor(org.dkpro.tc.features.syntax.PronounRatioFeatureExtractor) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) OpenNlpPosTagger(de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 57 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

the class EmoticonRatioTest method emoticonRatioFeatureExtractorTest.

/**
 * Checks that {@code EmoticonRatio} yields a single feature of about 0.14 for a tweet
 * containing one emoticon annotation.
 *
 * <p>Tokens are built directly from the tweet text and a {@code POS_EMO} annotation is
 * placed manually over the trailing emoticon before the (no-op) engine is run.
 *
 * @throws Exception if building the engine, processing the text or extracting fails
 */
@Test
public void emoticonRatioFeatureExtractorTest() throws Exception {
    AnalysisEngine noOpEngine = createEngine(createEngineDescription(NoOpAnnotator.class));

    JCas tweet = noOpEngine.newJCas();
    tweet.setDocumentLanguage("en");
    TokenBuilder<Token, Sentence> tokenBuilder = TokenBuilder.create(Token.class, Sentence.class);
    tokenBuilder.buildTokens(tweet, "This is a very emotional tweet ;-)");

    // Offsets 31..34 cover the trailing ";-)" of the tweet text.
    POS_EMO emoticon = new POS_EMO(tweet);
    emoticon.setBegin(31);
    emoticon.setEnd(34);
    emoticon.addToIndexes();
    noOpEngine.process(tweet);

    // The classification target spans the complete document text.
    int documentEnd = tweet.getDocumentText().length();
    TextClassificationTarget aTarget = new TextClassificationTarget(tweet, 0, documentEnd);
    aTarget.addToIndexes();

    EmoticonRatio emoticonRatio = new EmoticonRatio();
    List<Feature> extracted = new ArrayList<Feature>(emoticonRatio.extract(tweet, aTarget));
    Assert.assertEquals(1, extracted.size());

    // Exactly one feature is present here; 0.14 is presumably 1 emoticon / 7 tokens - TODO confirm.
    assertFeature(EmoticonRatio.class.getSimpleName(), 0.14, extracted.get(0), 0.01);
}
Also used : EmoticonRatio(org.dkpro.tc.features.twitter.EmoticonRatio) POS_EMO(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.POS_EMO) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 58 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

the class DiffNounChunkTokenLengthTest method setUp.

/**
 * Prepares the two document views used by the tests: each is an English text that has
 * been processed by a {@code BreakIteratorSegmenter} engine.
 *
 * @throws ResourceInitializationException if the engine or a JCas cannot be created
 * @throws AnalysisEngineProcessException if processing one of the texts fails
 */
@Before
public void setUp() throws ResourceInitializationException, AnalysisEngineProcessException {
    AnalysisEngine segmenter = createEngine(createEngineDescription(BreakIteratorSegmenter.class));

    // First view.
    jcas1 = segmenter.newJCas();
    jcas1.setDocumentLanguage("en");
    jcas1.setDocumentText("This is the text of view 1");
    segmenter.process(jcas1);

    // Second view, processed by the same engine instance.
    jcas2 = segmenter.newJCas();
    jcas2.setDocumentLanguage("en");
    jcas2.setDocumentText("This is the text of view 2");
    segmenter.process(jcas2);
}
Also used : AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Before(org.junit.Before)

Example 59 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

the class DiffNrOfTokensPairFeatureExtractorTest method testExtract.

/**
 * Checks that {@code DiffNrOfTokensPairFeatureExtractor} reports a single
 * "DiffNrOfTokens" feature with value 4 for two segmented English texts of different
 * length.
 *
 * @throws ResourceInitializationException if the engine or a JCas cannot be created
 * @throws AnalysisEngineProcessException if processing one of the texts fails
 * @throws TextClassificationException if feature extraction fails
 */
@Test
public void testExtract() throws ResourceInitializationException, AnalysisEngineProcessException, TextClassificationException {
    AnalysisEngine segmenter = createEngine(createEngineDescription(BreakIteratorSegmenter.class));

    // Longer view: two sentences.
    JCas longerView = segmenter.newJCas();
    longerView.setDocumentLanguage("en");
    longerView.setDocumentText("This is the text of view 1. And some more.");
    segmenter.process(longerView);

    // Shorter view: a single sentence.
    JCas shorterView = segmenter.newJCas();
    shorterView.setDocumentLanguage("en");
    shorterView.setDocumentText("This is the text of view 2.");
    segmenter.process(shorterView);

    DiffNrOfTokensPairFeatureExtractor tokenDiff = new DiffNrOfTokensPairFeatureExtractor();
    Set<Feature> extracted = tokenDiff.extract(longerView, shorterView);
    assertEquals(1, extracted.size());

    // Exactly one feature is present here, so check it directly.
    assertFeature("DiffNrOfTokens", 4, extracted.iterator().next());
}
Also used : DiffNrOfTokensPairFeatureExtractor(org.dkpro.tc.features.pair.core.length.DiffNrOfTokensPairFeatureExtractor) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) JCas(org.apache.uima.jcas.JCas) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 60 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

the class BaselinePairFeatureTest method extractTest.

/**
 * Checks that {@code AlwaysZeroPairFeatureExtractor}, run via {@code runExtractor} on a
 * {@code BreakIteratorSegmenter} engine, yields a single "BaselineFeature" with value 0.
 *
 * @throws Exception if building the engine or running the extractor fails
 */
@Test
public void extractTest() throws Exception {
    AnalysisEngine segmenter = createEngine(createEngineDescription(BreakIteratorSegmenter.class));
    PairFeatureExtractor baseline = new AlwaysZeroPairFeatureExtractor();

    Set<Feature> extracted = runExtractor(segmenter, baseline);
    assertEquals(1, extracted.size());

    // Exactly one feature is present here, so check it directly.
    assertFeature("BaselineFeature", 0, extracted.iterator().next());
}
Also used : AlwaysZeroPairFeatureExtractor(org.dkpro.tc.features.pair.core.AlwaysZeroPairFeatureExtractor) PairFeatureExtractor(org.dkpro.tc.api.features.PairFeatureExtractor) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Aggregations

AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription) 62, Test (org.junit.Test) 32, File (java.io.File) 27, CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription) 25, ArrayList (java.util.ArrayList) 22, AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine) 18, JCas (org.apache.uima.jcas.JCas) 16, Feature (org.dkpro.tc.api.features.Feature) 13, FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature) 11, ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription) 10, AggregateBuilder (org.apache.uima.fit.factory.AggregateBuilder) 8, ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException) 8, JsonDataWriter (org.dkpro.tc.core.io.JsonDataWriter) 8, TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget) 7, Gson (com.google.gson.Gson) 6, IOException (java.io.IOException) 6, Instance (org.dkpro.tc.api.features.Instance) 6, OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) 4, BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) 4, CAS (org.apache.uima.cas.CAS) 4