Search in sources:

Example 51 with AnalysisEngineDescription

Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in the project dkpro-tc by dkpro.

In the class NumberOfHashTagsTest, method numberOfHashTagsFeatureExtractorTest:

/**
 * Feeds a tweet containing three hashtags through a no-op pipeline and checks that
 * {@link NumberOfHashTags} returns exactly one feature, named after the extractor's
 * simple class name, whose value is 3.
 */
@Test
public void numberOfHashTagsFeatureExtractorTest() throws Exception {
    // Build the (no-op) pipeline and an empty CAS for it.
    AnalysisEngineDescription noOpDescription = createEngineDescription(NoOpAnnotator.class);
    AnalysisEngine pipeline = createEngine(noOpDescription);
    JCas cas = pipeline.newJCas();

    cas.setDocumentLanguage("en");
    cas.setDocumentText("This is a very #emotional tweet ;-) #icouldcry #ILoveHashTags");
    pipeline.process(cas);

    // The classification target spans the complete document text.
    int documentEnd = cas.getDocumentText().length();
    TextClassificationTarget wholeDocument = new TextClassificationTarget(cas, 0, documentEnd);
    wholeDocument.addToIndexes();

    NumberOfHashTags hashTagCounter = new NumberOfHashTags();
    List<Feature> extracted = new ArrayList<Feature>(hashTagCounter.extract(cas, wholeDocument));

    Assert.assertEquals(1, extracted.size());
    for (Feature hashTagFeature : extracted) {
        assertFeature(NumberOfHashTags.class.getSimpleName(), 3, hashTagFeature);
    }
}
Also used : AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) NumberOfHashTags(org.dkpro.tc.features.twitter.NumberOfHashTags) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 52 with AnalysisEngineDescription

Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in the project dkpro-tc by dkpro.

In the class ContextualityTest, method posContextFeatureExtractorTest:

/**
 * Segments and POS-tags a short English sentence, then checks that
 * {@link ContextualityMeasureFeatureExtractor} returns eight features and that the
 * feature named {@code CONTEXTUALITY_MEASURE_FN} has the value 50.2.
 *
 * <p>The test fails explicitly if no feature with that name is returned, so the value
 * assertion can never be skipped silently.
 */
@Test
public void posContextFeatureExtractorTest() throws Exception {
    AnalysisEngineDescription desc = createEngineDescription(
            createEngineDescription(BreakIteratorSegmenter.class),
            createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en"));
    AnalysisEngine engine = createEngine(desc);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    jcas.setDocumentText("This is a test.");
    engine.process(jcas);

    // The classification target spans the complete document text.
    TextClassificationTarget aTarget = new TextClassificationTarget(jcas, 0, jcas.getDocumentText().length());
    aTarget.addToIndexes();

    ContextualityMeasureFeatureExtractor extractor = new ContextualityMeasureFeatureExtractor();
    List<Feature> features = new ArrayList<Feature>(extractor.extract(jcas, aTarget));
    Assert.assertEquals(8, features.size());

    // Remember whether the feature under test was seen at all; otherwise a renamed or
    // missing feature would let this test pass without checking anything.
    boolean measureChecked = false;
    for (Feature feature : features) {
        if (feature.getName().equals(CONTEXTUALITY_MEASURE_FN)) {
            assertFeature(CONTEXTUALITY_MEASURE_FN, 50.2, feature);
            measureChecked = true;
        }
    }
    Assert.assertTrue("Expected feature '" + CONTEXTUALITY_MEASURE_FN + "' was not extracted", measureChecked);
}
Also used : ContextualityMeasureFeatureExtractor(org.dkpro.tc.features.style.ContextualityMeasureFeatureExtractor) BreakIteratorSegmenter(de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) OpenNlpPosTagger(de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 53 with AnalysisEngineDescription

Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in the project dkpro-tc by dkpro.

In the class POSRatioTest, method posContextFeatureExtractorTest:

/**
 * Segments and POS-tags a two-sentence English news text, then checks that
 * {@link POSRatioFeatureExtractor} returns eleven features and that the features named
 * {@code FN_N_RATIO} and {@code FN_PUNC_RATIO} have the values 0.2658 and 0.0380
 * (each within a tolerance of 0.0001).
 *
 * <p>The test fails explicitly if either of the two features is not returned, so the
 * value assertions can never be skipped silently.
 */
@Test
public void posContextFeatureExtractorTest() throws Exception {
    AnalysisEngineDescription desc = createEngineDescription(
            createEngineDescription(BreakIteratorSegmenter.class),
            createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en"));
    AnalysisEngine engine = createEngine(desc);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    jcas.setDocumentText("As the emeritus pope leaves the Vatican for the papal residence of Castel Gandolfo – and becomes the first pontiff to resign in 600 years – the operation to choose his successor begins. With the throne of St Peter declared empty and the interregnum formally begun, as many of the 208 cardinals who can make the journey will be expected to travel to the Vatican to help run the church in the absence of a pope.");
    engine.process(jcas);

    // The classification target spans the complete document text.
    TextClassificationTarget aTarget = new TextClassificationTarget(jcas, 0, jcas.getDocumentText().length());
    aTarget.addToIndexes();

    POSRatioFeatureExtractor extractor = new POSRatioFeatureExtractor();
    List<Feature> features = new ArrayList<Feature>(extractor.extract(jcas, aTarget));
    Assert.assertEquals(11, features.size());

    // Remember which of the features under test were seen; otherwise a renamed or
    // missing feature would let this test pass without checking its value.
    boolean nounRatioChecked = false;
    boolean punctuationRatioChecked = false;
    for (Feature feature : features) {
        if (feature.getName().equals(FN_N_RATIO)) {
            assertFeature(FN_N_RATIO, 0.2658, feature, 0.0001);
            nounRatioChecked = true;
        } else if (feature.getName().equals(FN_PUNC_RATIO)) {
            assertFeature(FN_PUNC_RATIO, 0.0380, feature, 0.0001);
            punctuationRatioChecked = true;
        }
    }
    Assert.assertTrue("Expected feature '" + FN_N_RATIO + "' was not extracted", nounRatioChecked);
    Assert.assertTrue("Expected feature '" + FN_PUNC_RATIO + "' was not extracted", punctuationRatioChecked);
}
Also used : POSRatioFeatureExtractor(org.dkpro.tc.features.syntax.POSRatioFeatureExtractor) BreakIteratorSegmenter(de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) OpenNlpPosTagger(de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 54 with AnalysisEngineDescription

Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in the project dkpro-tc by dkpro.

In the class AdjectiveEndingTest, method adjectiveEndingFeatureExtractorTest:

/**
 * Segments and POS-tags a sentence made up mostly of adjectives, then checks that
 * {@link AdjectiveEndingFeatureExtractor} returns nine features: the eight
 * {@code ADJ_ENDING*} features are each expected to be 10.0 and {@code ADV_ENDING9}
 * is expected to be 100.0 (tolerance 0.001).
 */
@Test
public void adjectiveEndingFeatureExtractorTest() throws Exception {
    // Values every assertion below compares against.
    final double adjectiveShare = 10.0;
    final double adverbShare = 100.0;
    final double tolerance = 0.001;

    // Pipeline: tokenisation followed by English POS tagging.
    AnalysisEngineDescription segmenter = createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription tagger = createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en");
    AnalysisEngineDescription pipelineDescription = createEngineDescription(segmenter, tagger);
    AnalysisEngine pipeline = createEngine(pipelineDescription);

    JCas cas = pipeline.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("Lovable phenomenal beautiful incredible fantastic gorgeous positive nice good mainly harmless.");
    pipeline.process(cas);

    // The classification target spans the complete document text.
    int documentEnd = cas.getDocumentText().length();
    TextClassificationTarget wholeDocument = new TextClassificationTarget(cas, 0, documentEnd);
    wholeDocument.addToIndexes();

    AdjectiveEndingFeatureExtractor endingExtractor = new AdjectiveEndingFeatureExtractor();
    Set<Feature> extracted = endingExtractor.extract(cas, wholeDocument);

    Assert.assertEquals(9, extracted.size());
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING1, adjectiveShare, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING2, adjectiveShare, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING3, adjectiveShare, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING4, adjectiveShare, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING5, adjectiveShare, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING6, adjectiveShare, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING7, adjectiveShare, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADJ_ENDING8, adjectiveShare, extracted, tolerance);
    assertFeatures(AdjectiveEndingFeatureExtractor.ADV_ENDING9, adverbShare, extracted, tolerance);
}
Also used : AdjectiveEndingFeatureExtractor(org.dkpro.tc.features.style.AdjectiveEndingFeatureExtractor) BreakIteratorSegmenter(de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) OpenNlpPosTagger(de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 55 with AnalysisEngineDescription

Use of org.apache.uima.analysis_engine.AnalysisEngineDescription in the project dkpro-tc by dkpro.

In the class InitialCharacterUpperCaseTest, method initialLetterTest:

/**
 * Tokenises the text "he Loves it", marks the character range 3..8 (the word "Loves")
 * as the classification target and checks that {@link InitialCharacterUpperCase}
 * returns a single feature named {@code FEATURE_NAME} with a value of 1.0
 * (tolerance 0.1).
 */
@Test
public void initialLetterTest() throws Exception {
    AnalysisEngineDescription segmenter = createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngine pipeline = createEngine(segmenter);

    JCas cas = pipeline.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("he Loves it");
    pipeline.process(cas);

    // Offsets 3 (inclusive) to 8 (exclusive) cover the capitalised word "Loves".
    TextClassificationTarget capitalisedWord = new TextClassificationTarget(cas, 3, 8);
    capitalisedWord.addToIndexes();

    InitialCharacterUpperCase upperCaseExtractor = new InitialCharacterUpperCase();
    Set<Feature> extracted = upperCaseExtractor.extract(cas, capitalisedWord);
    // Copy into a list so the single feature can be accessed by index.
    List<Feature> featureList = new ArrayList<>(extracted);

    Assert.assertEquals(1, extracted.size());
    Feature onlyFeature = featureList.get(0);
    Assert.assertEquals(InitialCharacterUpperCase.FEATURE_NAME, onlyFeature.getName());
    Assert.assertEquals(1.0, (double) onlyFeature.getValue(), 0.1);
}
Also used : AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Aggregations

AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)62 Test (org.junit.Test)32 File (java.io.File)27 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)25 ArrayList (java.util.ArrayList)22 AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine)18 JCas (org.apache.uima.jcas.JCas)16 Feature (org.dkpro.tc.api.features.Feature)13 FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature)11 ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription)10 AggregateBuilder (org.apache.uima.fit.factory.AggregateBuilder)8 ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException)8 JsonDataWriter (org.dkpro.tc.core.io.JsonDataWriter)8 TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget)7 Gson (com.google.gson.Gson)6 IOException (java.io.IOException)6 Instance (org.dkpro.tc.api.features.Instance)6 OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger)4 BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter)4 CAS (org.apache.uima.cas.CAS)4