Search in sources:

Example 76 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class InitialCharacterUpperCaseTest method initialLetterTest.

/**
 * Runs {@code InitialCharacterUpperCase} on a target spanning offsets 3 to 8 of the
 * text "he Loves it" and expects exactly one feature, named
 * {@code InitialCharacterUpperCase.FEATURE_NAME}, whose value is 1.0 (tolerance 0.1).
 *
 * @throws Exception if engine creation, processing or feature extraction fails
 */
@Test
public void initialLetterTest() throws Exception {
    // Segment the document before extracting features.
    AnalysisEngine segmenter = createEngine(createEngineDescription(BreakIteratorSegmenter.class));

    JCas cas = segmenter.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("he Loves it");
    segmenter.process(cas);

    // The target is created with offsets 3 and 8 and registered in the CAS indexes.
    TextClassificationTarget target = new TextClassificationTarget(cas, 3, 8);
    target.addToIndexes();

    InitialCharacterUpperCase featureExtractor = new InitialCharacterUpperCase();
    Set<Feature> extracted = featureExtractor.extract(cas, target);
    // Copy into a list so the single feature can be accessed by position.
    List<Feature> asList = new ArrayList<>(extracted);

    Assert.assertEquals(1, extracted.size());

    Feature only = asList.get(0);
    Assert.assertEquals(InitialCharacterUpperCase.FEATURE_NAME, only.getName());
    Assert.assertEquals(1.0, (double) only.getValue(), 0.1);
}
Also used : AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 77 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class PronounRatioTest method posContextFeatureExtractorTest.

/**
 * Runs a segmenter + POS tagger pipeline over "He is no tester. I am a tester.",
 * extracts features with {@code PronounRatioFeatureExtractor} for a target covering
 * the whole document, and expects seven features. Of those, only the features named
 * {@code FN_HE_RATIO} (expected 0.5) and {@code FN_WE_RATIO} (expected 0.0) are
 * value-checked; all others are ignored.
 *
 * @throws Exception if engine creation, processing or feature extraction fails
 */
@Test
public void posContextFeatureExtractorTest() throws Exception {
    AnalysisEngineDescription segmenter = createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription tagger = createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en");
    AnalysisEngine pipeline = createEngine(createEngineDescription(segmenter, tagger));

    JCas cas = pipeline.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("He is no tester. I am a tester.");
    pipeline.process(cas);

    // The classification target spans the complete document text.
    int documentLength = cas.getDocumentText().length();
    TextClassificationTarget target = new TextClassificationTarget(cas, 0, documentLength);
    target.addToIndexes();

    PronounRatioFeatureExtractor featureExtractor = new PronounRatioFeatureExtractor();
    List<Feature> extracted = new ArrayList<Feature>(featureExtractor.extract(cas, target));
    Assert.assertEquals(7, extracted.size());

    for (Feature candidate : extracted) {
        String name = candidate.getName();
        if (name.equals(FN_HE_RATIO)) {
            assertFeature(FN_HE_RATIO, 0.5, candidate);
            continue;
        }
        if (name.equals(FN_WE_RATIO)) {
            assertFeature(FN_WE_RATIO, 0.0, candidate);
        }
    }
}
Also used : BreakIteratorSegmenter(de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter) PronounRatioFeatureExtractor(org.dkpro.tc.features.syntax.PronounRatioFeatureExtractor) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) OpenNlpPosTagger(de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 78 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class EmoticonRatioTest method emoticonRatioFeatureExtractorTest.

/**
 * Builds tokens for "This is a very emotional tweet ;-)", adds a {@code POS_EMO}
 * annotation over offsets 31 to 34, and expects {@code EmoticonRatio} to return a
 * single feature named after the extractor's simple class name with a value of
 * 0.14 (tolerance 0.01).
 *
 * @throws Exception if engine creation, processing or feature extraction fails
 */
@Test
public void emoticonRatioFeatureExtractorTest() throws Exception {
    AnalysisEngine noOpEngine = createEngine(createEngineDescription(NoOpAnnotator.class));
    TokenBuilder<Token, Sentence> tokenBuilder = TokenBuilder.create(Token.class, Sentence.class);

    JCas cas = noOpEngine.newJCas();
    cas.setDocumentLanguage("en");
    tokenBuilder.buildTokens(cas, "This is a very emotional tweet ;-)");

    // Manually annotate the span 31..34 as an emoticon POS tag.
    POS_EMO emoticon = new POS_EMO(cas);
    emoticon.setBegin(31);
    emoticon.setEnd(34);
    emoticon.addToIndexes();

    noOpEngine.process(cas);

    // The classification target spans the complete document text.
    int documentLength = cas.getDocumentText().length();
    TextClassificationTarget target = new TextClassificationTarget(cas, 0, documentLength);
    target.addToIndexes();

    EmoticonRatio ratioExtractor = new EmoticonRatio();
    List<Feature> extracted = new ArrayList<Feature>(ratioExtractor.extract(cas, target));
    Assert.assertEquals(1, extracted.size());

    for (int pos = 0; pos < extracted.size(); pos++) {
        assertFeature(EmoticonRatio.class.getSimpleName(), 0.14, extracted.get(pos), 0.01);
    }
}
Also used : EmoticonRatio(org.dkpro.tc.features.twitter.EmoticonRatio) POS_EMO(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.POS_EMO) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 79 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class CrfSuiteFeatureFormatExtractionIterator method next.

/**
 * Serializes instances starting at the current {@code insIdx} into one text chunk and
 * advances {@code insIdx} past the instances consumed.
 *
 * <p>Each instance becomes one line: the outcome (passed through
 * {@code LabelSubstitutor.labelReplacement}), a tab, then the features written as
 * {@code name=value} separated by tabs. A line whose id differs from the last seen id
 * (initially {@code idInitVal}) additionally gets a trailing {@code \t__BOS__}. The
 * chunk ends, via {@code appendEOS}, after the instance whose successor has a
 * different id or after the last instance; otherwise lines are separated by
 * {@code \n}.
 *
 * @return the buffer holding the serialized chunk; empty if no instances remain
 * @throws UnsupportedOperationException wrapping any exception raised while building
 *         the chunk
 */
@Override
public StringBuilder next() {
    StringBuilder buffer = new StringBuilder();
    try {
        String currentSeqId = idInitVal;
        boolean markSequenceStart = false;
        while (insIdx < instances.size()) {
            Instance current = instances.get(insIdx);
            if (!currentSeqId.equals(getId(current))) {
                markSequenceStart = true;
                currentSeqId = getId(current);
            }

            buffer.append(LabelSubstitutor.labelReplacement(current.getOutcome()));
            buffer.append("\t");

            // Tab-separate the features; no tab is written after the final one.
            int remaining = current.getFeatures().size();
            for (Feature feature : current.getFeatures()) {
                String pair = feature.getName() + "=" + feature.getValue();
                buffer.append(pair);
                remaining--;
                if (remaining > 0) {
                    buffer.append("\t");
                }
            }

            // Mark first line of new sequence with an additional __BOS__
            if (markSequenceStart) {
                buffer.append("\t").append("__BOS__");
                markSequenceStart = false;
            }

            // Peek ahead - has the end of the sequence been reached?
            int following = insIdx + 1;
            if (following < instances.size()) {
                String followingId = getId(instances.get(following));
                if (!currentSeqId.equals(followingId)) {
                    appendEOS(buffer);
                    insIdx++;
                    break;
                }
            } else if (following == instances.size()) {
                // Last instance overall: close the sequence, nothing left to emit.
                appendEOS(buffer);
                insIdx++;
                break;
            }

            buffer.append("\n");
            insIdx++;
        }
    } catch (Exception e) {
        throw new UnsupportedOperationException(e);
    }
    return buffer;
}
Also used : Instance(org.dkpro.tc.api.features.Instance) Feature(org.dkpro.tc.api.features.Feature)

Example 80 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class CrfSuiteDataWriterTest method prepareFeatures.

/**
 * Fills {@code instances} with five fixture instances built from two feature lists.
 * The first three instances get sequence id 0 (positions 0 to 2), the last two get
 * sequence id 1 (positions 0 and 1). Outcomes are "1", "2", "3", "4" and "4".
 *
 * @throws Exception declared by the original signature; propagated from fixture
 *         construction
 */
private void prepareFeatures() throws Exception {
    // Feature list A: feature1, feature2, feature3.
    List<Feature> featuresA = new ArrayList<Feature>();
    featuresA.add(new Feature("feature1", 1.0, FeatureType.NUMERIC));
    featuresA.add(new Feature("feature2", 0.0, FeatureType.NUMERIC));
    featuresA.add(new Feature("feature3", "Water", FeatureType.STRING));

    // Feature list B: same names, but feature2 is listed before feature1.
    List<Feature> featuresB = new ArrayList<Feature>();
    featuresB.add(new Feature("feature2", 0.5, FeatureType.NUMERIC));
    featuresB.add(new Feature("feature1", 0.5, FeatureType.NUMERIC));
    featuresB.add(new Feature("feature3", "Fanta", FeatureType.STRING));

    // Sequence 0
    Instance first = new Instance(featuresA, "1");
    first.setSequenceId(0);
    first.setSequencePosition(0);
    instances.add(first);

    Instance second = new Instance(featuresB, "2");
    second.setSequenceId(0);
    second.setSequencePosition(1);
    instances.add(second);

    Instance third = new Instance(featuresA, "3");
    third.setSequenceId(0);
    third.setSequencePosition(2);
    instances.add(third);

    // Sequence 1
    Instance fourth = new Instance(featuresA, "4");
    fourth.setSequenceId(1);
    fourth.setSequencePosition(0);
    instances.add(fourth);

    Instance fifth = new Instance(featuresB, "4");
    fifth.setSequenceId(1);
    fifth.setSequencePosition(1);
    instances.add(fifth);
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature)

Aggregations

Feature (org.dkpro.tc.api.features.Feature): 94; Test (org.junit.Test): 48; Instance (org.dkpro.tc.api.features.Instance): 30; ArrayList (java.util.ArrayList): 29; HashSet (java.util.HashSet): 21; FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature): 17; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 16; TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 16; JCas (org.apache.uima.jcas.JCas): 15; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 13; File (java.io.File): 8; Attribute (weka.core.Attribute): 8; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 7; Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 6; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 5; Chunk (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk): 4; OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger): 4; BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter): 4; Instances (weka.core.Instances): 4; IOException (java.io.IOException): 3