Search in sources:

Example 16 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

The extract method of the class LuceneKeywordCPFE.

/**
 * Extracts combined keyword n-gram features from a pair of views.
 *
 * <p>The single {@link TextClassificationTarget} of each view is looked up, the keyword
 * n-grams of both views are collected, and the two distributions are merged via
 * {@code ComboUtils.getCombinedNgrams}. The {@code prefix} field is set to
 * {@code "comboKNG"} before the combined n-grams are handed to {@code addToFeatureArray}
 * together with the result set.
 *
 * @param view1 the first view; must contain exactly one {@link TextClassificationTarget}
 * @param view2 the second view; must contain exactly one {@link TextClassificationTarget}
 * @return the set that was passed to {@code addToFeatureArray} (presumably filled with one
 *         feature per top-k combined n-gram; confirm against that helper)
 * @throws TextClassificationException declared by the feature extractor contract
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2) throws TextClassificationException {
    // Both targets are resolved up front, before any n-gram work starts.
    TextClassificationTarget firstTarget =
            JCasUtil.selectSingle(view1, TextClassificationTarget.class);
    TextClassificationTarget secondTarget =
            JCasUtil.selectSingle(view2, TextClassificationTarget.class);

    FrequencyDistribution<String> firstNgrams = KeywordNGramUtils.getDocumentKeywordNgrams(
            view1, firstTarget, ngramMinN1, ngramMaxN1,
            markSentenceBoundary, markSentenceLocation, includeCommas, keywords);
    FrequencyDistribution<String> secondNgrams = KeywordNGramUtils.getDocumentKeywordNgrams(
            view2, secondTarget, ngramMinN2, ngramMaxN2,
            markSentenceBoundary, markSentenceLocation, includeCommas, keywords);

    FrequencyDistribution<String> comboNgrams = ComboUtils.getCombinedNgrams(
            firstNgrams, secondNgrams, ngramMinNCombo, ngramMaxNCombo,
            ngramUseSymmetricalCombos);

    // The prefix field must be set before addToFeatureArray runs.
    prefix = "comboKNG";

    Set<Feature> result = new HashSet<>();
    addToFeatureArray(comboNgrams, topKSetCombo, result);
    return result;
}
Also used : TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) Feature(org.dkpro.tc.api.features.Feature) HashSet(java.util.HashSet)

Example 17 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

The nEFeatureExtractorTest method of the class NETest.

/**
 * Checks that {@code NamedEntityPerSentenceRatio} yields six features for a CAS holding
 * one location, one person and one organization annotation (each spanning 0-5) and two
 * sentences (0-15 and 15-22), all inside a classification target spanning 0-22.
 *
 * <p>The expected values are delegated to {@code testFeatures}.
 */
@Test
public void nEFeatureExtractorTest() throws Exception {
    AnalysisEngine noOpEngine = createEngine(NoOpAnnotator.class);
    JCas cas = noOpEngine.newJCas();
    noOpEngine.process(cas);

    TextClassificationTarget target = new TextClassificationTarget(cas, 0, 22);
    target.addToIndexes();

    // Three named entities covering the same span, followed by two adjacent sentences.
    Location location = new Location(cas, 0, 5);
    Person person = new Person(cas, 0, 5);
    Organization organization = new Organization(cas, 0, 5);
    Sentence firstSentence = new Sentence(cas, 0, 15);
    Sentence secondSentence = new Sentence(cas, 15, 22);

    location.addToIndexes();
    person.addToIndexes();
    organization.addToIndexes();
    firstSentence.addToIndexes();
    secondSentence.addToIndexes();

    Set<Feature> extracted = new NamedEntityPerSentenceRatio().extract(cas, target);

    assertEquals(6, extracted.size());
    testFeatures(extracted, 1, 1, 1, 0.5f, 0.5f, 0.5f);
}
Also used : Organization(de.tudarmstadt.ukp.dkpro.core.api.ner.type.Organization) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) Person(de.tudarmstadt.ukp.dkpro.core.api.ner.type.Person) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Location(de.tudarmstadt.ukp.dkpro.core.api.ner.type.Location) Test(org.junit.Test)

Example 18 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

The questionRatioFeatureExtractorTest method of the class QuestionRatioTest.

/**
 * Runs {@code QuestionsRatioFeatureExtractor} over a short English text segmented by
 * {@code BreakIteratorSegmenter} and expects exactly one feature,
 * {@code FN_QUESTION_RATIO}, with the value 0.5.
 */
@Test
public void questionRatioFeatureExtractorTest() throws Exception {
    AnalysisEngine segmenter =
            createEngine(createEngineDescription(BreakIteratorSegmenter.class));

    JCas cas = segmenter.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("Is he a tester???? Really?? He is a tester! Oh yes.");
    segmenter.process(cas);

    // The classification target spans the whole document text.
    int textLength = cas.getDocumentText().length();
    TextClassificationTarget target = new TextClassificationTarget(cas, 0, textLength);
    target.addToIndexes();

    QuestionsRatioFeatureExtractor ratioExtractor = new QuestionsRatioFeatureExtractor();
    List<Feature> extracted = new ArrayList<Feature>(ratioExtractor.extract(cas, target));

    Assert.assertEquals(1, extracted.size());
    for (int i = 0; i < extracted.size(); i++) {
        assertFeature(FN_QUESTION_RATIO, 0.5, extracted.get(i));
    }
}
Also used : AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 19 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

The instanceIdFeatureTest method of the class InstanceIdFeatureTest.

/**
 * Verifies the instance id values produced by the three {@code InstanceIdFeature.retrieve}
 * overloads for a CAS whose {@code JCasId} is 123 and whose single classification target
 * has id 0.
 *
 * <p>Expected values: {@code "123_0"} for (jcas, target), {@code "123"} for (jcas) alone,
 * and {@code "123_5_0"} for (jcas, target, 5).
 */
@Test
public void instanceIdFeatureTest() throws Exception {
    AnalysisEngine engine = createEngine(NoOpAnnotator.class);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    engine.process(jcas);
    TextClassificationTarget unit1 = new TextClassificationTarget(jcas, 0, 1);
    unit1.setId(0);
    unit1.addToIndexes();
    JCasId id = new JCasId(jcas);
    id.setId(123);
    id.addToIndexes();
    Feature feature = InstanceIdFeature.retrieve(jcas, unit1);
    Feature feature2 = InstanceIdFeature.retrieve(jcas);
    Feature feature3 = InstanceIdFeature.retrieve(jcas, unit1, 5);
    // JUnit's contract is assertEquals(expected, actual); keeping the literal first makes
    // a failure report "expected:<123_0> but was:<...>" instead of the reverse.
    assertEquals("123_0", feature.getValue());
    assertEquals("123", feature2.getValue());
    assertEquals("123_5_0", feature3.getValue());
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 20 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

The process method of the class DocumentModeAnnotator.

/**
 * Adds a {@link TextClassificationTarget} spanning the whole document text to the CAS.
 *
 * <p>Nothing is done when {@code featureMode} is not {@code Constants.FM_DOCUMENT}, or
 * when the CAS already contains a {@link TextClassificationTarget}.
 *
 * @param aJCas the CAS to annotate
 * @throws AnalysisEngineProcessException declared by the annotator contract
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    boolean documentMode = featureMode.equals(Constants.FM_DOCUMENT);
    // Short-circuit keeps the original order: the mode check runs before the index lookup.
    if (!documentMode || JCasUtil.exists(aJCas, TextClassificationTarget.class)) {
        return;
    }

    int documentEnd = aJCas.getDocumentText().length();
    TextClassificationTarget wholeDocument = new TextClassificationTarget(aJCas, 0, documentEnd);
    wholeDocument.addToIndexes();
}
Also used : TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget)

Aggregations

TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 61; JCas (org.apache.uima.jcas.JCas): 29; ArrayList (java.util.ArrayList): 22; TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome): 18; Feature (org.dkpro.tc.api.features.Feature): 16; Test (org.junit.Test): 16; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 12; TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence): 12; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 11; JCasId (org.dkpro.tc.api.type.JCasId): 11; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 8; AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 7; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 7; FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature): 6; CollectionReader (org.apache.uima.collection.CollectionReader): 5; FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase): 5; DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData): 4; Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 4; OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger): 4; BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter): 4