Search in sources:

Example 41 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

Method getNgramsFD of the class IdfPairMetaCollector.

/**
 * Computes the n-gram frequency distribution for the given CAS.
 *
 * <p>Looks up the {@link TextClassificationTarget} via {@code JCasUtil.selectSingle} and
 * delegates to {@code NGramUtils.getDocumentNgrams}, passing this collector's
 * {@code stopwords} and {@code ngramAnnotationType}.
 *
 * @param jcas the CAS to read the target and n-grams from
 * @return the frequency distribution produced by {@code NGramUtils.getDocumentNgrams}
 * @throws TextClassificationException declared by the overridden method
 */
@Override
protected FrequencyDistribution<String> getNgramsFD(JCas jcas) throws TextClassificationException {
    final TextClassificationTarget target = JCasUtil.selectSingle(jcas, TextClassificationTarget.class);
    // NOTE(review): the flags (true, false) and the (1, 1) pair are presumably
    // lower-casing/filtering switches and min/max n-gram length; confirm against NGramUtils.
    return NGramUtils.getDocumentNgrams(jcas, target, true, false, 1, 1, stopwords, ngramAnnotationType);
}
Also used : TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget)

Example 42 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

Method setUp of the class TestTargetSurfaceFormContextFeature.

/**
 * Builds the fixture for this test class: a CAS holding the text
 * {@code "It is raining all day"}, processed by a {@code BreakIteratorSegmenter} engine,
 * with one {@link TextClassificationTarget} indexed over each of the first five tokens.
 *
 * @return a two-element array: the prepared {@link JCas} and the target created
 *         for the token at index 2
 * @throws Exception if CAS creation or engine processing fails
 */
private Object[] setUp() throws Exception {
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("It is raining all day");

    DocumentMetaData metaData = new DocumentMetaData(jcas);
    metaData.setDocumentId("1");
    metaData.addToIndexes();

    AnalysisEngine segmenter = createEngine(BreakIteratorSegmenter.class);
    segmenter.process(jcas.getCas());

    // Snapshot the tokens into a list so they can be addressed by position.
    ArrayList<Token> tokens = new ArrayList<Token>(JCasUtil.select(jcas, Token.class));

    // One target per token for positions 0..4; the one at position 2 is handed back to the caller.
    TextClassificationTarget middleTarget = null;
    for (int position = 0; position < 5; position++) {
        Token token = tokens.get(position);
        TextClassificationTarget target = new TextClassificationTarget(jcas, token.getBegin(), token.getEnd());
        target.addToIndexes();
        if (position == 2) {
            middleTarget = target;
        }
    }
    return new Object[] { jcas, middleTarget };
}
Also used : ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 43 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

Method numberOfHashTagsFeatureExtractorTest of the class NumberOfHashTagsTest.

/**
 * Runs the {@link NumberOfHashTags} extractor over a target spanning the whole document
 * text and checks that exactly one feature is returned, named after the extractor's
 * simple class name and carrying the value 3.
 *
 * @throws Exception if engine creation, processing, or feature extraction fails
 */
@Test
public void numberOfHashTagsFeatureExtractorTest() throws Exception {
    AnalysisEngine noOpEngine = createEngine(createEngineDescription(NoOpAnnotator.class));

    JCas cas = noOpEngine.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("This is a very #emotional tweet ;-) #icouldcry #ILoveHashTags");
    noOpEngine.process(cas);

    // The classification target covers the document text from offset 0 to its length.
    TextClassificationTarget wholeDocument = new TextClassificationTarget(cas, 0, cas.getDocumentText().length());
    wholeDocument.addToIndexes();

    List<Feature> extracted = new ArrayList<Feature>(new NumberOfHashTags().extract(cas, wholeDocument));
    Assert.assertEquals(1, extracted.size());

    String expectedName = NumberOfHashTags.class.getSimpleName();
    for (Feature extractedFeature : extracted) {
        assertFeature(expectedName, 3, extractedFeature);
    }
}
Also used : AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) NumberOfHashTags(org.dkpro.tc.features.twitter.NumberOfHashTags) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 44 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

Method setTextClassificationTarget of the class SequenceOutcomeReader.

/**
 * Creates a {@link TextClassificationTarget} over {@code [begin, end)} offsets as passed,
 * sets the given token text as its suffix, and adds it to the CAS indexes.
 *
 * @param aJCas the CAS that receives the new target
 * @param token the text stored as the target's suffix
 * @param begin begin offset handed to the target constructor
 * @param end   end offset handed to the target constructor
 */
protected void setTextClassificationTarget(JCas aJCas, String token, int begin, int end) {
    final TextClassificationTarget target = new TextClassificationTarget(aJCas, begin, end);
    // Storing the token as suffix makes the id2outcome report easier to read.
    target.setSuffix(token);
    target.addToIndexes();
}
Also used : TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget)

Example 45 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

Method getInstancesInSequence of the class CrfSuiteLoadModelConnector.

/**
 * Builds one {@link Instance} per {@link TextClassificationTarget} selected via
 * {@code JCasUtil.selectCovered} for the given sequence.
 *
 * <p>Each instance receives, in this order: an optional instance-id feature, the features
 * of every supplied extractor, the outcomes from {@code getOutcomes}, the CAS id, the
 * sequence id, and the target's id as its sequence position.
 *
 * @param featureExtractors extractors to run on every target; each is cast to {@code FeatureExtractor}
 * @param jcas              the CAS holding the targets and the {@link JCasId} annotation
 * @param sequence          the sequence annotation whose covered targets are processed
 * @param addInstanceId     whether to add the feature from {@code InstanceIdFeature.retrieve}
 * @param sequenceId        id stored on every instance and passed to the instance-id feature
 * @return the instances in the order the targets were returned by {@code selectCovered}
 * @throws AnalysisEngineProcessException wrapping any {@link TextClassificationException}
 *         raised by a feature extractor
 * @throws Exception any other failure propagated from the calls made here
 */
private List<Instance> getInstancesInSequence(FeatureExtractorResource_ImplBase[] featureExtractors, JCas jcas, TextClassificationSequence sequence, boolean addInstanceId, int sequenceId) throws Exception {
    List<Instance> collected = new ArrayList<Instance>();
    int jcasId = JCasUtil.selectSingle(jcas, JCasId.class).getId();

    for (TextClassificationTarget target : JCasUtil.selectCovered(jcas, TextClassificationTarget.class, sequence)) {
        Instance current = new Instance();

        if (addInstanceId) {
            current.addFeature(InstanceIdFeature.retrieve(jcas, target, sequenceId));
        }

        // Run every feature extractor on the target and collect its features.
        for (FeatureExtractorResource_ImplBase extractor : featureExtractors) {
            try {
                current.addFeatures(((FeatureExtractor) extractor).extract(jcas, target));
            } catch (TextClassificationException e) {
                throw new AnalysisEngineProcessException(e);
            }
        }

        // Attach outcome label(s) and bookkeeping ids.
        current.setOutcomes(getOutcomes(jcas, target));
        current.setJcasId(jcasId);
        current.setSequenceId(sequenceId);
        current.setSequencePosition(target.getId());

        collected.add(current);
    }
    return collected;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) Instance(org.dkpro.tc.api.features.Instance) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) FeatureExtractorResource_ImplBase(org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Aggregations

TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget)61 JCas (org.apache.uima.jcas.JCas)29 ArrayList (java.util.ArrayList)22 TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome)18 Feature (org.dkpro.tc.api.features.Feature)16 Test (org.junit.Test)16 AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine)12 TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence)12 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)11 JCasId (org.dkpro.tc.api.type.JCasId)11 AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)8 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)7 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)7 FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature)6 CollectionReader (org.apache.uima.collection.CollectionReader)5 FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)5 DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData)4 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)4 OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger)4 BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter)4