Search in sources:

Example 11 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class NGramUtilsTest method characterBiGrams.

/**
 * Checks character n-gram extraction over a single classification target.
 *
 * The target spans offsets 2..7 of "A house", i.e. the word "house". The
 * extractor is called with the arguments (false, 2, 3, '^', '$'); judging by
 * the expected values below, this yields the 2- and 3-character grams of the
 * covered text with '^' and '$' acting as begin/end markers (the meaning of
 * the boolean flag is not visible here -- presumably a lower-casing switch,
 * TODO confirm against CharacterNGramMC).
 *
 * Expected: 6 bigrams + 5 trigrams of "^house$" = 11 entries.
 */
@Test
public void characterBiGrams() throws Exception {
    String text = "A house";
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentLanguage("en");
    jcas.setDocumentText(text);

    // Annotate each whitespace-separated word as a Token.
    JCasBuilder cb = new JCasBuilder(jcas);
    for (String token : text.split(" ")) {
        cb.add(token, Token.class);
    }

    // Restrict extraction to "house" (offsets 2..7).
    TextClassificationTarget tu = new TextClassificationTarget(jcas, 2, 7);
    tu.addToIndexes();

    FrequencyDistribution<String> ngrams = CharacterNGramMC.getAnnotationCharacterNgrams(tu, false, 2, 3, '^', '$');

    assertEquals(11, ngrams.getN());

    // Bigrams of "^house$".
    assertTrue(ngrams.contains("^h"));
    assertTrue(ngrams.contains("ho"));
    assertTrue(ngrams.contains("ou"));
    assertTrue(ngrams.contains("us"));
    assertTrue(ngrams.contains("se"));
    // Previously this line re-checked "se$" (a trigram that is asserted below),
    // leaving the final bigram unverified.
    assertTrue(ngrams.contains("e$"));

    // Trigrams of "^house$".
    assertTrue(ngrams.contains("^ho"));
    assertTrue(ngrams.contains("hou"));
    assertTrue(ngrams.contains("ous"));
    assertTrue(ngrams.contains("use"));
    assertTrue(ngrams.contains("se$"));
}
Also used : JCasBuilder(org.apache.uima.fit.factory.JCasBuilder) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) Test(org.junit.Test)

Example 12 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class TestReaderSingleLabelUnitReader method getNext.

/**
 * Reads the next document via the superclass and then annotates every
 * space-separated token of the document text with a
 * {@link TextClassificationTarget} and a {@link TextClassificationOutcome}
 * covering exactly that token. A {@link JCasId} carrying a running id is
 * added to each CAS as well.
 *
 * @param aCAS the CAS to fill
 * @throws IOException         propagated from the superclass reader
 * @throws CollectionException if the JCas view cannot be obtained from the CAS
 *                             (the original {@link CASException} is kept as cause)
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
    super.getNext(aCAS);
    JCas jcas;
    try {
        jcas = aCAS.getJCas();
        JCasId id = new JCasId(jcas);
        id.setId(jcasId++);
        id.addToIndexes();
    } catch (CASException e) {
        // Keep the underlying failure instead of throwing a bare exception.
        throw new CollectionException(e);
    }
    String documentText = aCAS.getDocumentText();
    // Position from which the next token is searched; always moves past the
    // token just annotated so repeated words resolve to the right occurrence.
    int searchFrom = 0;
    for (String token : documentText.split(" ")) {
        int begin = documentText.indexOf(token, searchFrom);
        int end = begin + token.length();
        new TextClassificationTarget(jcas, begin, end).addToIndexes();
        new TextClassificationOutcome(jcas, begin, end).addToIndexes();
        // The old code advanced the start offset by a single character
        // (s += 1), so every annotation after the first began at the wrong
        // offset and overlapped preceding tokens.
        searchFrom = end;
    }
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) CollectionException(org.apache.uima.collection.CollectionException) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) CASException(org.apache.uima.cas.CASException)

Example 13 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class SequenceOutcomeReaderTest method testSkipLineReader.

/**
 * Reads the dummy POS file with the reader configured to skip lines that
 * start with "#", gathers the covered text of all targets per sequence
 * (and the outcomes per CAS), and verifies the tokens of the second
 * sequence that was read.
 */
@Test
public void testSkipLineReader() throws Exception {
    CollectionReader reader = CollectionReaderFactory.createReader(
            SequenceOutcomeReader.class,
            SequenceOutcomeReader.PARAM_SOURCE_LOCATION, "src/test/resources/sequence/posDummy.txt",
            SequenceOutcomeReader.PARAM_SKIP_LINES_START_WITH_STRING, "#");

    List<List<String>> readSequences = new ArrayList<>();
    List<List<String>> readOutcomes = new ArrayList<>();

    while (reader.hasNext()) {
        JCas cas = JCasFactory.createJCas();
        reader.getNext(cas.getCas());

        // One token list per sequence annotation found in this CAS.
        for (TextClassificationSequence seq : JCasUtil.select(cas, TextClassificationSequence.class)) {
            List<String> coveredTexts = new ArrayList<>();
            for (TextClassificationTarget unit : JCasUtil.selectCovered(cas, TextClassificationTarget.class, seq)) {
                coveredTexts.add(unit.getCoveredText());
            }
            readSequences.add(coveredTexts);
        }

        // One outcome list per CAS.
        List<String> labels = new ArrayList<>();
        for (TextClassificationOutcome outcome : JCasUtil.select(cas, TextClassificationOutcome.class)) {
            labels.add(outcome.getOutcome());
        }
        readOutcomes.add(labels);
    }

    // Tokens expected in the second sequence (index 1).
    String[] expectedTokens = { "This2", "is2", "a2", "!" };
    List<String> secondSequence = readSequences.get(1);
    assertEquals(4, secondSequence.size());
    for (int i = 0; i < expectedTokens.length; i++) {
        assertEquals(expectedTokens[i], secondSequence.get(i));
    }
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) List(java.util.List) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence) Test(org.junit.Test)

Example 14 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class TestTargetSurfaceFormContextFeature method testTokenFeatureExtractors.

/**
 * Walks the context offsets -4..+4 around the target produced by
 * {@code setUp()} and checks the value reported for each offset: the
 * out-of-boundary marker at both extremes, the begin/end-of-sequence
 * markers next to them, and the surface forms in between.
 */
@Test
public void testTokenFeatureExtractors() throws Exception {
    // setUp() hands back the JCas at index 0 and the target at index 1.
    Object[] fixture = setUp();
    JCas cas = (JCas) fixture[0];
    TextClassificationTarget target = (TextClassificationTarget) fixture[1];

    // Left of the target.
    assertResult(cas, target, -4, TargetSurfaceFormContextFeature.OUT_OF_BOUNDARY);
    assertResult(cas, target, -3, TargetSurfaceFormContextFeature.BEG_OF_SEQUENCE);
    assertResult(cas, target, -2, "it");
    assertResult(cas, target, -1, "is");

    // The target itself.
    assertResult(cas, target, 0, "raining");

    // Right of the target.
    assertResult(cas, target, 1, "all");
    assertResult(cas, target, 2, "day");
    assertResult(cas, target, 3, TargetSurfaceFormContextFeature.END_OF_SEQUENCE);
    assertResult(cas, target, 4, TargetSurfaceFormContextFeature.OUT_OF_BOUNDARY);
}
Also used : TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) Test(org.junit.Test)

Example 15 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class FolderwiseDataReader method setTextClassificationTarget.

/**
 * Creates a {@link TextClassificationTarget} spanning {@code [begin, end)}
 * in the given JCas and adds it to the indexes.
 *
 * @param aJCas       the JCas that receives the annotation
 * @param currentFile the resource being read; not used by this implementation
 * @param begin       begin offset of the target
 * @param end         end offset of the target
 */
protected void setTextClassificationTarget(JCas aJCas, Resource currentFile, int begin, int end) {
    new TextClassificationTarget(aJCas, begin, end).addToIndexes();
}
Also used : TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget)

Aggregations

TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget)61 JCas (org.apache.uima.jcas.JCas)29 ArrayList (java.util.ArrayList)22 TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome)18 Feature (org.dkpro.tc.api.features.Feature)16 Test (org.junit.Test)16 AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine)12 TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence)12 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)11 JCasId (org.dkpro.tc.api.type.JCasId)11 AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)8 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)7 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)7 FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature)6 CollectionReader (org.apache.uima.collection.CollectionReader)5 FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)5 DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData)4 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)4 OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger)4 BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter)4