Search in sources :

Example 26 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class TestTaskUtils method initJCas.

/**
 * Builds an in-memory JCas fixture holding six whitespace-separated tokens.
 *
 * <p>Every token gets a {@code TextClassificationTarget} and a
 * {@code TextClassificationOutcome} over the same span; the outcome value is the
 * second element of the token's table row. Two {@code TextClassificationSequence}
 * annotations are added: the first spans the first three tokens, the second spans
 * the remaining three.
 *
 * @param setUnitIdAsPartOfTheInstanceId when {@code true}, each target's suffix is
 *        set to the token text
 * @return the populated JCas with document text, id, metadata, targets, outcomes
 *         and sequences
 * @throws Exception if the engine or the JCas cannot be created
 */
private JCas initJCas(boolean setUnitIdAsPartOfTheInstanceId) throws Exception {
    JCas jCas = AnalysisEngineFactory.createEngine(NoOpAnnotator.class).newJCas();

    JCasId casId = new JCasId(jCas);
    casId.setId(4711);
    casId.addToIndexes();

    DocumentMetaData metaData = new DocumentMetaData(jCas);
    metaData.setDocumentTitle("title");
    metaData.setDocumentId("4711");
    metaData.addToIndexes();

    // Each row is { token text, outcome label }.
    String[][] tokens = {
        // sequence 1
        { "a", "DT" }, { "car", "NN" }, { "drives", "VBZ" },
        // sequence 2
        { "the", "DT" }, { "hedgehogs", "NN" }, { "dies", "VBZ" } };

    StringBuilder documentText = new StringBuilder();
    int unitId = 0;
    for (String[] token : tokens) {
        String word = token[0];
        String label = token[1];

        // Tokens are joined by exactly one blank; no trailing blank after the last one.
        if (unitId > 0) {
            documentText.append(" ");
        }
        int begin = documentText.length();
        int end = begin + word.length();

        TextClassificationTarget target = new TextClassificationTarget(jCas, begin, end);
        if (setUnitIdAsPartOfTheInstanceId) {
            target.setSuffix(word);
        }
        target.setId(unitId);
        target.addToIndexes();

        TextClassificationOutcome outcome = new TextClassificationOutcome(jCas, begin, end);
        outcome.setOutcome(label);
        outcome.addToIndexes();

        documentText.append(word);
        unitId++;
    }

    String text = documentText.toString();
    jCas.setDocumentText(text);

    // The first sequence ends right after the third token (two separating blanks included).
    int firstSequenceEnd = String.join(" ", tokens[0][0], tokens[1][0], tokens[2][0]).length();
    TextClassificationSequence firstSequence = new TextClassificationSequence(jCas, 0, firstSequenceEnd);
    firstSequence.addToIndexes();

    // The second sequence starts after the blank that follows the first one.
    TextClassificationSequence secondSequence =
            new TextClassificationSequence(jCas, firstSequenceEnd + 1, text.length());
    secondSequence.addToIndexes();

    return jCas;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 27 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class FoldClassificationUnitCasMultiplier method setTargetAnnotation.

/**
 * Adds the classification target annotations for the buffered spans to the given JCas.
 *
 * <p>In sequence mode, one {@code TextClassificationSequence} is created per buffered
 * annotation and numbered from {@code seqCounter}; the units collected in
 * {@code seqModeUnitsCoveredBySequenceAnno} are then added to the indexes again.
 * Otherwise, one {@code TextClassificationTarget} is created per buffered annotation
 * and numbered from {@code unitCounter}.
 *
 * <p>The id is assigned before the annotation is added to the indexes, so that an
 * annotation is fully populated by the time it becomes visible through an index.
 *
 * @param copyJCas the JCas that receives the new annotations
 */
private void setTargetAnnotation(JCas copyJCas) {
    if (useSequences) {
        for (AnnotationFS s : buf) {
            TextClassificationSequence seq = new TextClassificationSequence(copyJCas, s.getBegin(), s.getEnd());
            // Populate features first, index afterwards.
            seq.setId(seqCounter++);
            seq.addToIndexes();
            // re-add the units that are covered by those sequences
            for (TextClassificationTarget u : seqModeUnitsCoveredBySequenceAnno) {
                u.addToIndexes();
            }
            // The collected units are consumed here: the list is replaced by an empty
            // one, so later iterations of this loop find nothing left to re-add.
            seqModeUnitsCoveredBySequenceAnno = new ArrayList<>();
        }
    } else {
        for (AnnotationFS u : buf) {
            TextClassificationTarget unit = new TextClassificationTarget(copyJCas, u.getBegin(), u.getEnd());
            // Populate features first, index afterwards.
            unit.setId(unitCounter);
            unit.addToIndexes();
            unitCounter++;
        }
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence)

Example 28 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class TcAnnotator method addTCUnitAndOutcomeAnnotation.

/**
 * Mirrors every annotation of the type named by {@code nameUnit} with a
 * {@code TextClassificationTarget} and a {@code TextClassificationOutcome} over the
 * same span. The outcome is set to {@code Constants.TC_OUTCOME_DUMMY_VALUE}.
 *
 * @param aJCas the JCas that is read from and that receives the new annotations
 */
private void addTCUnitAndOutcomeAnnotation(JCas aJCas) {
    Type unitType = aJCas.getCas().getTypeSystem().getType(nameUnit);

    for (AnnotationFS source : CasUtil.select(aJCas.getCas(), unitType)) {
        int begin = source.getBegin();
        int end = source.getEnd();

        TextClassificationTarget target = new TextClassificationTarget(aJCas, begin, end);
        target.addToIndexes();

        TextClassificationOutcome placeholder = new TextClassificationOutcome(aJCas, begin, end);
        placeholder.setOutcome(Constants.TC_OUTCOME_DUMMY_VALUE);
        placeholder.addToIndexes();
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget)

Example 29 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class TestFoldUtil method countNumberOfTextClassificationSequencesAndUnitsPerCas.

/**
 * Counts, for each written file, the {@code TextClassificationSequence} and
 * {@code TextClassificationTarget} annotations found in the first CAS that the
 * reader created for that file delivers.
 *
 * @param writtenBins the files to inspect, one fresh JCas is created per file
 * @return a two-element list: index 0 holds the sequence counts, index 1 holds the
 *         target counts; both inner lists follow the order of {@code writtenBins}
 * @throws Exception if a JCas or reader cannot be created or the CAS cannot be read
 */
private List<List<Integer>> countNumberOfTextClassificationSequencesAndUnitsPerCas(List<File> writtenBins) throws Exception {
    List<Integer> targetCounts = new ArrayList<>();
    List<Integer> sequenceCounts = new ArrayList<>();

    for (File bin : writtenBins) {
        JCas jcas = JCasFactory.createJCas();
        // Only the first CAS of each file is read.
        // NOTE(review): the reader is not closed in this method - confirm whether it needs to be.
        CollectionReader reader = createReader(jcas, bin);
        reader.getNext(jcas.getCas());

        targetCounts.add(JCasUtil.select(jcas, TextClassificationTarget.class).size());
        sequenceCounts.add(JCasUtil.select(jcas, TextClassificationSequence.class).size());
    }

    // Sequences first, targets second.
    List<List<Integer>> countsPerKind = new ArrayList<>();
    countsPerKind.add(sequenceCounts);
    countsPerKind.add(targetCounts);
    return countsPerKind;
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) ArrayList(java.util.ArrayList) List(java.util.List) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence) File(java.io.File)

Example 30 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class TestFoldUtil method setUnit.

/**
 * Creates a {@code TextClassificationTarget} over the given span and adds it to the
 * indexes of the JCas.
 *
 * @param jcas the JCas that receives the annotation
 * @param beg  begin offset of the annotation
 * @param end  end offset of the annotation
 */
private void setUnit(JCas jcas, int beg, int end) {
    new TextClassificationTarget(jcas, beg, end).addToIndexes();
}
Also used : TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget)

Aggregations

TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 61; JCas (org.apache.uima.jcas.JCas): 29; ArrayList (java.util.ArrayList): 22; TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome): 18; Feature (org.dkpro.tc.api.features.Feature): 16; Test (org.junit.Test): 16; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 12; TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence): 12; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 11; JCasId (org.dkpro.tc.api.type.JCasId): 11; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 8; AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 7; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 7; FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature): 6; CollectionReader (org.apache.uima.collection.CollectionReader): 5; FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase): 5; DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData): 4; Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 4; OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger): 4; BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter): 4