Search in sources :

Example 36 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class TestReaderRegression method getNext.

/**
 * Delegates to the superclass reader, then tags the CAS with a {@link JCasId}
 * (numbered from the running {@code jcasId} counter) and a
 * {@link TextClassificationOutcome}.
 *
 * @param aCAS the CAS to populate
 * @throws IOException declared by the overridden method
 * @throws CollectionException if the JCas cannot be obtained from the CAS; the
 *         underlying {@link CASException} is attached as the cause
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
    super.getNext(aCAS);
    JCas jcas;
    try {
        jcas = aCAS.getJCas();
        JCasId id = new JCasId(jcas);
        id.setId(jcasId++);
        id.addToIndexes();
    } catch (CASException e) {
        // Chain the original failure so its message and stack trace are not lost.
        throw new CollectionException(e);
    }
    TextClassificationOutcome outcome = new TextClassificationOutcome(jcas);
    outcome.setOutcome(getTextClassificationOutcome(jcas));
    outcome.addToIndexes();
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) CollectionException(org.apache.uima.collection.CollectionException) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) JCas(org.apache.uima.jcas.JCas) CASException(org.apache.uima.cas.CASException)

Example 37 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class PairTwentyNewsgroupsReader method getNext.

/**
 * Selects the current pair of files, delegates to the superclass reader, adds a
 * {@link TextClassificationOutcome} to the JCas and advances to the next pair.
 *
 * @param jcas the JCas to populate
 * @throws IOException declared by the overridden method
 * @throws CollectionException declared by the overridden method
 */
@Override
public void getNext(JCas jcas) throws IOException, CollectionException {
    // doc1/doc2 are assigned before the superclass call
    // (presumably the superclass reads them - confirm against the parent reader).
    doc1 = new File(listOfFiles.get(currentParsedFilePointer).get(0));
    doc2 = new File(listOfFiles.get(currentParsedFilePointer).get(1));
    super.getNext(jcas);

    TextClassificationOutcome label = new TextClassificationOutcome(jcas);
    label.setOutcome(getTextClassificationOutcome(jcas));
    label.addToIndexes();

    // Move on to the next file pair for the following call.
    currentParsedFilePointer++;
}
Also used : TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) File(java.io.File)

Example 38 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class SequenceOutcomeAnnotator method process.

/**
 * Marks every {@link Sentence} as a {@link TextClassificationSequence} and, for each
 * {@link Token} covered by that sentence, adds a {@link TextClassificationTarget}
 * and a {@link TextClassificationOutcome} spanning the token.
 *
 * @param aJCas the JCas whose sentences and tokens are annotated
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    for (Sentence sentence : JCasUtil.select(aJCas, Sentence.class)) {
        // One sequence annotation per sentence, with identical offsets.
        TextClassificationSequence seq =
                new TextClassificationSequence(aJCas, sentence.getBegin(), sentence.getEnd());
        seq.addToIndexes();

        for (Token tok : JCasUtil.selectCovered(aJCas, Token.class, sentence)) {
            // Target: carries a running id and the token text as suffix.
            TextClassificationTarget target =
                    new TextClassificationTarget(aJCas, tok.getBegin(), tok.getEnd());
            target.setId(tcId++);
            target.setSuffix(tok.getCoveredText());
            target.addToIndexes();

            // Outcome: same span as the target, label derived from the target.
            TextClassificationOutcome label =
                    new TextClassificationOutcome(aJCas, tok.getBegin(), tok.getEnd());
            label.setOutcome(getTextClassificationOutcome(aJCas, target));
            label.addToIndexes();
        }
    }
}
Also used : TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 39 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class STSReader method getNext.

/**
 * Delegates to the superclass reader, sets the document text to the empty string,
 * adds a {@link TextClassificationOutcome}, and makes the document title and URI
 * unique by appending the running {@code fileOffset}.
 *
 * @param jcas the JCas to populate
 * @throws IOException declared by the overridden method
 * @throws CollectionException declared by the overridden method
 */
@Override
public void getNext(JCas jcas) throws IOException, CollectionException {
    super.getNext(jcas);
    jcas.setDocumentText("");

    TextClassificationOutcome label = new TextClassificationOutcome(jcas);
    label.setOutcome(getTextClassificationOutcome(jcas));
    label.addToIndexes();

    // Several CASes are produced from one file, so each needs its own title and URI;
    // without a distinguishing suffix, serialized CASes would overwrite each other.
    String uniqueSuffix = "-" + fileOffset;
    DocumentMetaData meta = DocumentMetaData.get(jcas);
    meta.setDocumentTitle(meta.getDocumentTitle() + uniqueSuffix);
    meta.setDocumentUri(meta.getDocumentUri() + uniqueSuffix);
    fileOffset++;
}
Also used : TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData)

Example 40 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

the class CRFSuiteSaveAndLoadModelTest method loadModelArow.

/**
 * Builds a CRFSuite configuration using the AROW algorithm constant, stores a model
 * in a fresh temporary folder, then runs {@link TcAnnotator} with that model on a
 * two-sentence text and checks the number and labels of the produced outcomes.
 *
 * @throws Exception if any step of setup, training or annotation fails
 */
@Test
public void loadModelArow() throws Exception {
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new CrfSuiteAdapter(), CrfSuiteAdapter.ALGORITHM_ADAPTIVE_REGULARIZATION_OF_WEIGHT_VECTOR, "-p", "max_iterations=2" });
    config.put(DIM_DATA_WRITER, new CrfSuiteAdapter().getDataWriterClass().getName());
    // NOTE(review): the sparse-feature flag is taken from WekaAdapter although the rest
    // of this configuration uses CrfSuiteAdapter - confirm this is intended.
    config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
    // create a model
    File modelFolder = folder.newFolder();
    ParameterSpace pSpace = getParameterSpace(mlas);
    executeSaveModelIntoTemporyFolder(pSpace, modelFolder);
    // Prepare an English two-sentence document to annotate with the stored model.
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("This is an example text. It has 2 sentences.");
    jcas.setDocumentLanguage("en");
    AnalysisEngine tokenizer = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    // The annotator is configured with Sentence as the sequence and Token as the unit annotation.
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(TcAnnotator.class, TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(), TcAnnotator.PARAM_NAME_SEQUENCE_ANNOTATION, Sentence.class.getName(), TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    // Segmentation runs first, then the annotator configured above.
    tokenizer.process(jcas);
    tcAnno.process(jcas);
    List<TextClassificationOutcome> outcomes = new ArrayList<>(JCasUtil.select(jcas, TextClassificationOutcome.class));
    // 9 word tokens + 2 punctuation marks
    assertEquals(11, outcomes.size());
    // Every predicted label must be contained in postags.
    for (TextClassificationOutcome o : outcomes) {
        String label = o.getOutcome();
        assertTrue(postags.contains(label));
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) CrfSuiteAdapter(org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter) ParameterSpace(org.dkpro.lab.task.ParameterSpace) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) HashMap(java.util.HashMap) Map(java.util.Map) File(java.io.File) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Aggregations

TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome)59 JCas (org.apache.uima.jcas.JCas)29 ArrayList (java.util.ArrayList)27 AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine)19 TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget)18 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)16 CollectionReader (org.apache.uima.collection.CollectionReader)15 CollectionException (org.apache.uima.collection.CollectionException)9 CASException (org.apache.uima.cas.CASException)8 JCasId (org.dkpro.tc.api.type.JCasId)8 TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence)7 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)6 Test (org.junit.Test)6 DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData)5 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)5 File (java.io.File)5 IOException (java.io.IOException)4 ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException)4 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)4 HashSet (java.util.HashSet)3