Search in sources:

Example 41 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

From the class LiblinearSaveAndLoadModelDocumentRegression, method regressionLoadModel.

/**
 * Loads the model stored in {@code modelFolder} through a {@code TcAnnotator}, applies it to
 * the first document delivered by a {@code LinewiseTextOutcomeReader} and verifies that exactly
 * one outcome is produced whose numeric value lies strictly between 0.1 and 5.
 *
 * @param modelFolder
 *            folder whose absolute path is handed to the annotator as model location
 * @throws UIMAException
 *             propagated from the UIMA factory and processing calls
 * @throws IOException
 *             propagated from the reader
 */
private void regressionLoadModel(File modelFolder) throws UIMAException, IOException {
    CollectionReader reader = CollectionReaderFactory.createReader(
            LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, regressionTest,
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());

    JCas jcas = JCasFactory.createJCas();
    // The call is kept (it was already made before getNext), but its result is now checked so
    // that a reader without any document fails here instead of somewhere further down.
    assertTrue(reader.hasNext());
    reader.getNext(jcas.getCas());

    segmenter.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes = new ArrayList<>(
            JCasUtil.select(jcas, TextClassificationOutcome.class));
    assertEquals(1, outcomes.size());

    // Unboxed once; the range comparison below needs only the primitive value.
    double predicted = Double.valueOf(outcomes.get(0).getOutcome());
    assertTrue(predicted > 0.1 && predicted < 5);
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 42 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

From the class LibsvmSaveAndLoadModelDocumentRegression, method regressionLoadModel.

/**
 * Loads the model stored in {@code modelFolder} through a {@code TcAnnotator}, applies it to
 * the first document delivered by a {@code LinewiseTextOutcomeReader} and verifies that exactly
 * one outcome is produced whose numeric value lies strictly between 0.1 and 5.
 *
 * @param modelFolder
 *            folder whose absolute path is handed to the annotator as model location
 * @throws UIMAException
 *             propagated from the UIMA factory and processing calls
 * @throws IOException
 *             propagated from the reader
 */
private void regressionLoadModel(File modelFolder) throws UIMAException, IOException {
    CollectionReader reader = CollectionReaderFactory.createReader(
            LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, regressionTest,
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());

    JCas jcas = JCasFactory.createJCas();
    // The call is kept (it was already made before getNext), but its result is now checked so
    // that a reader without any document fails here instead of somewhere further down.
    assertTrue(reader.hasNext());
    reader.getNext(jcas.getCas());

    segmenter.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes = new ArrayList<>(
            JCasUtil.select(jcas, TextClassificationOutcome.class));
    assertEquals(1, outcomes.size());

    // Unboxed once; the range comparison below needs only the primitive value.
    double predicted = Double.valueOf(outcomes.get(0).getOutcome());
    assertTrue(predicted > 0.1 && predicted < 5);
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 43 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

From the class SVMHMMSaveAndLoadModelTest, method loadModel.

/**
 * Saves a model into a fresh temporary folder, loads it through a {@code TcAnnotator}
 * configured with sentences as sequences and tokens as units, and checks that the fixed
 * two-sentence example text yields 11 outcomes, each of which is one of the expected tags.
 *
 * @throws Exception
 *             propagated from model creation or from the UIMA components
 */
@Test
public void loadModel() throws Exception {
    // create a model
    File modelFolder = folder.newFolder();
    ParameterSpace pSpace = getParameterSpace();
    executeSaveModelIntoTemporyFolder(pSpace, modelFolder);

    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("This is an example text. It has 2 sentences.");
    jcas.setDocumentLanguage("en");

    AnalysisEngine tokenizer = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_SEQUENCE_ANNOTATION, Sentence.class.getName(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    tokenizer.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes = new ArrayList<>(
            JCasUtil.select(jcas, TextClassificationOutcome.class));

    Set<String> possibleOutcome = new HashSet<>();
    possibleOutcome.add("NN");
    possibleOutcome.add("AT");
    possibleOutcome.add("DT");
    possibleOutcome.add("JJ");
    possibleOutcome.add("pct");
    possibleOutcome.add("PPS");
    possibleOutcome.add("VBG");
    possibleOutcome.add("DOD");
    possibleOutcome.add("IN");
    possibleOutcome.add("VBD");
    possibleOutcome.add("VB");
    possibleOutcome.add("BEDZ");
    possibleOutcome.add("VBN");
    possibleOutcome.add("RB");
    possibleOutcome.add("NNS");

    // 9 tokens + 2 punctuation marks
    assertEquals(11, outcomes.size());
    for (TextClassificationOutcome o : outcomes) {
        // Report the offending tag through the assertion message rather than printing every
        // outcome to stdout on each test run.
        String predicted = o.getOutcome();
        assertTrue("Unexpected outcome: " + predicted, possibleOutcome.contains(predicted));
    }
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) File(java.io.File) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 44 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

From the class UnitOutcomeAnnotator, method process.

/**
 * For every {@code Token} in the CAS, adds a {@code TextClassificationTarget} and a
 * {@code TextClassificationOutcome} covering the same span as the token.
 *
 * @param aJCas
 *            the CAS whose tokens are annotated
 * @throws AnalysisEngineProcessException
 *             declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    // Iterate over a copy of the token selection — presumably so that adding new annotations
    // to the indexes inside the loop cannot disturb the iteration (TODO confirm).
    List<Token> snapshot = new ArrayList<Token>(JCasUtil.select(aJCas, Token.class));
    for (Token current : snapshot) {
        int begin = current.getBegin();
        int end = current.getEnd();

        // The target is indexed before the outcome is looked up, as in the original order.
        TextClassificationTarget unit = new TextClassificationTarget(aJCas, begin, end);
        unit.setId(tcId++);
        unit.setSuffix(current.getCoveredText());
        unit.addToIndexes();

        TextClassificationOutcome label = new TextClassificationOutcome(aJCas, begin, end);
        label.setOutcome(getTextClassificationOutcome(aJCas, unit));
        label.addToIndexes();
    }
}
Also used : TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)

Example 45 with TextClassificationOutcome

use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.

From the class LiblinearSaveAndLoadModelDocumentSingleLabelTest, method unitLoadAndUseModel.

/**
 * Loads the model stored in {@code modelFolder} through a {@code TcAnnotator} with tokens as
 * units, applies it to the document {@code a02.xml} read by a {@code TeiReader}, and checks
 * that 31 outcomes are produced, each of which is one of the expected tags.
 *
 * @param modelFolder
 *            folder whose absolute path is handed to the annotator as model location
 * @throws Exception
 *             propagated from the UIMA factory, reader and processing calls
 */
private static void unitLoadAndUseModel(File modelFolder) throws Exception {
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    CollectionReader reader = CollectionReaderFactory.createReader(
            TeiReader.class,
            TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_PATTERNS, Arrays.asList(TeiReader.INCLUDE_PREFIX + "a02.xml"));

    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentLanguage("en");
    reader.getNext(jcas.getCas());
    tcAnno.process(jcas);

    // Copy-construct the list in one step instead of creating it empty and calling addAll.
    List<TextClassificationOutcome> outcomes = new ArrayList<>(
            JCasUtil.select(jcas, TextClassificationOutcome.class));

    // Expected tags; "VBD" was previously added twice, which has no effect on a set.
    Set<String> possibleOutcomes = new HashSet<>();
    possibleOutcomes.add("AT");
    possibleOutcomes.add("NP");
    possibleOutcomes.add("pct");
    possibleOutcomes.add("WDT");
    possibleOutcomes.add("JJ");
    possibleOutcomes.add("VBD");
    possibleOutcomes.add("NNS");
    possibleOutcomes.add("TO");
    possibleOutcomes.add("VBN");
    possibleOutcomes.add("IN");
    possibleOutcomes.add("CC");
    possibleOutcomes.add("NN");
    possibleOutcomes.add("AP");
    possibleOutcomes.add("HVD");

    assertEquals(31, outcomes.size());
    for (TextClassificationOutcome o : outcomes) {
        assertTrue(possibleOutcomes.contains(o.getOutcome()));
    }
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) HashSet(java.util.HashSet)

Aggregations

TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome) 59; JCas (org.apache.uima.jcas.JCas) 29; ArrayList (java.util.ArrayList) 27; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine) 19; TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget) 18; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) 16; CollectionReader (org.apache.uima.collection.CollectionReader) 15; CollectionException (org.apache.uima.collection.CollectionException) 9; CASException (org.apache.uima.cas.CASException) 8; JCasId (org.dkpro.tc.api.type.JCasId) 8; TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence) 7; AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException) 6; Test (org.junit.Test) 6; DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) 5; Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) 5; File (java.io.File) 5; IOException (java.io.IOException) 4; ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException) 4; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException) 4; HashSet (java.util.HashSet) 3