use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class LiblinearSaveAndLoadModelDocumentRegression method regressionLoadModel.
/**
 * Loads the model stored in {@code modelFolder} through a {@link TcAnnotator} and applies
 * it to the first document delivered by a {@link LinewiseTextOutcomeReader} over
 * {@code regressionTest}.
 *
 * <p>The test passes when exactly one {@link TextClassificationOutcome} is produced and
 * its outcome string parses to a number strictly between 0.1 and 5.
 *
 * @param modelFolder folder containing the previously saved model
 * @throws UIMAException if a UIMA component cannot be created or fails while processing
 * @throws IOException if the reader fails to read the input
 */
private void regressionLoadModel(File modelFolder) throws UIMAException, IOException {
    CollectionReader reader = CollectionReaderFactory.createReader(
            LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, regressionTest,
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());

    JCas jcas = JCasFactory.createJCas();

    // Fail right here if the reader has nothing to deliver, instead of discarding the
    // result of hasNext() and failing later with a less obvious error.
    assertTrue(reader.hasNext());
    reader.getNext(jcas.getCas());

    segmenter.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes =
            new ArrayList<>(JCasUtil.select(jcas, TextClassificationOutcome.class));
    assertEquals(1, outcomes.size());

    // The outcome is stored as a string; parse to a primitive, no boxing needed.
    double d = Double.parseDouble(outcomes.get(0).getOutcome());
    assertTrue(d > 0.1 && d < 5);
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class LibsvmSaveAndLoadModelDocumentRegression method regressionLoadModel.
/**
 * Applies the saved model found in {@code modelFolder} to the first document that a
 * {@link LinewiseTextOutcomeReader} reads from {@code regressionTest}, then verifies the
 * prediction.
 *
 * <p>Verification requires exactly one {@link TextClassificationOutcome} in the CAS whose
 * outcome string parses to a value strictly greater than 0.1 and strictly less than 5.
 *
 * @param modelFolder folder containing the previously saved model
 * @throws UIMAException if a UIMA component cannot be created or fails while processing
 * @throws IOException if the reader fails to read the input
 */
private void regressionLoadModel(File modelFolder) throws UIMAException, IOException {
    CollectionReader reader = CollectionReaderFactory.createReader(
            LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, regressionTest,
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());

    JCas jcas = JCasFactory.createJCas();

    // The original ignored the return value of hasNext(); assert it so that an empty
    // input is reported at the point where it is detected.
    assertTrue(reader.hasNext());
    reader.getNext(jcas.getCas());

    segmenter.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes =
            new ArrayList<>(JCasUtil.select(jcas, TextClassificationOutcome.class));
    assertEquals(1, outcomes.size());

    // Parse straight to a primitive double rather than boxing into a Double.
    double d = Double.parseDouble(outcomes.get(0).getOutcome());
    assertTrue(d > 0.1 && d < 5);
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class SVMHMMSaveAndLoadModelTest method loadModel.
/**
 * Trains and saves a model into a temporary folder, loads it through a
 * {@link TcAnnotator} configured with {@link Sentence} as sequence annotation and
 * {@link Token} as unit annotation, and applies it to a short two-sentence text.
 *
 * <p>The test expects one {@link TextClassificationOutcome} per token (11 in total) and
 * requires every outcome to be one of the tags listed in {@code possibleOutcome}.
 *
 * @throws Exception if saving the model, creating an engine, or processing fails
 */
@Test
public void loadModel() throws Exception {
    // create a model
    File modelFolder = folder.newFolder();
    ParameterSpace pSpace = getParameterSpace();
    executeSaveModelIntoTemporyFolder(pSpace, modelFolder);

    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("This is an example text. It has 2 sentences.");
    jcas.setDocumentLanguage("en");

    AnalysisEngine tokenizer = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_SEQUENCE_ANNOTATION, Sentence.class.getName(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());

    tokenizer.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes =
            new ArrayList<>(JCasUtil.select(jcas, TextClassificationOutcome.class));

    // Tags the test accepts as valid predictions.
    Set<String> possibleOutcome = new HashSet<>();
    possibleOutcome.add("NN");
    possibleOutcome.add("AT");
    possibleOutcome.add("DT");
    possibleOutcome.add("JJ");
    possibleOutcome.add("pct");
    possibleOutcome.add("PPS");
    possibleOutcome.add("VBG");
    possibleOutcome.add("DOD");
    possibleOutcome.add("IN");
    possibleOutcome.add("VBD");
    possibleOutcome.add("VB");
    possibleOutcome.add("BEDZ");
    possibleOutcome.add("VBN");
    possibleOutcome.add("RB");
    possibleOutcome.add("NNS");

    // 9 tokens + 2 punctuation marks
    assertEquals(11, outcomes.size());
    for (TextClassificationOutcome o : outcomes) {
        // Name the offending tag in the failure message instead of printing every
        // outcome to stdout on each run.
        assertTrue("Unexpected outcome: " + o.getOutcome(),
                possibleOutcome.contains(o.getOutcome()));
    }
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class UnitOutcomeAnnotator method process.
/**
 * Creates one classification target and one classification outcome for every
 * {@link Token} found in the CAS.
 *
 * <p>For each token, a {@link TextClassificationTarget} covering the token's span is
 * created, given the next value of the {@code tcId} counter as id and the token's covered
 * text as suffix, and added to the indexes. A {@link TextClassificationOutcome} over the
 * same span is then created, filled with the value returned by
 * {@code getTextClassificationOutcome} for that target, and added to the indexes.
 *
 * @param aJCas the CAS whose tokens are annotated
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    // Snapshot the tokens before new annotations are added to the indexes
    // (presumably so the loop never iterates over an index that is being modified).
    List<Token> tokenSnapshot = new ArrayList<>(JCasUtil.select(aJCas, Token.class));

    for (Token current : tokenSnapshot) {
        int begin = current.getBegin();
        int end = current.getEnd();

        TextClassificationTarget unit = new TextClassificationTarget(aJCas, begin, end);
        unit.setId(tcId++);
        unit.setSuffix(current.getCoveredText());
        unit.addToIndexes();

        TextClassificationOutcome label = new TextClassificationOutcome(aJCas, begin, end);
        String labelValue = getTextClassificationOutcome(aJCas, unit);
        label.setOutcome(labelValue);
        label.addToIndexes();
    }
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class LiblinearSaveAndLoadModelDocumentSingleLabelTest method unitLoadAndUseModel.
/**
 * Loads the model stored in {@code modelFolder} through a {@link TcAnnotator} that uses
 * {@link Token} as unit annotation, and applies it to the document {@code a02.xml} read
 * by a {@link TeiReader} from {@code unitTrainFolder}.
 *
 * <p>The test expects 31 {@link TextClassificationOutcome} annotations, each carrying one
 * of the tags listed in {@code possibleOutcomes}.
 *
 * @param modelFolder folder containing the previously saved model
 * @throws Exception if an engine or reader cannot be created, or reading/processing fails
 */
private static void unitLoadAndUseModel(File modelFolder) throws Exception {
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    CollectionReader reader = CollectionReaderFactory.createReader(
            TeiReader.class,
            TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_PATTERNS, Arrays.asList(TeiReader.INCLUDE_PREFIX + "a02.xml"));

    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentLanguage("en");
    // NOTE(review): getNext is called without a preceding hasNext(); this relies on the
    // reader finding a02.xml - confirm whether an explicit check is wanted here.
    reader.getNext(jcas.getCas());
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes =
            new ArrayList<>(JCasUtil.select(jcas, TextClassificationOutcome.class));

    // Tags the test accepts as valid predictions. The original added "VBD" twice;
    // the redundant second add is dropped (a Set ignores duplicates anyway).
    Set<String> possibleOutcomes = new HashSet<>();
    possibleOutcomes.add("AT");
    possibleOutcomes.add("NP");
    possibleOutcomes.add("pct");
    possibleOutcomes.add("WDT");
    possibleOutcomes.add("JJ");
    possibleOutcomes.add("VBD");
    possibleOutcomes.add("NNS");
    possibleOutcomes.add("TO");
    possibleOutcomes.add("VBN");
    possibleOutcomes.add("IN");
    possibleOutcomes.add("CC");
    possibleOutcomes.add("NN");
    possibleOutcomes.add("AP");
    possibleOutcomes.add("HVD");

    assertEquals(31, outcomes.size());
    for (TextClassificationOutcome o : outcomes) {
        // Report which tag was not expected when the check fails.
        assertTrue("Unexpected outcome: " + o.getOutcome(),
                possibleOutcomes.contains(o.getOutcome()));
    }
}
Aggregations