use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class LinewiseTextReader method getNext.
/**
 * Populates the given CAS from the pending input line.
 *
 * <p>The line is split on tab characters: the first column is stored as the
 * {@link TextClassificationOutcome}, the second column becomes the document text.
 * Columns beyond the second are ignored.
 *
 * @param aJCas
 *            the CAS that receives meta data, document text and outcome
 * @throws IOException
 *             if the pending line does not provide both a label and a text column
 * @throws CollectionException
 *             declared by the method signature; nothing in this body throws it directly
 */
public void getNext(JCas aJCas) throws IOException, CollectionException {
    // Validate the line before touching the CAS so a malformed line fails with a
    // descriptive checked exception instead of an ArrayIndexOutOfBoundsException.
    String[] split = nextLine.split("\t");
    if (split.length < 2) {
        throw new IOException(
                "Expected a line with two tab-separated columns (label, text) but found "
                        + split.length + " column(s) in line [" + nextLine + "]");
    }
    String label = split[0];
    String documentText = split[1];

    DocumentMetaData md = new DocumentMetaData(aJCas);
    md.setDocumentTitle("");
    md.setDocumentId("" + (instanceId++));
    md.setLanguage(language);
    md.addToIndexes();

    documentText = checkUnescapeHtml(documentText);
    documentText = checkUnescapeJava(documentText);
    aJCas.setDocumentText(documentText);

    TextClassificationOutcome outcome = new TextClassificationOutcome(aJCas);
    outcome.setOutcome(label);
    outcome.addToIndexes();

    checkSetSentence(aJCas);
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class CRFSuiteSaveAndLoadModelTest method loadModelArowParameters.
/**
 * Trains a CRFSuite model with the adaptive-regularization-of-weight-vector algorithm,
 * stores it in a temporary folder, loads it through {@link TcAnnotator} and checks that
 * one outcome is produced per token and that every outcome is contained in
 * {@code postags}.
 */
@Test
public void loadModelArowParameters() throws Exception {
    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new CrfSuiteAdapter(),
            CrfSuiteAdapter.ALGORITHM_ADAPTIVE_REGULARIZATION_OF_WEIGHT_VECTOR, "-p",
            "max_iterations=2" });
    config.put(DIM_DATA_WRITER, new CrfSuiteAdapter().getDataWriterClass().getName());
    // The sparse-feature flag must come from the adapter under test (CRFSuite), in line
    // with the classifier and data writer configured above, not from the Weka adapter.
    config.put(DIM_FEATURE_USE_SPARSE, new CrfSuiteAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);

    // create a model
    File modelFolder = folder.newFolder();
    ParameterSpace pSpace = getParameterSpace(mlas);
    executeSaveModelIntoTemporyFolder(pSpace, modelFolder);

    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("This is an example text. It has 2 sentences.");
    jcas.setDocumentLanguage("en");

    AnalysisEngine tokenizer = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_SEQUENCE_ANNOTATION, Sentence.class.getName(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    tokenizer.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes = new ArrayList<>(
            JCasUtil.select(jcas, TextClassificationOutcome.class));
    // 9 token + 2 punctuation marks
    assertEquals(11, outcomes.size());
    for (TextClassificationOutcome o : outcomes) {
        assertTrue(postags.contains(o.getOutcome()));
    }
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class BrownCorpusReader method getNext.
/**
 * Delegates reading to the parent reader and then adds classification annotations:
 * one {@link TextClassificationSequence} per {@link Sentence} and, for every
 * {@link Token} covered by that sentence, one {@link TextClassificationTarget} plus
 * one {@link TextClassificationOutcome} spanning the token.
 *
 * @param cas
 *            the CAS to fill
 * @throws IOException
 *             declared by the signature; propagated from the parent reader
 * @throws CollectionException
 *             if the JCas view of {@code cas} cannot be obtained
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
    super.getNext(cas);

    final JCas view;
    try {
        view = cas.getJCas();
    } catch (CASException casFailure) {
        throw new CollectionException(casFailure);
    }

    for (Sentence span : JCasUtil.select(view, Sentence.class)) {
        new TextClassificationSequence(view, span.getBegin(), span.getEnd()).addToIndexes();

        for (Token word : JCasUtil.selectCovered(view, Token.class, span)) {
            final int begin = word.getBegin();
            final int end = word.getEnd();

            TextClassificationTarget target = new TextClassificationTarget(view, begin, end);
            // the token text is stored as the suffix of this target
            target.setSuffix(word.getCoveredText());
            target.addToIndexes();

            TextClassificationOutcome gold = new TextClassificationOutcome(view, begin, end);
            gold.setOutcome(getTextClassificationOutcome(view, target));
            gold.addToIndexes();
        }
    }
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class ReutersCorpusReader method getNext.
/**
 * Delegates reading to the parent reader and then adds one
 * {@link TextClassificationOutcome} to the CAS for every value returned by
 * {@code getTextClassificationOutcomes}.
 *
 * @param aCAS
 *            the CAS to fill
 * @throws IOException
 *             declared by the signature; propagated from the parent reader
 * @throws CollectionException
 *             if the JCas view of {@code aCAS} cannot be obtained; the originating
 *             {@link CASException} is attached as the cause
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
    super.getNext(aCAS);
    JCas jcas;
    try {
        jcas = aCAS.getJCas();
    } catch (CASException e) {
        // Keep the cause so the reason for the failure is not lost.
        throw new CollectionException(e);
    }
    for (String outcomeValue : getTextClassificationOutcomes(jcas)) {
        TextClassificationOutcome outcome = new TextClassificationOutcome(jcas);
        outcome.setOutcome(outcomeValue);
        outcome.addToIndexes();
    }
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class MultiLabelOutcomeAnnotator method process.
/**
 * Adds one {@link TextClassificationOutcome} to the CAS for each value returned by
 * {@code getTextClassificationOutcomes}.
 *
 * @param jcas
 *            the CAS to annotate
 * @throws AnalysisEngineProcessException
 *             wrapping any {@link CollectionException} raised while the outcomes are
 *             determined
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    try {
        for (String label : getTextClassificationOutcomes(jcas)) {
            final TextClassificationOutcome annotation = new TextClassificationOutcome(jcas);
            annotation.setOutcome(label);
            annotation.addToIndexes();
        }
    } catch (CollectionException failure) {
        throw new AnalysisEngineProcessException(failure);
    }
}
Aggregations