Use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
The class TestReaderRegression, method getNext.
/**
 * Fills the given CAS via the superclass and then attaches two annotations to it:
 * a {@code JCasId} carrying the current value of the running {@code jcasId} counter
 * and a {@code TextClassificationOutcome} holding the value returned by
 * {@code getTextClassificationOutcome(jcas)}.
 *
 * @param aCAS the CAS to populate
 * @throws IOException propagated from the superclass implementation
 * @throws CollectionException if the JCas view cannot be obtained from the CAS
 *         (the original {@code CASException} is attached as the cause)
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
    super.getNext(aCAS);

    JCas jcas;
    try {
        jcas = aCAS.getJCas();
        JCasId id = new JCasId(jcas);
        id.setId(jcasId++);
        id.addToIndexes();
    } catch (CASException e) {
        // Keep the underlying failure as the cause so the stack trace is not lost.
        throw new CollectionException(e);
    }

    TextClassificationOutcome outcome = new TextClassificationOutcome(jcas);
    outcome.setOutcome(getTextClassificationOutcome(jcas));
    outcome.addToIndexes();
}
Use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
The class PairTwentyNewsgroupsReader, method getNext.
/**
 * Loads the document pair at the current position, lets the superclass fill the
 * JCas, attaches a {@code TextClassificationOutcome} and advances to the next pair.
 *
 * @param jcas the JCas to populate
 * @throws IOException propagated from the superclass implementation
 * @throws CollectionException propagated from the superclass implementation
 */
@Override
public void getNext(JCas jcas) throws IOException, CollectionException {
    // Both files of the pair are assigned before delegating to the superclass.
    doc1 = new File(listOfFiles.get(currentParsedFilePointer).get(0));
    doc2 = new File(listOfFiles.get(currentParsedFilePointer).get(1));

    super.getNext(jcas);

    TextClassificationOutcome pairOutcome = new TextClassificationOutcome(jcas);
    pairOutcome.setOutcome(getTextClassificationOutcome(jcas));
    pairOutcome.addToIndexes();

    currentParsedFilePointer++;
}
Use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
The class SequenceOutcomeAnnotator, method process.
/**
 * Annotates every sentence as a {@code TextClassificationSequence} and every token
 * covered by that sentence as a {@code TextClassificationTarget} plus a
 * {@code TextClassificationOutcome} spanning the same offsets.
 *
 * @param aJCas the JCas whose sentences and tokens are annotated
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    for (Sentence sentence : JCasUtil.select(aJCas, Sentence.class)) {
        TextClassificationSequence seq = new TextClassificationSequence(aJCas,
                sentence.getBegin(), sentence.getEnd());
        seq.addToIndexes();

        for (Token tok : JCasUtil.selectCovered(aJCas, Token.class, sentence)) {
            int begin = tok.getBegin();
            int end = tok.getEnd();

            // Each token becomes one classification target with a running id.
            TextClassificationTarget target = new TextClassificationTarget(aJCas, begin, end);
            target.setId(tcId++);
            target.setSuffix(tok.getCoveredText());
            target.addToIndexes();

            TextClassificationOutcome tokenOutcome = new TextClassificationOutcome(aJCas, begin, end);
            tokenOutcome.setOutcome(getTextClassificationOutcome(aJCas, target));
            tokenOutcome.addToIndexes();
        }
    }
}
Use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
The class STSReader, method getNext.
/**
 * Lets the superclass fill the JCas, resets the document text to the empty string,
 * attaches a {@code TextClassificationOutcome} and makes the document title and URI
 * unique by appending the current {@code fileOffset}.
 *
 * @param jcas the JCas to populate
 * @throws IOException propagated from the superclass implementation
 * @throws CollectionException propagated from the superclass implementation
 */
@Override
public void getNext(JCas jcas) throws IOException, CollectionException {
    super.getNext(jcas);
    jcas.setDocumentText("");

    TextClassificationOutcome stsOutcome = new TextClassificationOutcome(jcas);
    String label = getTextClassificationOutcome(jcas);
    stsOutcome.setOutcome(label);
    stsOutcome.addToIndexes();

    // Several CASes are produced from a single file, so each one needs its own
    // document title and URI; otherwise serialized CASes would overwrite each other.
    String uniqueSuffix = "-" + fileOffset;
    DocumentMetaData metaData = DocumentMetaData.get(jcas);
    metaData.setDocumentTitle(metaData.getDocumentTitle() + uniqueSuffix);
    metaData.setDocumentUri(metaData.getDocumentUri() + uniqueSuffix);

    fileOffset++;
}
Use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
The class CRFSuiteSaveAndLoadModelTest, method loadModelArow.
/**
 * Saves a model configured with the CRFSuite
 * {@code ALGORITHM_ADAPTIVE_REGULARIZATION_OF_WEIGHT_VECTOR} setting into a temporary
 * folder, then runs {@code TcAnnotator} with that model on a two-sentence example text
 * and checks that each of the 11 produced outcomes is contained in {@code postags}.
 *
 * @throws Exception if model creation or annotation fails
 */
@Test
public void loadModelArow() throws Exception {
    Map<String, Object> config = new HashMap<>();
    Object[] classificationArgs = new Object[] { new CrfSuiteAdapter(),
            CrfSuiteAdapter.ALGORITHM_ADAPTIVE_REGULARIZATION_OF_WEIGHT_VECTOR, "-p",
            "max_iterations=2" };
    config.put(DIM_CLASSIFICATION_ARGS, classificationArgs);
    config.put(DIM_DATA_WRITER, new CrfSuiteAdapter().getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> configBundle = Dimension.createBundle("config", config);

    // Step 1: create a model and store it in a fresh temporary folder.
    File modelFolder = folder.newFolder();
    executeSaveModelIntoTemporyFolder(getParameterSpace(configBundle), modelFolder);

    // Step 2: prepare the example document.
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("This is an example text. It has 2 sentences.");
    jcas.setDocumentLanguage("en");

    // Step 3: segment the text, then annotate it with the stored model.
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine annotator = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_SEQUENCE_ANNOTATION, Sentence.class.getName(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    segmenter.process(jcas);
    annotator.process(jcas);

    List<TextClassificationOutcome> predicted = new ArrayList<>(
            JCasUtil.select(jcas, TextClassificationOutcome.class));

    // 9 token + 2 punctuation marks
    assertEquals(11, predicted.size());
    for (TextClassificationOutcome prediction : predicted) {
        assertTrue(postags.contains(prediction.getOutcome()));
    }
}
Aggregations