use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class XgboostSaveAndLoadModelDocumentSingleLabelTest method unitLoadAndUseModel.
/**
 * Runs the model stored in {@code modelFolder} over the TEI file {@code a02.xml} and checks
 * the predictions.
 *
 * <p>A {@link TcAnnotator} engine is created with the model location and {@code Token} as the
 * unit annotation, one CAS is read through a {@link TeiReader}, and the resulting
 * {@link TextClassificationOutcome} annotations are verified: there must be exactly 31 of
 * them and every outcome value must be one of the permitted tags.
 *
 * @param modelFolder folder whose absolute path is passed as the model location
 * @throws Exception if engine/reader creation, reading, or processing fails
 */
private static void unitLoadAndUseModel(File modelFolder) throws Exception {
    // Tags the loaded model is allowed to predict for this document.
    Set<String> permittedTags = new HashSet<>(Arrays.asList(
            "AT", "NP", "pct", "WDT", "JJ", "VBD", "NNS", "TO",
            "VBN", "IN", "CC", "NN", "AP", "HVD"));

    AnalysisEngine annotator = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    CollectionReader teiReader = CollectionReaderFactory.createReader(
            TeiReader.class,
            TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_PATTERNS, Arrays.asList(TeiReader.INCLUDE_PREFIX + "a02.xml"));

    // Read a single document and let the annotator classify it.
    JCas document = JCasFactory.createJCas();
    document.setDocumentLanguage("en");
    teiReader.getNext(document.getCas());
    annotator.process(document);

    List<TextClassificationOutcome> predictions =
            new ArrayList<>(JCasUtil.select(document, TextClassificationOutcome.class));

    assertEquals(31, predictions.size());
    for (TextClassificationOutcome prediction : predictions) {
        assertTrue(permittedTags.contains(prediction.getOutcome()));
    }
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class LibsvmSaveAndLoadModelDocumentSingleLabelTest method unitLoadAndUseModel.
/**
 * Runs the model stored in {@code modelFolder} over the TEI file {@code a02.xml} and checks
 * every prediction against a fixed expected sequence.
 *
 * <p>A {@link TcAnnotator} engine is created with the model location and {@code Token} as the
 * unit annotation, one CAS is read through a {@link TeiReader}, and the resulting
 * {@link TextClassificationOutcome} annotations must number exactly 31 and match the expected
 * tags position by position.
 *
 * @param modelFolder folder whose absolute path is passed as the model location
 * @throws Exception if engine/reader creation, reading, or processing fails
 */
private static void unitLoadAndUseModel(File modelFolder) throws Exception {
    // Expected outcome at each position, in the order the outcomes are selected from the CAS.
    String[] expectedTags = {
        "AT", "IN", "pct", "WDT", "NP", "VBD", "AT", "VBN", "RB", "pct",
        "NP", "CC", "AT", "pct", "JJ", "NN", "pct", "NP", "NN", "CC",
        "AP", "NN", "IN", "NNS", "JJ", "NP", "IN", "AT", "AT", "JJ",
        "pct"
    };

    AnalysisEngine annotator = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    CollectionReader teiReader = CollectionReaderFactory.createReader(
            TeiReader.class,
            TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_PATTERNS, Arrays.asList(TeiReader.INCLUDE_PREFIX + "a02.xml"));

    // Read a single document and let the annotator classify it.
    JCas document = JCasFactory.createJCas();
    document.setDocumentLanguage("en");
    teiReader.getNext(document.getCas());
    annotator.process(document);

    List<TextClassificationOutcome> predictions =
            new ArrayList<>(JCasUtil.select(document, TextClassificationOutcome.class));

    // The size check comes first so the positional checks below never run past the list.
    assertEquals(31, predictions.size());
    for (int position = 0; position < expectedTags.length; position++) {
        assertEquals(expectedTags[position], predictions.get(position).getOutcome());
    }
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class TestReaderSentenceToDocument method getNext.
/**
 * Populates {@code aJCas} with the text stored at the current {@code offset} and advances the
 * offset by one.
 *
 * <p>Besides text and language, the CAS receives document metadata, a {@link JCasId}, a
 * {@link TextClassificationOutcome} carrying the value returned by
 * {@code getTextClassificationOutcome(aJCas)}, and a {@link TextClassificationTarget} that
 * spans from 0 to the length of the document text.
 *
 * @param aJCas the CAS to fill
 * @throws IOException declared by the reader contract
 * @throws CollectionException declared by the reader contract
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException {
    // Document text and language.
    aJCas.setDocumentText(texts.get(offset));
    aJCas.setDocumentLanguage(LANGUAGE_CODE);

    // Several CASes are produced from a single file, so each one needs its own title and
    // URI; otherwise serialized CASes would overwrite one another.
    String sentenceLabel = "Sentence" + offset;
    DocumentMetaData metaData = DocumentMetaData.create(aJCas);
    metaData.setDocumentTitle(sentenceLabel);
    metaData.setDocumentUri(sentenceLabel);
    metaData.setDocumentId(String.valueOf(offset));

    JCasId casId = new JCasId(aJCas);
    casId.setId(jcasId);
    casId.addToIndexes();

    // Outcome / label for this document.
    TextClassificationOutcome label = new TextClassificationOutcome(aJCas);
    label.setOutcome(getTextClassificationOutcome(aJCas));
    label.addToIndexes();

    // One target covering the whole document text.
    int textLength = aJCas.getDocumentText().length();
    TextClassificationTarget wholeDocument = new TextClassificationTarget(aJCas, 0, textLength);
    wholeDocument.addToIndexes();

    offset++;
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class TestReaderSingleLabelUnitReader method getNext.
/**
 * Delegates to the parent reader, then adds a {@link JCasId} and, for every piece of the
 * document text obtained by splitting on a single space, one
 * {@link TextClassificationTarget} and one {@link TextClassificationOutcome} with the same
 * begin/end values.
 *
 * @param aCAS the CAS to fill
 * @throws IOException if the parent reader fails
 * @throws CollectionException if the parent reader fails, or if the JCas interface of the
 *         CAS cannot be obtained (the underlying {@link CASException} is attached as cause)
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
    super.getNext(aCAS);
    JCas jcas;
    try {
        jcas = aCAS.getJCas();
        JCasId id = new JCasId(jcas);
        id.setId(jcasId++);
        id.addToIndexes();
    } catch (CASException e) {
        // Chain the original failure so its stack trace is not lost to the caller.
        throw new CollectionException(e);
    }
    String documentText = aCAS.getDocumentText();
    int s = 0;
    for (String t : documentText.split(" ")) {
        int e = documentText.indexOf(t, s) + t.length();
        new TextClassificationTarget(jcas, s, e).addToIndexes();
        new TextClassificationOutcome(jcas, s, e).addToIndexes();
        // NOTE(review): the begin index advances by one character per token, not to the end
        // of the token just processed, so later spans can start before their token. Confirm
        // whether this is intended before changing it; existing tests may rely on these spans.
        s += 1;
    }
}
use of org.dkpro.tc.api.type.TextClassificationOutcome in project dkpro-tc by dkpro.
the class LinewiseTextOutcomeReader method setTextClassificationOutcome.
/**
 * Adds a {@link TextClassificationOutcome} to the indexes of {@code aJCas}.
 *
 * @param aJCas the CAS that receives the annotation
 * @param outcome the outcome value stored on the annotation
 * @param begin begin value passed to the annotation constructor
 * @param end end value passed to the annotation constructor
 * @throws IOException declared for overriding implementations; not thrown by this body
 */
protected void setTextClassificationOutcome(JCas aJCas, String outcome, int begin, int end) throws IOException {
    TextClassificationOutcome annotation = new TextClassificationOutcome(aJCas, begin, end);
    annotation.setOutcome(outcome);
    annotation.addToIndexes();
}
Aggregations