use of org.apache.uima.collection.CollectionReader in project dkpro-tc by dkpro.
the class XgboostSaveAndLoadModelDocumentSingleLabelTest method documentLoadAndUseModel.
/**
 * Runs the stored model over the test documents and checks the predicted outcomes.
 *
 * <p>Every text file matching {@code *}/{@code *.txt} below {@code documentTestFolder} is read
 * into its own CAS, segmented, and passed to a {@code TcAnnotator} that is pointed at
 * {@code modelFolder}. Exactly one outcome annotation is expected per document, and four
 * documents are expected in total.
 *
 * @param modelFolder folder whose absolute path is passed as the annotator's model location
 * @param evaluateWithClassificationArgs selects which set of expected outcomes is verified
 * @throws Exception if creating or running the reader/engines fails
 */
private static void documentLoadAndUseModel(File modelFolder, boolean evaluateWithClassificationArgs) throws Exception {
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath());
    CollectionReader textReader = CollectionReaderFactory.createReader(TextReader.class,
            TextReader.PARAM_SOURCE_LOCATION, documentTestFolder,
            TextReader.PARAM_LANGUAGE, "en",
            TextReader.PARAM_PATTERNS, Arrays.asList(TextReader.INCLUDE_PREFIX + "*/*.txt"));

    // One fresh CAS per document; selectSingle fails unless exactly one outcome was annotated.
    List<TextClassificationOutcome> predictions = new ArrayList<>();
    while (textReader.hasNext()) {
        JCas document = JCasFactory.createJCas();
        textReader.getNext(document.getCas());
        document.setDocumentLanguage("en");
        segmenter.process(document);
        classifier.process(document);
        predictions.add(JCasUtil.selectSingle(document, TextClassificationOutcome.class));
    }

    // The expected labels differ depending on the flag; the document count does not.
    String[] expectedLabels;
    if (evaluateWithClassificationArgs) {
        expectedLabels = new String[] { "emotional", "neutral", "neutral", "neutral" };
    } else {
        expectedLabels = new String[] { "emotional", "emotional", "emotional", "emotional" };
    }

    assertEquals(4, predictions.size());
    for (int i = 0; i < expectedLabels.length; i++) {
        assertEquals(expectedLabels[i], predictions.get(i).getOutcome());
    }
}
use of org.apache.uima.collection.CollectionReader in project dkpro-tc by dkpro.
the class XgboostSaveAndLoadModelDocumentSingleLabelTest method unitLoadAndUseModel.
/**
 * Applies the stored model in unit mode to {@code a02.xml} and sanity-checks the predictions.
 *
 * <p>The annotator is configured with {@code Token} as the unit annotation. Only the first CAS
 * delivered by the reader is processed. The test expects 31 outcome annotations, each carrying
 * one of a fixed set of tags; the order of the tags is not checked.
 *
 * @param modelFolder folder whose absolute path is passed as the annotator's model location
 * @throws Exception if creating or running the reader/engine fails
 */
private static void unitLoadAndUseModel(File modelFolder) throws Exception {
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    CollectionReader teiReader = CollectionReaderFactory.createReader(TeiReader.class,
            TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_PATTERNS, Arrays.asList(TeiReader.INCLUDE_PREFIX + "a02.xml"));

    JCas document = JCasFactory.createJCas();
    document.setDocumentLanguage("en");
    teiReader.getNext(document.getCas());
    classifier.process(document);

    List<TextClassificationOutcome> predictions = new ArrayList<>(
            JCasUtil.select(document, TextClassificationOutcome.class));

    // Tags the model is allowed to emit for this document.
    Set<String> allowedTags = new HashSet<>(Arrays.asList(
            "AT", "NP", "pct", "WDT", "JJ", "VBD", "NNS", "TO",
            "VBN", "IN", "CC", "NN", "AP", "HVD"));

    assertEquals(31, predictions.size());
    for (TextClassificationOutcome prediction : predictions) {
        assertTrue(allowedTags.contains(prediction.getOutcome()));
    }
}
use of org.apache.uima.collection.CollectionReader in project dkpro-tc by dkpro.
the class LibsvmSaveAndLoadModelDocumentSingleLabelTest method unitLoadAndUseModel.
/**
 * Applies the stored model in unit mode to {@code a02.xml} and checks every predicted tag.
 *
 * <p>The annotator is configured with {@code Token} as the unit annotation. Only the first CAS
 * delivered by the reader is processed. The 31 outcome annotations are compared, in the order
 * returned by {@code JCasUtil.select}, against a fixed list of expected tags.
 *
 * @param modelFolder folder whose absolute path is passed as the annotator's model location
 * @throws Exception if creating or running the reader/engine fails
 */
private static void unitLoadAndUseModel(File modelFolder) throws Exception {
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    CollectionReader teiReader = CollectionReaderFactory.createReader(TeiReader.class,
            TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_PATTERNS, Arrays.asList(TeiReader.INCLUDE_PREFIX + "a02.xml"));

    JCas document = JCasFactory.createJCas();
    document.setDocumentLanguage("en");
    teiReader.getNext(document.getCas());
    classifier.process(document);

    List<TextClassificationOutcome> predictions = new ArrayList<>(
            JCasUtil.select(document, TextClassificationOutcome.class));

    // Expected tag for each of the 31 units, by position.
    String[] expectedTags = {
            "AT", "IN", "pct", "WDT", "NP", "VBD", "AT", "VBN", "RB", "pct",
            "NP", "CC", "AT", "pct", "JJ", "NN", "pct", "NP", "NN", "CC",
            "AP", "NN", "IN", "NNS", "JJ", "NP", "IN", "AT", "AT", "JJ",
            "pct" };

    assertEquals(31, predictions.size());
    for (int i = 0; i < expectedTags.length; i++) {
        assertEquals(expectedTags[i], predictions.get(i).getOutcome());
    }
}
use of org.apache.uima.collection.CollectionReader in project dkpro-tc by dkpro.
the class FolderwiseDataReaderTest method testReader.
/**
 * Reads all {@code *.txt} files under {@code src/test/resources/folderwise/} with the
 * {@code FolderwiseDataReader} and verifies, per CAS, the text covered by the single
 * classification target and the value of the single outcome annotation.
 *
 * @throws Exception if creating or running the reader fails
 */
@Test
public void testReader() throws Exception {
    CollectionReader folderReader = CollectionReaderFactory.createReader(FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, "src/test/resources/folderwise/**/",
            FolderwiseDataReader.PARAM_PATTERNS, "*.txt");

    List<String> texts = new ArrayList<>();
    List<String> labels = new ArrayList<>();
    while (folderReader.hasNext()) {
        JCas cas = JCasFactory.createJCas();
        folderReader.getNext(cas.getCas());

        // selectSingle fails unless the CAS holds exactly one annotation of the type.
        TextClassificationTarget target = JCasUtil.selectSingle(cas, TextClassificationTarget.class);
        texts.add(target.getCoveredText());
        TextClassificationOutcome outcome = JCasUtil.selectSingle(cas, TextClassificationOutcome.class);
        labels.add(outcome.getOutcome());
    }

    String[] expectedTexts = {
            "This is a really odd test tweet :-) #test #nonsense",
            "it's raining all day and i don't care",
            "This is another really odd test tweet :-) #moreTests #randomness",
            "dkpro tc is a wonderful tool to classify tweets #LoveIt #MachineLearning",
            "Not even close to Friday :( #IHateMonday" };
    assertEquals(5, texts.size());
    for (int i = 0; i < expectedTexts.length; i++) {
        assertEquals(expectedTexts[i], texts.get(i));
    }

    String[] expectedLabels = { "neutral", "neutral", "neutral", "emotional", "emotional" };
    assertEquals(5, labels.size());
    for (int i = 0; i < expectedLabels.length; i++) {
        assertEquals(expectedLabels[i], labels.get(i));
    }
}
use of org.apache.uima.collection.CollectionReader in project dkpro-tc by dkpro.
the class SequenceOutcomeReaderTest method testSkipLineReader.
/**
 * Reads {@code src/test/resources/sequence/posDummy.txt} with the {@code SequenceOutcomeReader},
 * passing {@code "#"} as {@code PARAM_SKIP_LINES_START_WITH_STRING}, and verifies the tokens of
 * the second sequence that was read.
 *
 * @throws Exception if creating or running the reader fails
 */
@Test
public void testSkipLineReader() throws Exception {
    CollectionReader sequenceReader = CollectionReaderFactory.createReader(SequenceOutcomeReader.class,
            SequenceOutcomeReader.PARAM_SOURCE_LOCATION, "src/test/resources/sequence/posDummy.txt",
            SequenceOutcomeReader.PARAM_SKIP_LINES_START_WITH_STRING, "#");

    List<List<String>> tokensPerSequence = new ArrayList<>();
    List<List<String>> labelsPerCas = new ArrayList<>();
    while (sequenceReader.hasNext()) {
        JCas cas = JCasFactory.createJCas();
        sequenceReader.getNext(cas.getCas());

        // Collect the covered text of every target inside each sequence annotation.
        for (TextClassificationSequence seq : JCasUtil.select(cas, TextClassificationSequence.class)) {
            List<String> words = new ArrayList<>();
            for (TextClassificationTarget unit : JCasUtil.selectCovered(cas, TextClassificationTarget.class, seq)) {
                words.add(unit.getCoveredText());
            }
            tokensPerSequence.add(words);
        }

        // Outcomes are gathered per CAS but not asserted on below.
        Collection<TextClassificationOutcome> annotatedOutcomes = JCasUtil.select(cas, TextClassificationOutcome.class);
        List<String> labels = new ArrayList<>();
        for (TextClassificationOutcome annotated : annotatedOutcomes) {
            labels.add(annotated.getOutcome());
        }
        labelsPerCas.add(labels);
    }

    // Second sequence: four tokens.
    String[] expectedTokens = { "This2", "is2", "a2", "!" };
    assertEquals(4, tokensPerSequence.get(1).size());
    for (int i = 0; i < expectedTokens.length; i++) {
        assertEquals(expectedTokens[i], tokensPerSequence.get(1).get(i));
    }
}
Aggregations