use of org.apache.uima.collection.CollectionReader in project webanno by webanno.
the class WebAnnoTsv3XReaderWriterTest method test.
/**
 * Round-trip check: runs a pipeline that reads {@code coref.tsv} with
 * {@code WebannoTsv3XReader} and writes it with {@code WebannoTsv3XWriter}, then reads both the
 * original file and the file from the output folder and compares the number of annotations of
 * each checked type.
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();

    // Step 1: read the reference file and write it to the test output folder.
    CollectionReader reader = CollectionReaderFactory.createReader(WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/",
            WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    AnalysisEngineDescription writer = createEngineDescription(WebannoTsv3XWriter.class,
            WebannoTsv3XWriter.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv3XWriter.PARAM_STRIP_EXTENSION, true);
    runPipeline(reader, writer);

    // Step 2: load the reference file and the written file into separate CASes.
    CollectionReader referenceReader = CollectionReaderFactory.createReader(
            WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/",
            WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    CollectionReader roundTripReader = CollectionReaderFactory.createReader(
            WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, targetFolder,
            WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");

    CAS expectedCas = JCasFactory.createJCas().getCas();
    referenceReader.getNext(expectedCas);
    CAS actualCas = JCasFactory.createJCas().getCas();
    roundTripReader.getNext(actualCas);

    // Step 3: compare annotation counts. The reference CAS is passed first because
    // assertEquals takes (expected, actual); the reverse order yields misleading
    // failure messages.
    assertEquals(JCasUtil.select(expectedCas.getJCas(), Token.class).size(),
            JCasUtil.select(actualCas.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(expectedCas.getJCas(), POS.class).size(),
            JCasUtil.select(actualCas.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(expectedCas.getJCas(), Lemma.class).size(),
            JCasUtil.select(actualCas.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(expectedCas.getJCas(), NamedEntity.class).size(),
            JCasUtil.select(actualCas.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(expectedCas.getJCas(), Sentence.class).size(),
            JCasUtil.select(actualCas.getJCas(), Sentence.class).size());
    assertEquals(JCasUtil.select(expectedCas.getJCas(), Dependency.class).size(),
            JCasUtil.select(actualCas.getJCas(), Dependency.class).size());
}
use of org.apache.uima.collection.CollectionReader in project webanno by webanno.
the class XmiWriterReaderTest method read.
/**
 * Creates an {@code XmiReader} over the {@code *.xmi} files in the test folder, pulls the next
 * document into a freshly created CAS and checks the document text against
 * {@code src/test/resources/texts/latin.txt} and the document language against
 * {@code "latin"}.
 */
public void read() throws Exception {
    String sourceLocation = testFolder.getRoot().getPath();
    String[] includePatterns = { ResourceCollectionReaderBase.INCLUDE_PREFIX + "*.xmi" };
    CollectionReader reader = CollectionReaderFactory.createReader(XmiReader.class,
            ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, sourceLocation,
            ResourceCollectionReaderBase.PARAM_PATTERNS, includePatterns);

    CAS loadedCas = CasCreationUtils.createCas(createTypeSystemDescription(), null, null);
    reader.getNext(loadedCas);

    // NOTE(review): readFileToString is called without an explicit charset; confirm the
    // default it uses matches the encoding of latin.txt.
    File referenceFile = new File("src/test/resources/texts/latin.txt");
    String expectedText = readFileToString(referenceFile);

    assertEquals(expectedText, loadedCas.getDocumentText());
    assertEquals("latin", loadedCas.getDocumentLanguage());
}
use of org.apache.uima.collection.CollectionReader in project webanno by webanno.
the class TwoPairedKappaTest method init.
/**
 * Test fixture setup: creates the three users and the source document, then creates one CAS
 * per kappa test file and fills it from the corresponding TSV file under
 * {@code src/test/resources/}.
 */
@Before
public void init() throws Exception {
    user1 = new User();
    user1.setUsername("user1");

    user2 = new User();
    user2.setUsername("user2");

    user3 = new User();
    user3.setUsername("user3");

    document = new SourceDocument();

    kappatestCas = JCasFactory.createJCas().getCas();
    readKappaFixture("kappatest.tsv", kappatestCas);

    kappaspandiff = JCasFactory.createJCas().getCas();
    readKappaFixture("kappaspandiff.tsv", kappaspandiff);

    kappaarcdiff = JCasFactory.createJCas().getCas();
    readKappaFixture("kappaarcdiff.tsv", kappaarcdiff);

    kappaspanarcdiff = JCasFactory.createJCas().getCas();
    readKappaFixture("kappaspanarcdiff.tsv", kappaspanarcdiff);
}

/**
 * Creates a {@code WebannoTsv2Reader} for the given file under {@code src/test/resources/}
 * and reads its next document into {@code target}.
 *
 * @param fileName pattern passed to the reader, e.g. {@code "kappatest.tsv"}
 * @param target CAS that receives the document
 */
private void readKappaFixture(String fileName, CAS target) throws Exception {
    CollectionReader tsvReader = createReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_SOURCE_LOCATION, "src/test/resources/",
            WebannoTsv2Reader.PARAM_PATTERNS, fileName);
    tsvReader.getNext(target);
}
use of org.apache.uima.collection.CollectionReader in project dkpro-tc by dkpro.
the class WekaSaveAndLoadModelDocumentRegression method regressionLoadModel.
/**
 * Loads the model stored in {@code modelFolder} into a {@code TcAnnotator}, runs it on the
 * next document read from {@code regressionTest} and checks that exactly one outcome is
 * produced whose numeric value lies strictly between 0.1 and 5.
 *
 * @param modelFolder folder containing the previously saved model
 * @throws UIMAException if reader/engine creation or processing fails
 * @throws IOException if reading the input fails
 */
private void regressionLoadModel(File modelFolder) throws UIMAException, IOException {
    CollectionReader reader = CollectionReaderFactory.createReader(
            LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, regressionTest,
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());

    JCas jcas = JCasFactory.createJCas();

    // The hasNext() call is kept before getNext(), but its result is now checked so that an
    // empty input fails here with a clear assertion instead of somewhere inside getNext().
    assertTrue(reader.hasNext());
    reader.getNext(jcas.getCas());

    segmenter.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes = new ArrayList<>(
            JCasUtil.select(jcas, TextClassificationOutcome.class));
    assertEquals(1, outcomes.size());

    // Parse straight to a primitive; no boxed Double is needed for the range check.
    double predicted = Double.parseDouble(outcomes.get(0).getOutcome());
    assertTrue(predicted > 0.1 && predicted < 5);
}
use of org.apache.uima.collection.CollectionReader in project dkpro-tc by dkpro.
the class WekaSaveAndLoadModelUnitTest method unitLoadModel.
/**
 * Loads the model stored in {@code modelFolder} into a {@code TcAnnotator}, runs it on the
 * next document read from {@code unitTrainFolder} and checks that the number of outcomes
 * equals the number of tokens and that no outcome is empty.
 *
 * @param modelFolder folder containing the previously saved model
 */
private static void unitLoadModel(File modelFolder) throws Exception {
    // Token count asserted for the document that is read; the same number of outcomes is
    // expected after classification.
    final int expectedTokenCount = 163;

    CollectionReader teiReader = CollectionReaderFactory.createReader(TeiReader.class,
            TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_PATTERNS, "*.xml");
    AnalysisEngine annotator = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());

    JCas document = JCasFactory.createJCas();
    teiReader.getNext(document.getCas());

    // Sanity check: the number of tokens determines how many outcomes are expected.
    assertEquals(expectedTokenCount, JCasUtil.select(document, Token.class).size());

    annotator.process(document);

    Collection<TextClassificationOutcome> predictions = JCasUtil.select(document,
            TextClassificationOutcome.class);
    assertEquals(expectedTokenCount, predictions.size());

    // Every outcome must carry a non-empty value.
    for (TextClassificationOutcome prediction : predictions) {
        assertTrue(!prediction.getOutcome().isEmpty());
    }
}
Aggregations