use of org.apache.uima.collection.CollectionReaderDescription in project webanno by webanno.
the class Conll2009ReaderWriterTest method test.
/**
 * Reads the CoNLL 2009 sample {@code en-orig.conll}, writes it back out with
 * {@link Conll2009Writer}, and checks that the written file matches the reference file
 * {@code en-ref.conll}. Both files are trimmed and compared ignoring end-of-line differences.
 */
@Test
public void test() throws Exception {
    // Reader over the single original sample file.
    CollectionReaderDescription conllReader = createReaderDescription(
            Conll2009Reader.class,
            Conll2009Reader.PARAM_SOURCE_LOCATION, "src/test/resources/conll/2009",
            Conll2009Reader.PARAM_PATTERNS, "en-orig.conll");

    // Writer that strips the original extension and re-appends ".conll", so the output
    // file keeps the input file's name.
    AnalysisEngineDescription conllWriter = createEngineDescription(
            Conll2009Writer.class,
            Conll2009Writer.PARAM_TARGET_LOCATION, "target/test-output/Conll2009ReaderWriterTest-test",
            Conll2009Writer.PARAM_FILENAME_EXTENSION, ".conll",
            Conll2009Writer.PARAM_STRIP_EXTENSION, true);

    runPipeline(conllReader, conllWriter);

    File referenceFile = new File("src/test/resources/conll/2009/en-ref.conll");
    File writtenFile = new File("target/test-output/Conll2009ReaderWriterTest-test/en-orig.conll");

    String expectedText = FileUtils.readFileToString(referenceFile, "UTF-8").trim();
    String writtenText = FileUtils.readFileToString(writtenFile, "UTF-8").trim();

    // Line-ending differences between platforms must not fail the comparison.
    boolean sameContent = IOUtils.contentEqualsIgnoreEOL(
            new StringReader(expectedText), new StringReader(writtenText));
    assertTrue(sameContent);
}
use of org.apache.uima.collection.CollectionReaderDescription in project webanno by webanno.
the class ConllUReaderWriterTest method roundTrip.
/**
 * Round-trips the CoNLL-U sample {@code conllu-en-orig.conll} through {@link ConllUReader}
 * and {@link ConllUWriter} and checks that the written file matches the reference file
 * {@code conllu-en-ref.conll}. Both files are trimmed and compared ignoring end-of-line
 * differences.
 */
@Test
public void roundTrip() throws Exception {
    // Reader over the single original sample file.
    CollectionReaderDescription conllReader = createReaderDescription(
            ConllUReader.class,
            ConllUReader.PARAM_SOURCE_LOCATION, "src/test/resources/conll/u",
            ConllUReader.PARAM_PATTERNS, "conllu-en-orig.conll");

    // Writer that strips the original extension and appends the ".conll" suffix, so the
    // output file keeps the input file's name.
    AnalysisEngineDescription conllWriter = createEngineDescription(
            ConllUWriter.class,
            ConllUWriter.PARAM_TARGET_LOCATION, "target/test-output/ConllUReaderWriterTest-roundTrip",
            ConllUWriter.PARAM_FILENAME_SUFFIX, ".conll",
            ConllUWriter.PARAM_STRIP_EXTENSION, true);

    runPipeline(conllReader, conllWriter);

    File referenceFile = new File("src/test/resources/conll/u/conllu-en-ref.conll");
    File writtenFile = new File("target/test-output/ConllUReaderWriterTest-roundTrip/conllu-en-orig.conll");

    String expectedText = FileUtils.readFileToString(referenceFile, "UTF-8").trim();
    String writtenText = FileUtils.readFileToString(writtenFile, "UTF-8").trim();

    // Line-ending differences between platforms must not fail the comparison.
    boolean sameContent = IOUtils.contentEqualsIgnoreEOL(
            new StringReader(expectedText), new StringReader(writtenText));
    assertTrue(sameContent);
}
use of org.apache.uima.collection.CollectionReaderDescription in project webanno by webanno.
the class TcfReaderWriterTest method testRoundtrip.
/**
 * Round-trips {@code wlfxb.xml} through {@link TcfReader} and {@link TcfWriter} and checks
 * that the written document is XML-equal to the input document.
 */
@Test
public void testRoundtrip() throws Exception {
    CollectionReaderDescription reader = createReaderDescription(
            TcfReader.class,
            TcfReader.PARAM_SOURCE_LOCATION, "src/test/resources/",
            TcfReader.PARAM_PATTERNS, "wlfxb.xml");

    // Strip the original extension and append ".xml" so the output keeps the input's name.
    AnalysisEngineDescription writer = createEngineDescription(
            TcfWriter.class,
            TcfWriter.PARAM_TARGET_LOCATION, "target/test-output/roundtrip",
            TcfWriter.PARAM_FILENAME_SUFFIX, ".xml",
            TcfWriter.PARAM_STRIP_EXTENSION, true);

    runPipeline(reader, writer);

    String expected = contentOf(new File("src/test/resources/wlfxb.xml"), UTF_8);
    String actual = contentOf(new File("target/test-output/roundtrip/wlfxb.xml"), UTF_8);

    // The value under test goes into assertThat(...) and the reference into the matcher,
    // so that a failure report labels the written file as "actual" and the source file
    // as "expected" rather than the other way round.
    assertThat(actual).isXmlEqualTo(expected);
}
use of org.apache.uima.collection.CollectionReaderDescription in project webanno by webanno.
the class TcfReaderWriterTest method testOneWay.
/**
 * Converts a TCF file through {@link TcfReader} and {@link TcfWriter} and compares the
 * written result against a reference file: first by layer count and per-layer annotation
 * count, then by full XML equality. A CAS dump is additionally written to
 * {@code target/test-output/oneway/dump.txt}.
 *
 * @param aInputFile
 *            name of the input file below {@code src/test/resources/}; the written file is
 *            read back under the same name from {@code target/test-output/oneway/}.
 * @param aExpectedFile
 *            name of the reference file below {@code src/test/resources/}.
 * @throws Exception
 *             if the pipeline fails or any of the files cannot be read.
 */
public void testOneWay(String aInputFile, String aExpectedFile) throws Exception {
    CollectionReaderDescription reader = createReaderDescription(
            TcfReader.class,
            TcfReader.PARAM_SOURCE_LOCATION, "src/test/resources/",
            TcfReader.PARAM_PATTERNS, aInputFile);

    AnalysisEngineDescription writer = createEngineDescription(
            TcfWriter.class,
            TcfWriter.PARAM_TARGET_LOCATION, "target/test-output/oneway",
            TcfWriter.PARAM_FILENAME_SUFFIX, ".xml",
            TcfWriter.PARAM_STRIP_EXTENSION, true);

    AnalysisEngineDescription dumper = createEngineDescription(
            CasDumpWriter.class,
            CasDumpWriter.PARAM_OUTPUT_FILE, "target/test-output/oneway/dump.txt");

    runPipeline(reader, writer, dumper);

    TextCorpusStored aCorpusDataReference;
    TextCorpusStored aCorpusDataActual;

    // Close both streams even if parsing fails, so that no file handles are leaked.
    try (InputStream isReference = new FileInputStream(new File("src/test/resources/" + aExpectedFile));
            InputStream isActual = new FileInputStream(new File("target/test-output/oneway/" + aInputFile))) {
        WLData wLDataReference = WLDObjector.read(isReference);
        aCorpusDataReference = wLDataReference.getTextCorpus();

        WLData wLDataActual = WLDObjector.read(isActual);
        aCorpusDataActual = wLDataActual.getTextCorpus();
    }

    // Check that the number of layers is maintained
    assertEquals(aCorpusDataReference.getLayers().size(), aCorpusDataActual.getLayers().size());

    // Check that every layer has the same number of annotations
    for (TextCorpusLayer layer : aCorpusDataReference.getLayers()) {
        assertEquals("Layer size mismatch in [" + layer.getClass().getName() + "]", layer.size(), getLayer(aCorpusDataActual, layer.getClass()).size());
    }

    // Finally compare the complete documents as XML
    XMLAssert.assertXMLEqual(new InputSource("src/test/resources/" + aExpectedFile), new InputSource(new File("target/test-output/oneway/" + aInputFile).getPath()));
}
use of org.apache.uima.collection.CollectionReaderDescription in project webanno by webanno.
the class WebAnnoTsv3XReaderWriterRoundTripTest method runTest.
/**
 * Round-trips {@code reference.tsv} from the reference folder through
 * {@link WebannoTsv3XReader} and {@link WebannoTsv3XWriter} and checks that the written TSV
 * is identical to the reference TSV. An XMI file is written alongside it but is not compared.
 */
@Test
public void runTest() throws Exception {
    TypeSystemDescription global = TypeSystemDescriptionFactory.createTypeSystemDescription();

    // Use the type system shipped with the reference folder if there is one; otherwise fall
    // back to the shared test type system.
    File customTypeSystem = new File(referenceFolder, "typesystem.xml");
    String typeSystemPath = customTypeSystem.exists()
            ? customTypeSystem.toString()
            : "src/test/resources/desc/type/webannoTestTypes.xml";
    TypeSystemDescription local = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(typeSystemPath);

    TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(asList(global, local));

    String targetFolder = "target/test-output/WebAnnoTsv3XReaderWriterRoundTripTest/" + referenceFolder.getName();

    CollectionReaderDescription reader = createReaderDescription(
            WebannoTsv3XReader.class, merged,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, referenceFolder,
            WebannoTsv3XReader.PARAM_PATTERNS, "reference.tsv");

    AnalysisEngineDescription checker = createEngineDescription(DKProCoreConventionsChecker.class);

    AnalysisEngineDescription tsvWriter = createEngineDescription(
            WebannoTsv3XWriter.class, merged,
            WebannoTsv3XWriter.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv3XWriter.PARAM_STRIP_EXTENSION, true);

    AnalysisEngineDescription xmiWriter = createEngineDescription(
            XmiWriter.class, merged,
            XmiWriter.PARAM_TARGET_LOCATION, targetFolder,
            XmiWriter.PARAM_STRIP_EXTENSION, true);

    SimplePipeline.runPipeline(reader, checker, tsvWriter, xmiWriter);

    String expectedTsv = FileUtils.readFileToString(new File(referenceFolder, "reference.tsv"), "UTF-8");
    String writtenTsv = FileUtils.readFileToString(new File(targetFolder, "reference.tsv"), "UTF-8");

    // Only the TSV output is asserted. The XMI files would be compared by their
    // serialization rather than semantically, and that serialization is subject to minor
    // variations depending e.g. on the order in which annotations are created in the CAS.
    // The XMI comparison is therefore disabled and should only be enabled on a case-by-case
    // basis during development:
    //
    // String referenceXmi = FileUtils.readFileToString(new File(referenceFolder,
    // "reference.xmi"), "UTF-8");
    // String actualXmi = FileUtils.readFileToString(new File(targetFolder, "reference.xmi"),
    // "UTF-8");
    // assertEquals(referenceXmi, actualXmi);
    assertEquals(expectedTsv, writtenTsv);
}
Aggregations