Search in sources :

Example 1 with CollectionReaderDescription

use of org.apache.uima.collection.CollectionReaderDescription in project webanno by webanno.

the class Conll2009ReaderWriterTest method test.

/**
 * Runs a CoNLL 2009 sample through {@code Conll2009Reader} and {@code Conll2009Writer} and checks
 * that the written file matches the stored reference file. Both texts are trimmed and compared
 * with {@code IOUtils.contentEqualsIgnoreEOL}.
 */
@Test
public void test() throws Exception {
    CollectionReaderDescription conllReader = createReaderDescription(
            Conll2009Reader.class,
            Conll2009Reader.PARAM_SOURCE_LOCATION, "src/test/resources/conll/2009",
            Conll2009Reader.PARAM_PATTERNS, "en-orig.conll");
    AnalysisEngineDescription conllWriter = createEngineDescription(
            Conll2009Writer.class,
            Conll2009Writer.PARAM_TARGET_LOCATION, "target/test-output/Conll2009ReaderWriterTest-test",
            Conll2009Writer.PARAM_FILENAME_EXTENSION, ".conll",
            Conll2009Writer.PARAM_STRIP_EXTENSION, true);
    runPipeline(conllReader, conllWriter);

    // Load the stored reference first, then the file the writer just produced.
    File referenceFile = new File("src/test/resources/conll/2009/en-ref.conll");
    File writtenFile = new File("target/test-output/Conll2009ReaderWriterTest-test/en-orig.conll");
    String expectedText = FileUtils.readFileToString(referenceFile, "UTF-8").trim();
    String writtenText = FileUtils.readFileToString(writtenFile, "UTF-8").trim();

    boolean sameContent = IOUtils.contentEqualsIgnoreEOL(
            new StringReader(expectedText), new StringReader(writtenText));
    assertTrue(sameContent);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) StringReader(java.io.StringReader) File(java.io.File) Test(org.junit.Test)

Example 2 with CollectionReaderDescription

use of org.apache.uima.collection.CollectionReaderDescription in project webanno by webanno.

the class ConllUReaderWriterTest method roundTrip.

/**
 * Reads a CoNLL-U sample with {@code ConllUReader}, writes it back with {@code ConllUWriter} and
 * verifies that the written file matches the stored reference file. Both texts are trimmed and
 * compared with {@code IOUtils.contentEqualsIgnoreEOL}.
 */
@Test
public void roundTrip() throws Exception {
    CollectionReaderDescription conllUReader = createReaderDescription(
            ConllUReader.class,
            ConllUReader.PARAM_SOURCE_LOCATION, "src/test/resources/conll/u",
            ConllUReader.PARAM_PATTERNS, "conllu-en-orig.conll");
    AnalysisEngineDescription conllUWriter = createEngineDescription(
            ConllUWriter.class,
            ConllUWriter.PARAM_TARGET_LOCATION, "target/test-output/ConllUReaderWriterTest-roundTrip",
            ConllUWriter.PARAM_FILENAME_SUFFIX, ".conll",
            ConllUWriter.PARAM_STRIP_EXTENSION, true);
    runPipeline(conllUReader, conllUWriter);

    String expectedText = FileUtils
            .readFileToString(new File("src/test/resources/conll/u/conllu-en-ref.conll"), "UTF-8")
            .trim();
    String writtenText = FileUtils
            .readFileToString(new File("target/test-output/ConllUReaderWriterTest-roundTrip/conllu-en-orig.conll"), "UTF-8")
            .trim();

    StringReader expectedReader = new StringReader(expectedText);
    StringReader writtenReader = new StringReader(writtenText);
    assertTrue(IOUtils.contentEqualsIgnoreEOL(expectedReader, writtenReader));
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) StringReader(java.io.StringReader) File(java.io.File) Test(org.junit.Test)

Example 3 with CollectionReaderDescription

use of org.apache.uima.collection.CollectionReaderDescription in project webanno by webanno.

the class TcfReaderWriterTest method testRoundtrip.

/**
 * Reads {@code wlfxb.xml} with {@code TcfReader}, writes it back with {@code TcfWriter} and
 * checks that the written document is XML-equal to the input document.
 */
@Test
public void testRoundtrip() throws Exception {
    CollectionReaderDescription reader = createReaderDescription(TcfReader.class, TcfReader.PARAM_SOURCE_LOCATION, "src/test/resources/", TcfReader.PARAM_PATTERNS, "wlfxb.xml");
    AnalysisEngineDescription writer = createEngineDescription(TcfWriter.class, TcfWriter.PARAM_TARGET_LOCATION, "target/test-output/roundtrip", TcfWriter.PARAM_FILENAME_SUFFIX, ".xml", TcfWriter.PARAM_STRIP_EXTENSION, true);
    runPipeline(reader, writer);
    String expected = contentOf(new File("src/test/resources/wlfxb.xml"), UTF_8);
    String actual = contentOf(new File("target/test-output/roundtrip/wlfxb.xml"), UTF_8);
    // The value under test goes into assertThat() and the reference into the matcher, so that a
    // failure report labels the two documents correctly.
    assertThat(actual).isXmlEqualTo(expected);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) File(java.io.File) Test(org.junit.Test)

Example 4 with CollectionReaderDescription

use of org.apache.uima.collection.CollectionReaderDescription in project webanno by webanno.

the class TcfReaderWriterTest method testOneWay.

/**
 * Converts a TCF file through {@code TcfReader} and {@code TcfWriter} and compares the result
 * with an expected file: first the number of layers, then the size of each layer, and finally the
 * full XML.
 *
 * @param aInputFile
 *            pattern of the input file, resolved under {@code src/test/resources/}; the output is
 *            read back from {@code target/test-output/oneway/} under the same name
 * @param aExpectedFile
 *            name of the expected file under {@code src/test/resources/}
 * @throws Exception
 *             if the pipeline fails or a file cannot be read
 */
public void testOneWay(String aInputFile, String aExpectedFile) throws Exception {
    CollectionReaderDescription reader = createReaderDescription(TcfReader.class, TcfReader.PARAM_SOURCE_LOCATION, "src/test/resources/", TcfReader.PARAM_PATTERNS, aInputFile);
    AnalysisEngineDescription writer = createEngineDescription(TcfWriter.class, TcfWriter.PARAM_TARGET_LOCATION, "target/test-output/oneway", TcfWriter.PARAM_FILENAME_SUFFIX, ".xml", TcfWriter.PARAM_STRIP_EXTENSION, true);
    AnalysisEngineDescription dumper = createEngineDescription(CasDumpWriter.class, CasDumpWriter.PARAM_OUTPUT_FILE, "target/test-output/oneway/dump.txt");
    runPipeline(reader, writer, dumper);
    File referenceFile = new File("src/test/resources/" + aExpectedFile);
    File actualFile = new File("target/test-output/oneway/" + aInputFile);
    // try-with-resources guarantees both streams are closed, even when an assertion fails.
    try (InputStream isReference = new FileInputStream(referenceFile);
            InputStream isActual = new FileInputStream(actualFile)) {
        TextCorpusStored referenceCorpus = WLDObjector.read(isReference).getTextCorpus();
        TextCorpusStored actualCorpus = WLDObjector.read(isActual).getTextCorpus();
        // Check that the number of layers is maintained
        assertEquals(referenceCorpus.getLayers().size(), actualCorpus.getLayers().size());
        // Check that every layer has the same number of annotations
        for (TextCorpusLayer layer : referenceCorpus.getLayers()) {
            assertEquals("Layer size mismatch in [" + layer.getClass().getName() + "]", layer.size(), getLayer(actualCorpus, layer.getClass()).size());
        }
    }
    XMLAssert.assertXMLEqual(new InputSource("src/test/resources/" + aExpectedFile), new InputSource(actualFile.getPath()));
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) TextCorpusLayer(eu.clarin.weblicht.wlfxb.tc.api.TextCorpusLayer) InputSource(org.xml.sax.InputSource) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) TextCorpusStored(eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) WLData(eu.clarin.weblicht.wlfxb.xb.WLData) File(java.io.File)

Example 5 with CollectionReaderDescription

use of org.apache.uima.collection.CollectionReaderDescription in project webanno by webanno.

the class WebAnnoTsv3XReaderWriterRoundTripTest method runTest.

/**
 * Reads {@code reference.tsv} from the reference folder with {@code WebannoTsv3XReader}, writes it
 * back as TSV and as XMI into a per-folder target directory, and asserts that the written TSV is
 * identical to the reference TSV.
 */
@Test
public void runTest() throws Exception {
    TypeSystemDescription defaultTypes = TypeSystemDescriptionFactory.createTypeSystemDescription();

    // Use the type system shipped with the reference folder if there is one, otherwise fall back
    // to the shared test type system.
    File folderTypeSystem = new File(referenceFolder, "typesystem.xml");
    String localTypeSystemPath = folderTypeSystem.exists()
            ? folderTypeSystem.toString()
            : "src/test/resources/desc/type/webannoTestTypes.xml";
    TypeSystemDescription localTypes = TypeSystemDescriptionFactory
            .createTypeSystemDescriptionFromPath(localTypeSystemPath);

    TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(asList(defaultTypes, localTypes));

    String targetFolder = "target/test-output/WebAnnoTsv3XReaderWriterRoundTripTest/"
            + referenceFolder.getName();

    CollectionReaderDescription reader = createReaderDescription(
            WebannoTsv3XReader.class, merged,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, referenceFolder,
            WebannoTsv3XReader.PARAM_PATTERNS, "reference.tsv");
    AnalysisEngineDescription checker = createEngineDescription(DKProCoreConventionsChecker.class);
    AnalysisEngineDescription tsvWriter = createEngineDescription(
            WebannoTsv3XWriter.class, merged,
            WebannoTsv3XWriter.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv3XWriter.PARAM_STRIP_EXTENSION, true);
    AnalysisEngineDescription xmiWriter = createEngineDescription(
            XmiWriter.class, merged,
            XmiWriter.PARAM_TARGET_LOCATION, targetFolder,
            XmiWriter.PARAM_STRIP_EXTENSION, true);
    SimplePipeline.runPipeline(reader, checker, tsvWriter, xmiWriter);

    File referenceTsvFile = new File(referenceFolder, "reference.tsv");
    File actualTsvFile = new File(targetFolder, "reference.tsv");
    String referenceTsv = FileUtils.readFileToString(referenceTsvFile, "UTF-8");
    String actualTsv = FileUtils.readFileToString(actualTsvFile, "UTF-8");

    // Only the TSV output is asserted. The XMI files could only be compared via their
    // serialization, which is subject to minor variations depending e.g. on the order in which
    // annotations are created in the CAS. Comparing "reference.xmi" from the reference folder
    // against the one in the target folder is therefore left as a manual, case-by-case step
    // during development.
    assertEquals(referenceTsv, actualTsv);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) File(java.io.File) Test(org.junit.Test)

Aggregations

CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)78 HashMap (java.util.HashMap)53 ParameterSpace (org.dkpro.lab.task.ParameterSpace)51 TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet)40 Map (java.util.Map)35 AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)25 File (java.io.File)19 WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter)17 Test (org.junit.Test)14 ArrayList (java.util.ArrayList)13 LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter)9 NaiveBayes (weka.classifiers.bayes.NaiveBayes)9 ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription)7 LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter)7 Gson (com.google.gson.Gson)6 Instance (org.dkpro.tc.api.features.Instance)6 JsonDataWriter (org.dkpro.tc.core.io.JsonDataWriter)6 XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter)6 JCasIterable (org.apache.uima.fit.pipeline.JCasIterable)5 JCas (org.apache.uima.jcas.JCas)5