Search in sources :

Example 6 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.

the class TcfReaderWriterTest method testOneWay.

/**
 * Runs a one-way conversion: reads {@code aInputFile} from {@code src/test/resources/} with
 * {@code TcfReader}, writes it to {@code target/test-output/oneway} with {@code TcfWriter}
 * (plus a {@code CasDumpWriter} dump), and compares the written file against
 * {@code aExpectedFile}.
 *
 * <p>The comparison checks the number of layers, the size of each layer, and finally the
 * XML equality of the two files.
 *
 * @param aInputFile
 *            name of the input file below {@code src/test/resources/}; the written file is
 *            looked up under the same name in the output folder
 * @param aExpectedFile
 *            name of the reference file below {@code src/test/resources/}
 * @throws Exception
 *             if the pipeline, the file access or the XML comparison fails
 */
public void testOneWay(String aInputFile, String aExpectedFile) throws Exception {
    CollectionReaderDescription reader = createReaderDescription(TcfReader.class, TcfReader.PARAM_SOURCE_LOCATION, "src/test/resources/", TcfReader.PARAM_PATTERNS, aInputFile);
    AnalysisEngineDescription writer = createEngineDescription(TcfWriter.class, TcfWriter.PARAM_TARGET_LOCATION, "target/test-output/oneway", TcfWriter.PARAM_FILENAME_SUFFIX, ".xml", TcfWriter.PARAM_STRIP_EXTENSION, true);
    AnalysisEngineDescription dumper = createEngineDescription(CasDumpWriter.class, CasDumpWriter.PARAM_OUTPUT_FILE, "target/test-output/oneway/dump.txt");
    runPipeline(reader, writer, dumper);
    String referencePath = "src/test/resources/" + aExpectedFile;
    File actualFile = new File("target/test-output/oneway/" + aInputFile);
    // try-with-resources guarantees both streams are closed even when an assertion fails;
    // previously the streams were never closed.
    try (InputStream isReference = new FileInputStream(new File(referencePath));
            InputStream isActual = new FileInputStream(actualFile)) {
        WLData wLDataReference = WLDObjector.read(isReference);
        TextCorpusStored aCorpusDataReference = wLDataReference.getTextCorpus();
        WLData wLDataActual = WLDObjector.read(isActual);
        TextCorpusStored aCorpusDataActual = wLDataActual.getTextCorpus();
        // Check that the number of layers is maintained
        assertEquals(aCorpusDataReference.getLayers().size(), aCorpusDataActual.getLayers().size());
        // Check that every layer has the same number of annotations
        for (TextCorpusLayer layer : aCorpusDataReference.getLayers()) {
            assertEquals("Layer size mismatch in [" + layer.getClass().getName() + "]", layer.size(), getLayer(aCorpusDataActual, layer.getClass()).size());
        }
    }
    // Finally compare the two files as XML documents
    XMLAssert.assertXMLEqual(new InputSource(referencePath), new InputSource(actualFile.getPath()));
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) TextCorpusLayer(eu.clarin.weblicht.wlfxb.tc.api.TextCorpusLayer) InputSource(org.xml.sax.InputSource) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) TextCorpusStored(eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) WLData(eu.clarin.weblicht.wlfxb.xb.WLData) File(java.io.File)

Example 7 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.

the class WebAnnoTsv3XReaderWriterRoundTripTest method runTest.

/**
 * Round-trip test: reads {@code reference.tsv} from {@code referenceFolder} with
 * {@code WebannoTsv3XReader}, runs the conventions checker, writes the document back as TSV
 * and XMI into a per-test target folder, and asserts that the written TSV text equals the
 * reference TSV text.
 *
 * @throws Exception
 *             if the type systems cannot be loaded, the pipeline fails, or a file cannot be
 *             read
 */
@Test
public void runTest() throws Exception {
    TypeSystemDescription global = TypeSystemDescriptionFactory.createTypeSystemDescription();

    // Use the type system stored next to the reference data when there is one, otherwise
    // fall back to the shared test type system.
    File bundledTypeSystem = new File(referenceFolder, "typesystem.xml");
    String localTypeSystemPath = bundledTypeSystem.exists()
            ? new File(referenceFolder, "typesystem.xml").toString()
            : "src/test/resources/desc/type/webannoTestTypes.xml";
    TypeSystemDescription local = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(localTypeSystemPath);
    TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(asList(global, local));

    String targetFolder = "target/test-output/WebAnnoTsv3XReaderWriterRoundTripTest/" + referenceFolder.getName();

    CollectionReaderDescription reader = createReaderDescription(
            WebannoTsv3XReader.class, merged,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, referenceFolder,
            WebannoTsv3XReader.PARAM_PATTERNS, "reference.tsv");
    AnalysisEngineDescription checker = createEngineDescription(DKProCoreConventionsChecker.class);
    AnalysisEngineDescription tsvWriter = createEngineDescription(
            WebannoTsv3XWriter.class, merged,
            WebannoTsv3XWriter.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv3XWriter.PARAM_STRIP_EXTENSION, true);
    AnalysisEngineDescription xmiWriter = createEngineDescription(
            XmiWriter.class, merged,
            XmiWriter.PARAM_TARGET_LOCATION, targetFolder,
            XmiWriter.PARAM_STRIP_EXTENSION, true);
    SimplePipeline.runPipeline(reader, checker, tsvWriter, xmiWriter);

    File expectedTsvFile = new File(referenceFolder, "reference.tsv");
    File writtenTsvFile = new File(targetFolder, "reference.tsv");
    String expectedTsv = FileUtils.readFileToString(expectedTsvFile, "UTF-8");
    String writtenTsv = FileUtils.readFileToString(writtenTsvFile, "UTF-8");

    // Only the TSV output is asserted. The XMI files would be compared by their
    // serialization rather than semantically, and that serialization varies slightly, e.g.
    // with the order in which annotations are created in the CAS. The XMI comparison is
    // therefore left disabled and is meant to be enabled case by case during development:
    //
    // String referenceXmi = FileUtils.readFileToString(
    //         new File(referenceFolder, "reference.xmi"), "UTF-8");
    // String actualXmi = FileUtils.readFileToString(
    //         new File(targetFolder, "reference.xmi"), "UTF-8");
    // assertEquals(referenceXmi, actualXmi);
    assertEquals(expectedTsv, writtenTsv);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) File(java.io.File) Test(org.junit.Test)

Example 8 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.

the class WebAnnoTsv3XReaderWriterTest method test.

/**
 * Writes {@code coref.tsv} from {@code src/test/resources/tsv3/} to the test output folder
 * using {@code WebannoTsv3XWriter}, then reads both the original and the written file and
 * asserts that they contain the same number of tokens, POS tags, lemmas, named entities,
 * sentences and dependencies.
 *
 * @throws Exception
 *             if a reader, the pipeline or a CAS cannot be created or processed
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();
    // Step 1: read the source file and write it to the target folder
    CollectionReader reader = CollectionReaderFactory.createReader(WebannoTsv3XReader.class, WebannoTsv3XReader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/", WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    AnalysisEngineDescription writer = createEngineDescription(WebannoTsv3XWriter.class, WebannoTsv3XWriter.PARAM_TARGET_LOCATION, targetFolder, WebannoTsv3XWriter.PARAM_STRIP_EXTENSION, true);
    runPipeline(reader, writer);
    // Step 2: load the original (cas1) and the written copy (cas2)
    CollectionReader reader1 = CollectionReaderFactory.createReader(WebannoTsv3XReader.class, WebannoTsv3XReader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/", WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    CollectionReader reader2 = CollectionReaderFactory.createReader(WebannoTsv3XReader.class, WebannoTsv3XReader.PARAM_SOURCE_LOCATION, targetFolder, WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);
    // Step 3: compare annotation counts. The original (cas1) is passed as the expected
    // value and the written copy (cas2) as the actual value, so that a failure message
    // reports "expected" and "was" the right way round.
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(), JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(), JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(), JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(), JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(), JCasUtil.select(cas2.getJCas(), Sentence.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Dependency.class).size(), JCasUtil.select(cas2.getJCas(), Dependency.class).size());
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) CAS(org.apache.uima.cas.CAS) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) Test(org.junit.Test)

Example 9 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.

the class WebAnnoTsv3ReaderWriterRoundTripTest method runTest.

/**
 * Round-trip test: reads {@code reference.tsv} from {@code referenceFolder} with
 * {@code WebannoTsv3Reader}, runs the conventions checker, writes the document back as TSV
 * and XMI into a per-test target folder, and asserts that the written TSV text equals the
 * reference TSV text.
 *
 * <p>For reference folders that {@code isKnownToFail} reports as known failures, the test is
 * skipped through a JUnit assumption, both when the pipeline throws and before the final
 * comparison.
 *
 * @throws Exception
 *             if the type systems cannot be loaded, the pipeline fails for a case that is
 *             not known to fail, or a file cannot be read
 */
@Test
public void runTest() throws Exception {
    TypeSystemDescription global = TypeSystemDescriptionFactory.createTypeSystemDescription();

    // Use the type system stored next to the reference data when there is one, otherwise
    // fall back to the shared test type system.
    File bundledTypeSystem = new File(referenceFolder, "typesystem.xml");
    String localTypeSystemPath = bundledTypeSystem.exists()
            ? new File(referenceFolder, "typesystem.xml").toString()
            : "src/test/resources/desc/type/webannoTestTypes.xml";
    TypeSystemDescription local = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(localTypeSystemPath);
    TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(asList(global, local));

    String targetFolder = "target/test-output/WebAnnoTsv3ReaderWriterRoundTripTest/" + referenceFolder.getName();

    CollectionReaderDescription reader = createReaderDescription(
            WebannoTsv3Reader.class, merged,
            WebannoTsv3Reader.PARAM_SOURCE_LOCATION, referenceFolder,
            WebannoTsv3Reader.PARAM_PATTERNS, "reference.tsv");
    AnalysisEngineDescription checker = createEngineDescription(DKProCoreConventionsChecker.class);

    // The original author notes that WebannoTsv3Writer does not seem to cope with both
    // "SimpleLinkHost" and "ComplexLinkHost" being declared, so only "SimpleLinkHost" is
    // configured here; "ComplexLinkHost" was the one with fewer tests.
    AnalysisEngineDescription tsvWriter = createEngineDescription(
            WebannoTsv3Writer.class, merged,
            WebannoTsv3Writer.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv3Writer.PARAM_STRIP_EXTENSION, true,
            WebannoTsv3Writer.PARAM_CHAIN_LAYERS, asList("webanno.custom.Simple"),
            WebannoTsv3Writer.PARAM_SLOT_FEATS, asList("webanno.custom.SimpleLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(
                    NamedEntity.class.getName(),
                    MorphologicalFeatures.class.getName(),
                    POS.class.getName(),
                    Lemma.class.getName(),
                    Stem.class.getName(),
                    "webanno.custom.SimpleSpan",
                    "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES, asList("webanno.custom.LinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS, asList("webanno.custom.SimpleSpan"),
            WebannoTsv3Writer.PARAM_RELATION_LAYERS, asList(
                    "webanno.custom.SimpleRelation",
                    "webanno.custom.Relation",
                    "webanno.custom.ComplexRelation",
                    Dependency.class.getName()));
    AnalysisEngineDescription xmiWriter = createEngineDescription(
            XmiWriter.class, merged,
            XmiWriter.PARAM_TARGET_LOCATION, targetFolder,
            XmiWriter.PARAM_STRIP_EXTENSION, true);

    try {
        SimplePipeline.runPipeline(reader, checker, tsvWriter, xmiWriter);
    } catch (Throwable failure) {
        // A pipeline failure in a known-to-fail case skips the test; any other failure is
        // rethrown unchanged.
        assumeFalse("This test is known to fail.", isKnownToFail(referenceFolder.getName()));
        throw failure;
    }

    File expectedTsvFile = new File(referenceFolder, "reference.tsv");
    File writtenTsvFile = new File(targetFolder, "reference.tsv");
    String expectedTsv = FileUtils.readFileToString(expectedTsvFile, "UTF-8");
    String writtenTsv = FileUtils.readFileToString(writtenTsvFile, "UTF-8");

    // Only the TSV output is asserted. The XMI files would be compared by their
    // serialization rather than semantically, and that serialization varies slightly, e.g.
    // with the order in which annotations are created in the CAS. The XMI comparison is
    // therefore left disabled and is meant to be enabled case by case during development:
    //
    // String referenceXmi = FileUtils.readFileToString(
    //         new File(referenceFolder, "reference.xmi"), "UTF-8");
    // String actualXmi = FileUtils.readFileToString(
    //         new File(targetFolder, "reference.xmi"), "UTF-8");
    // assertEquals(referenceXmi, actualXmi);

    // Known-to-fail cases are skipped rather than compared, even when the pipeline ran.
    assumeFalse("This test is known to fail.", isKnownToFail(referenceFolder.getName()));
    assertEquals(expectedTsv, writtenTsv);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) File(java.io.File) Test(org.junit.Test)

Example 10 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

the class SimilarityPairFeatureTest method similarityPairFeatureTest.

/**
 * Builds two tokenized views ("This is a test ." and "Test is this .") and checks that
 * {@code SimilarityPairFeatureExtractor}, configured with a greedy-string-tiling measure
 * resource, extracts exactly one feature for the pair.
 *
 * @throws Exception
 *             if the engine, the views or the extractor cannot be created or run
 */
@Test
public void similarityPairFeatureTest() throws Exception {
    // Similarity measure resource handed to the extractor below
    ExternalResourceDescription gstResource = ExternalResourceFactory.createExternalResourceDescription(
            GreedyStringTilingMeasureResource.class,
            GreedyStringTilingMeasureResource.PARAM_MIN_MATCH_LENGTH, "3");

    // A no-op engine supplies the JCas that holds the two views
    AnalysisEngineDescription noOpDescription = createEngineDescription(NoOpAnnotator.class);
    AnalysisEngine engine = createEngine(noOpDescription);
    JCas jcas = engine.newJCas();

    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class, Sentence.class);

    JCas firstView = jcas.createView(VIEW1);
    firstView.setDocumentLanguage("en");
    tokenBuilder.buildTokens(firstView, "This is a test .");

    JCas secondView = jcas.createView(VIEW2);
    secondView.setDocumentLanguage("en");
    tokenBuilder.buildTokens(secondView, "Test is this .");

    engine.process(jcas);

    SimilarityPairFeatureExtractor extractor = FeatureUtil.createResource(
            SimilarityPairFeatureExtractor.class,
            SimilarityPairFeatureExtractor.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
            SimilarityPairFeatureExtractor.PARAM_SEGMENT_FEATURE_PATH, Token.class.getName(),
            SimilarityPairFeatureExtractor.PARAM_TEXT_SIMILARITY_RESOURCE, gstResource);

    Set<Feature> extracted = extractor.extract(jcas.getView(VIEW1), jcas.getView(VIEW2));
    Assert.assertEquals(1, extracted.size());

    // The single feature is checked by name; 0.8125 and 0.0001 are presumably the expected
    // value and the comparison tolerance (confirm against assertFeature).
    Feature onlyFeature = extracted.iterator().next();
    assertFeature("SimilarityGreedyStringTiling_3", 0.8125, onlyFeature, 0.0001);
}
Also used : TokenBuilder(org.apache.uima.fit.testing.factory.TokenBuilder) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) SimilarityPairFeatureExtractor(org.dkpro.tc.features.pair.similarity.SimilarityPairFeatureExtractor) ExternalResourceDescription(org.apache.uima.resource.ExternalResourceDescription) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Aggregations

AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)62 Test (org.junit.Test)32 File (java.io.File)27 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)25 ArrayList (java.util.ArrayList)22 AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine)18 JCas (org.apache.uima.jcas.JCas)16 Feature (org.dkpro.tc.api.features.Feature)13 FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature)11 ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription)10 AggregateBuilder (org.apache.uima.fit.factory.AggregateBuilder)8 ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException)8 JsonDataWriter (org.dkpro.tc.core.io.JsonDataWriter)8 TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget)7 Gson (com.google.gson.Gson)6 IOException (java.io.IOException)6 Instance (org.dkpro.tc.api.features.Instance)6 OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger)4 BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter)4 CAS (org.apache.uima.cas.CAS)4