use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.
the class TcfReaderWriterTest method testOneWay.
/**
 * Runs a TCF file through {@link TcfReader} and {@link TcfWriter} and compares the written
 * file against a reference file: first by layer count and per-layer annotation count, then by
 * XML equality.
 *
 * @param aInputFile
 *            name of the input file below {@code src/test/resources/}; the written file is
 *            looked up under the same name below {@code target/test-output/oneway/}.
 * @param aExpectedFile
 *            name of the reference file below {@code src/test/resources/}.
 * @throws Exception
 *             if the pipeline fails or any of the files cannot be read.
 */
public void testOneWay(String aInputFile, String aExpectedFile) throws Exception {
    CollectionReaderDescription reader = createReaderDescription(TcfReader.class,
            TcfReader.PARAM_SOURCE_LOCATION, "src/test/resources/",
            TcfReader.PARAM_PATTERNS, aInputFile);
    AnalysisEngineDescription writer = createEngineDescription(TcfWriter.class,
            TcfWriter.PARAM_TARGET_LOCATION, "target/test-output/oneway",
            TcfWriter.PARAM_FILENAME_SUFFIX, ".xml",
            TcfWriter.PARAM_STRIP_EXTENSION, true);
    AnalysisEngineDescription dumper = createEngineDescription(CasDumpWriter.class,
            CasDumpWriter.PARAM_OUTPUT_FILE, "target/test-output/oneway/dump.txt");
    runPipeline(reader, writer, dumper);

    String referencePath = "src/test/resources/" + aExpectedFile;
    File actualFile = new File("target/test-output/oneway/" + aInputFile);

    // try-with-resources guarantees both streams are closed, even when an assertion fails.
    try (InputStream isReference = new FileInputStream(new File(referencePath));
            InputStream isActual = new FileInputStream(actualFile)) {
        WLData wLDataReference = WLDObjector.read(isReference);
        TextCorpusStored aCorpusDataReference = wLDataReference.getTextCorpus();
        WLData wLDataActual = WLDObjector.read(isActual);
        TextCorpusStored aCorpusDataActual = wLDataActual.getTextCorpus();

        // Check that the number of layers is maintained
        assertEquals(aCorpusDataReference.getLayers().size(),
                aCorpusDataActual.getLayers().size());

        // Check that every layer has the same number of annotations
        for (TextCorpusLayer layer : aCorpusDataReference.getLayers()) {
            assertEquals("Layer size mismatch in [" + layer.getClass().getName() + "]",
                    layer.size(), getLayer(aCorpusDataActual, layer.getClass()).size());
        }
    }

    // Finally compare the two documents as XML
    XMLAssert.assertXMLEqual(new InputSource(referencePath),
            new InputSource(actualFile.getPath()));
}
use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.
the class WebAnnoTsv3XReaderWriterRoundTripTest method runTest.
/**
 * Reads {@code reference.tsv} from the reference folder, writes it back out as TSV (and XMI)
 * and asserts that the written TSV is textually identical to the reference.
 */
@Test
public void runTest() throws Exception {
    // Use the type system shipped with the reference data if there is one, otherwise fall
    // back to the shared test type system.
    File customTypeSystem = new File(referenceFolder, "typesystem.xml");
    String localTypeSystemPath = customTypeSystem.exists()
            ? customTypeSystem.toString()
            : "src/test/resources/desc/type/webannoTestTypes.xml";

    TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(asList(
            TypeSystemDescriptionFactory.createTypeSystemDescription(),
            TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(
                    localTypeSystemPath)));

    String targetFolder = "target/test-output/WebAnnoTsv3XReaderWriterRoundTripTest/"
            + referenceFolder.getName();

    CollectionReaderDescription tsvReader = createReaderDescription(
            WebannoTsv3XReader.class, merged,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, referenceFolder,
            WebannoTsv3XReader.PARAM_PATTERNS, "reference.tsv");

    AnalysisEngineDescription conventionsChecker = createEngineDescription(
            DKProCoreConventionsChecker.class);

    AnalysisEngineDescription tsvOut = createEngineDescription(
            WebannoTsv3XWriter.class, merged,
            WebannoTsv3XWriter.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv3XWriter.PARAM_STRIP_EXTENSION, true);

    AnalysisEngineDescription xmiOut = createEngineDescription(
            XmiWriter.class, merged,
            XmiWriter.PARAM_TARGET_LOCATION, targetFolder,
            XmiWriter.PARAM_STRIP_EXTENSION, true);

    SimplePipeline.runPipeline(tsvReader, conventionsChecker, tsvOut, xmiOut);

    String expectedTsv = FileUtils.readFileToString(
            new File(referenceFolder, "reference.tsv"), "UTF-8");
    String writtenTsv = FileUtils.readFileToString(
            new File(targetFolder, "reference.tsv"), "UTF-8");

    // The XMI output is deliberately not asserted: it would be compared by serialization
    // rather than semantically, and the serialization varies slightly, e.g. with the order
    // in which annotations are created in the CAS. Enable the lines below only on a
    // case-by-case basis while developing.
    //
    // String referenceXmi = FileUtils.readFileToString(
    //         new File(referenceFolder, "reference.xmi"), "UTF-8");
    // String actualXmi = FileUtils.readFileToString(
    //         new File(targetFolder, "reference.xmi"), "UTF-8");

    assertEquals(expectedTsv, writtenTsv);
    // assertEquals(referenceXmi, actualXmi);
}
use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.
the class WebAnnoTsv3XReaderWriterTest method test.
/**
 * Round-trips {@code coref.tsv} through {@link WebannoTsv3XReader} and
 * {@link WebannoTsv3XWriter}, then reads both the original and the written file and checks
 * that they contain the same number of annotations for a selection of annotation types.
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();

    // Step 1: read the original file and write it back out to the target folder.
    CollectionReader reader = CollectionReaderFactory.createReader(WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/",
            WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    AnalysisEngineDescription writer = createEngineDescription(WebannoTsv3XWriter.class,
            WebannoTsv3XWriter.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv3XWriter.PARAM_STRIP_EXTENSION, true);
    runPipeline(reader, writer);

    // Step 2: load the original (cas1, expected) and the written copy (cas2, actual).
    CollectionReader reader1 = CollectionReaderFactory.createReader(WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/",
            WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    CollectionReader reader2 = CollectionReaderFactory.createReader(WebannoTsv3XReader.class,
            WebannoTsv3XReader.PARAM_SOURCE_LOCATION, targetFolder,
            WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);

    // Step 3: compare annotation counts. The original file supplies the expected value and
    // the round-tripped file the actual value, so failure messages read the right way round.
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(),
            JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(),
            JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(),
            JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(),
            JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(),
            JCasUtil.select(cas2.getJCas(), Sentence.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Dependency.class).size(),
            JCasUtil.select(cas2.getJCas(), Dependency.class).size());
}
use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.
the class WebAnnoTsv3ReaderWriterRoundTripTest method runTest.
/**
 * Reads {@code reference.tsv} from the reference folder, writes it back out as TSV (and XMI)
 * and asserts that the written TSV is textually identical to the reference. Reference
 * folders for which {@code isKnownToFail} returns true are reported as a failed assumption
 * instead of a test failure.
 */
@Test
public void runTest() throws Exception {
    // Use the type system shipped with the reference data if there is one, otherwise fall
    // back to the shared test type system.
    File customTypeSystem = new File(referenceFolder, "typesystem.xml");
    String localTypeSystemPath = customTypeSystem.exists()
            ? customTypeSystem.toString()
            : "src/test/resources/desc/type/webannoTestTypes.xml";

    TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(asList(
            TypeSystemDescriptionFactory.createTypeSystemDescription(),
            TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(
                    localTypeSystemPath)));

    String targetFolder = "target/test-output/WebAnnoTsv3ReaderWriterRoundTripTest/"
            + referenceFolder.getName();

    CollectionReaderDescription tsvReader = createReaderDescription(
            WebannoTsv3Reader.class, merged,
            WebannoTsv3Reader.PARAM_SOURCE_LOCATION, referenceFolder,
            WebannoTsv3Reader.PARAM_PATTERNS, "reference.tsv");

    AnalysisEngineDescription conventionsChecker = createEngineDescription(
            DKProCoreConventionsChecker.class);

    // NOTE: WebannoTsv3Writer doesn't seem to like it if both "SimpleLinkHost" and
    // "ComplexLinkHost" are declared, so only "SimpleLinkHost" is configured here;
    // "ComplexLinkHost" is left out because it has fewer tests.
    AnalysisEngineDescription tsvOut = createEngineDescription(
            WebannoTsv3Writer.class, merged,
            WebannoTsv3Writer.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv3Writer.PARAM_STRIP_EXTENSION, true,
            WebannoTsv3Writer.PARAM_CHAIN_LAYERS, asList("webanno.custom.Simple"),
            WebannoTsv3Writer.PARAM_SLOT_FEATS, asList("webanno.custom.SimpleLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(
                    NamedEntity.class.getName(),
                    MorphologicalFeatures.class.getName(),
                    POS.class.getName(),
                    Lemma.class.getName(),
                    Stem.class.getName(),
                    "webanno.custom.SimpleSpan",
                    "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES, asList("webanno.custom.LinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS, asList("webanno.custom.SimpleSpan"),
            WebannoTsv3Writer.PARAM_RELATION_LAYERS, asList(
                    "webanno.custom.SimpleRelation",
                    "webanno.custom.Relation",
                    "webanno.custom.ComplexRelation",
                    Dependency.class.getName()));

    AnalysisEngineDescription xmiOut = createEngineDescription(
            XmiWriter.class, merged,
            XmiWriter.PARAM_TARGET_LOCATION, targetFolder,
            XmiWriter.PARAM_STRIP_EXTENSION, true);

    try {
        SimplePipeline.runPipeline(tsvReader, conventionsChecker, tsvOut, xmiOut);
    } catch (Throwable e) {
        // A pipeline crash on a known-to-fail case becomes a failed assumption; anything
        // else is rethrown unchanged.
        assumeFalse("This test is known to fail.", isKnownToFail(referenceFolder.getName()));
        throw e;
    }

    String expectedTsv = FileUtils.readFileToString(
            new File(referenceFolder, "reference.tsv"), "UTF-8");
    String writtenTsv = FileUtils.readFileToString(
            new File(targetFolder, "reference.tsv"), "UTF-8");

    // The XMI output is deliberately not asserted: it would be compared by serialization
    // rather than semantically, and the serialization varies slightly, e.g. with the order
    // in which annotations are created in the CAS. Enable the lines below only on a
    // case-by-case basis while developing.
    //
    // String referenceXmi = FileUtils.readFileToString(
    //         new File(referenceFolder, "reference.xmi"), "UTF-8");
    // String actualXmi = FileUtils.readFileToString(
    //         new File(targetFolder, "reference.xmi"), "UTF-8");

    // Known-to-fail cases are also excluded from the content comparison.
    assumeFalse("This test is known to fail.", isKnownToFail(referenceFolder.getName()));
    assertEquals(expectedTsv, writtenTsv);
    // assertEquals(referenceXmi, actualXmi);
}
use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.
the class SimilarityPairFeatureTest method similarityPairFeatureTest.
/**
 * Builds two tokenized views, runs {@link SimilarityPairFeatureExtractor} configured with a
 * greedy-string-tiling resource over them, and checks that exactly one feature with the
 * expected name and value is produced.
 */
@Test
public void similarityPairFeatureTest() throws Exception {
    // A no-op engine is only needed to obtain a JCas and to process it once.
    AnalysisEngine noOpEngine = createEngine(createEngineDescription(NoOpAnnotator.class));
    JCas document = noOpEngine.newJCas();

    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class,
            Sentence.class);

    JCas firstView = document.createView(VIEW1);
    firstView.setDocumentLanguage("en");
    tokenBuilder.buildTokens(firstView, "This is a test .");

    JCas secondView = document.createView(VIEW2);
    secondView.setDocumentLanguage("en");
    tokenBuilder.buildTokens(secondView, "Test is this .");

    noOpEngine.process(document);

    // Similarity measure handed to the extractor as an external resource.
    ExternalResourceDescription tilingResource = ExternalResourceFactory
            .createExternalResourceDescription(GreedyStringTilingMeasureResource.class,
                    GreedyStringTilingMeasureResource.PARAM_MIN_MATCH_LENGTH, "3");

    SimilarityPairFeatureExtractor pairExtractor = FeatureUtil.createResource(
            SimilarityPairFeatureExtractor.class,
            SimilarityPairFeatureExtractor.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
            SimilarityPairFeatureExtractor.PARAM_SEGMENT_FEATURE_PATH, Token.class.getName(),
            SimilarityPairFeatureExtractor.PARAM_TEXT_SIMILARITY_RESOURCE, tilingResource);

    Set<Feature> extracted = pairExtractor.extract(document.getView(VIEW1),
            document.getView(VIEW2));

    Assert.assertEquals(1, extracted.size());
    Iterator<Feature> featureIterator = extracted.iterator();
    assertFeature("SimilarityGreedyStringTiling_3", 0.8125, featureIterator.next(), 0.0001);
}
Aggregations