use of org.apache.uima.collection.CollectionReader in project webanno by webanno.
In the class XmiWriterReaderTest, method write():
/**
 * Reads the Latin sample text and writes it out as XMI, then verifies that the
 * expected XMI file appears in the temporary output folder.
 */
public void write() throws Exception {
    CollectionReader sourceReader = CollectionReaderFactory.createReader(
            TextReader.class,
            ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "src/test/resources/texts",
            ResourceCollectionReaderBase.PARAM_PATTERNS,
            new String[] { ResourceCollectionReaderBase.INCLUDE_PREFIX + "latin.txt" },
            ResourceCollectionReaderBase.PARAM_LANGUAGE, "latin");
    AnalysisEngine writer = AnalysisEngineFactory.createEngine(
            XmiWriter.class,
            XmiWriter.PARAM_TARGET_LOCATION, testFolder.getRoot().getPath());

    runPipeline(sourceReader, writer);

    // The writer derives the output name from the input file name plus ".xmi".
    File expectedOutput = new File(testFolder.getRoot(), "latin.txt.xmi");
    assertTrue(expectedOutput.exists());
}
use of org.apache.uima.collection.CollectionReader in project webanno by webanno.
In the class WebAnnoTsv2ReaderWriterTest, method test():
/**
 * Round-trip test for the WebAnno TSV2 format: reads a reference document, writes it
 * back out with the TSV2 writer, re-reads both files, and checks that the annotation
 * counts match for every layer.
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();

    // Read the reference document and write it back out through the TSV2 writer.
    CollectionReader reader = createCollectionReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_PATH, "src/test/resources/tsv2/",
            WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    List<String> multipleSpans = new ArrayList<>();
    multipleSpans.add("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity");
    multipleSpans.add("de.tudarmstadt.ukp.dkpro.core.api.coref.type.Coreference");
    AnalysisEngineDescription writer = createPrimitiveDescription(WebannoTsv2Writer.class,
            WebannoTsv2Writer.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv2Writer.PARAM_STRIP_EXTENSION, true,
            WebannoTsv2Writer.MULTIPLE_SPAN_ANNOTATIONS, multipleSpans);
    runPipeline(reader, writer);

    // cas1 holds the original (expected) document, cas2 the round-tripped (actual) one.
    CollectionReader reader1 = createCollectionReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_PATH, "src/test/resources/tsv2/",
            WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CollectionReader reader2 = createCollectionReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_PATH, targetFolder,
            WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);

    // FIX: JUnit's assertEquals takes (expected, actual) — the original passed the
    // round-tripped CAS as "expected", which produces misleading failure messages.
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(), JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(), JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(), JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(), JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(), JCasUtil.select(cas2.getJCas(), Sentence.class).size());
}
use of org.apache.uima.collection.CollectionReader in project webanno by webanno.
In the class WebAnnoTsv3ReaderWriterTest, method test():
/**
 * Round-trip test for the WebAnno TSV3 format: reads a coreference document, writes it
 * back out with the TSV3 writer, re-reads both files, and checks that the annotation
 * counts match for every configured layer (spans, chains, and relations).
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();

    // Read the reference document.
    CollectionReader reader = CollectionReaderFactory.createReader(WebannoTsv3Reader.class,
            WebannoTsv3Reader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/",
            WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");

    // Layer configuration for the writer. Slot/link lists are intentionally empty:
    // this document carries no slot features.
    List<String> slotFeatures = new ArrayList<>();
    List<String> slotTargets = new ArrayList<>();
    List<String> linkTypes = new ArrayList<>();
    List<String> spanLayers = new ArrayList<>();
    spanLayers.add(NamedEntity.class.getName());
    spanLayers.add(POS.class.getName());
    spanLayers.add(Lemma.class.getName());
    List<String> chainLayers = new ArrayList<>();
    chainLayers.add("de.tudarmstadt.ukp.dkpro.core.api.coref.type.Coreference");
    List<String> relationLayers = new ArrayList<>();
    relationLayers.add(Dependency.class.getName());
    AnalysisEngineDescription writer = createEngineDescription(WebannoTsv3Writer.class,
            WebannoTsv3Writer.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv3Writer.PARAM_STRIP_EXTENSION, true,
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, spanLayers,
            WebannoTsv3Writer.PARAM_SLOT_FEATS, slotFeatures,
            WebannoTsv3Writer.PARAM_SLOT_TARGETS, slotTargets,
            WebannoTsv3Writer.PARAM_LINK_TYPES, linkTypes,
            WebannoTsv3Writer.PARAM_CHAIN_LAYERS, chainLayers,
            WebannoTsv3Writer.PARAM_RELATION_LAYERS, relationLayers);
    runPipeline(reader, writer);

    // cas1 holds the original (expected) document, cas2 the round-tripped (actual) one.
    CollectionReader reader1 = CollectionReaderFactory.createReader(WebannoTsv3Reader.class,
            WebannoTsv3Reader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/",
            WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");
    CollectionReader reader2 = CollectionReaderFactory.createReader(WebannoTsv3Reader.class,
            WebannoTsv3Reader.PARAM_SOURCE_LOCATION, targetFolder,
            WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");
    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);

    // FIX: JUnit's assertEquals takes (expected, actual) — the original passed the
    // round-tripped CAS as "expected", which produces misleading failure messages.
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(), JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(), JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(), JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(), JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(), JCasUtil.select(cas2.getJCas(), Sentence.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Dependency.class).size(), JCasUtil.select(cas2.getJCas(), Dependency.class).size());
}
use of org.apache.uima.collection.CollectionReader in project dkpro-lab by dkpro.
In the class SimpleExecutionEngine, method run():
/**
 * Executes the given {@link UimaTask}: builds the collection reader and analysis engine
 * from the task's descriptions, processes every document provided by the reader, and
 * drives the task life-cycle (initialize/begin/complete, with fail/destroy on error).
 *
 * @param aConfiguration the task to execute; must be a {@link UimaTask}.
 * @return the ID of the task context the run was recorded under.
 * @throws ExecutionException if the task is not a {@link UimaTask} or processing fails.
 * @throws LifeCycleException if a life-cycle callback fails (re-thrown unchanged).
 */
@Override
public String run(Task aConfiguration) throws ExecutionException, LifeCycleException {
    if (!(aConfiguration instanceof UimaTask)) {
        throw new ExecutionException("This engine can only execute [" + UimaTask.class.getName() + "]");
    }
    UimaTask configuration = (UimaTask) aConfiguration;
    // Create persistence service for injection into analysis components
    TaskContext ctx = contextFactory.createContext(aConfiguration);
    try {
        ResourceManager resMgr = newDefaultResourceManager();
        // Make sure the descriptor is fully resolved. It will be modified and
        // thus should not be modified again afterwards by UIMA.
        AnalysisEngineDescription analysisDesc = configuration.getAnalysisEngineDescription(ctx);
        analysisDesc.resolveImports(resMgr);
        if (analysisDesc.getMetaData().getName() == null) {
            analysisDesc.getMetaData().setName("Analysis for " + aConfiguration.getType());
        }
        // Scan components that accept the service and bind it to them
        bindResource(analysisDesc, TaskContext.class, TaskContextProvider.class,
                TaskContextProvider.PARAM_FACTORY_NAME, contextFactory.getId(),
                TaskContextProvider.PARAM_CONTEXT_ID, ctx.getId());
        // Set up UIMA context & logging
        Logger logger = new UimaLoggingAdapter(ctx);
        UimaContextAdmin uimaCtx = newUimaContext(logger, resMgr, newConfigurationManager());
        // Set up reader
        CollectionReaderDescription readerDesc = configuration.getCollectionReaderDescription(ctx);
        if (readerDesc.getMetaData().getName() == null) {
            readerDesc.getMetaData().setName("Reader for " + aConfiguration.getType());
        }
        Map<String, Object> addReaderParam = new HashMap<String, Object>();
        addReaderParam.put(Resource.PARAM_UIMA_CONTEXT, uimaCtx);
        addReaderParam.put(Resource.PARAM_RESOURCE_MANAGER, resMgr);
        CollectionReader reader = produceCollectionReader(readerDesc, resMgr, addReaderParam);
        // Set up analysis engine
        AnalysisEngine engine;
        if (analysisDesc.isPrimitive()) {
            engine = new PrimitiveAnalysisEngine_impl();
        } else {
            engine = new AggregateAnalysisEngine_impl();
        }
        Map<String, Object> addEngineParam = new HashMap<String, Object>();
        // BUG FIX: these two parameters were previously put into addReaderParam (a
        // copy-paste slip), so the engine was initialized with an EMPTY parameter map
        // and never received the shared UIMA context / resource manager.
        addEngineParam.put(Resource.PARAM_UIMA_CONTEXT, uimaCtx);
        addEngineParam.put(Resource.PARAM_RESOURCE_MANAGER, resMgr);
        engine.initialize(analysisDesc, addEngineParam);
        // Now the setup is complete
        ctx.getLifeCycleManager().initialize(ctx, aConfiguration);
        // Start recording
        ctx.getLifeCycleManager().begin(ctx, aConfiguration);
        // Run the experiment
        // Apply the engine to all documents provided by the reader. The CAS is created
        // once from the merged reader+engine metadata and reset between documents.
        List<ResourceMetaData> metaData = new ArrayList<ResourceMetaData>();
        metaData.add(reader.getMetaData());
        metaData.add(engine.getMetaData());
        CAS cas = CasCreationUtils.createCas(metaData);
        while (reader.hasNext()) {
            reader.getNext(cas);
            engine.process(cas);
            // Capture the document title for progress reporting before resetting the CAS.
            String documentTitle = "";
            Feature documentTitleFeature = cas.getDocumentAnnotation().getType().getFeatureByBaseName("documentTitle");
            if (documentTitleFeature != null) {
                documentTitle = cas.getDocumentAnnotation().getFeatureValueAsString(documentTitleFeature);
            }
            cas.reset();
            Progress[] progresses = reader.getProgress();
            if (progresses != null) {
                for (Progress p : progresses) {
                    ctx.message("Progress " + readerDesc.getImplementationName() + " "
                            + p.getCompleted() + "/" + p.getTotal() + " " + p.getUnit() + " "
                            + "(" + documentTitle + ")");
                }
            }
        }
        // Shut down engine and reader
        engine.collectionProcessComplete();
        reader.close();
        engine.destroy();
        reader.destroy();
        // End recording
        ctx.getLifeCycleManager().complete(ctx, aConfiguration);
        return ctx.getId();
    } catch (LifeCycleException e) {
        ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
        throw e;
    } catch (Throwable e) {
        // Record the failure before wrapping; preserves the original cause.
        ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
        throw new ExecutionException(e);
    } finally {
        if (ctx != null) {
            ctx.getLifeCycleManager().destroy(ctx, aConfiguration);
        }
    }
}
use of org.apache.uima.collection.CollectionReader in project dkpro-tc by dkpro.
In the class LiblinearSaveAndLoadModelDocumentRegression, method regressionLoadModel():
/**
 * Loads a previously stored classifier model and verifies that applying it to the
 * regression test data yields exactly one outcome in a plausible value range.
 *
 * @param modelFolder folder containing the stored model.
 */
private void regressionLoadModel(File modelFolder) throws UIMAException, IOException {
    CollectionReader reader = CollectionReaderFactory.createReader(LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, regressionTest,
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    JCas jcas = JCasFactory.createJCas();
    // FIX: the hasNext() result was silently discarded. The CollectionReader contract
    // requires checking it before getNext(); in a test, assert it so an empty input
    // fails fast with a clear message instead of an obscure downstream error.
    assertTrue(reader.hasNext());
    reader.getNext(jcas.getCas());
    segmenter.process(jcas);
    tcAnno.process(jcas);
    List<TextClassificationOutcome> outcomes =
            new ArrayList<>(JCasUtil.select(jcas, TextClassificationOutcome.class));
    assertEquals(1, outcomes.size());
    // Parse to a primitive to avoid needless boxing/unboxing in the comparison.
    double d = Double.parseDouble(outcomes.get(0).getOutcome());
    assertTrue(d > 0.1 && d < 5);
}
Aggregations