Search in sources:

Example 36 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.

In the class WebAnnoTsv3ReaderWriterTest, the method test.

/**
 * Round-trip test for the WebAnno TSV3 format.
 *
 * <p>Reads {@code coref.tsv} from the test resources, writes it to the test output folder with
 * {@link WebannoTsv3Writer}, then reads both the original and the written file and checks that
 * they contain the same number of annotations for each compared type.
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();
    CollectionReader reader = CollectionReaderFactory.createReader(WebannoTsv3Reader.class, WebannoTsv3Reader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/", WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");

    // Layer configuration handed to the writer. Slot features, slot targets and link types are
    // intentionally left empty for this document.
    List<String> slotFeatures = new ArrayList<>();
    List<String> slotTargets = new ArrayList<>();
    List<String> linkTypes = new ArrayList<>();
    List<String> spanLayers = new ArrayList<>();
    spanLayers.add(NamedEntity.class.getName());
    spanLayers.add(POS.class.getName());
    spanLayers.add(Lemma.class.getName());
    List<String> chainLayers = new ArrayList<>();
    chainLayers.add("de.tudarmstadt.ukp.dkpro.core.api.coref.type.Coreference");
    List<String> relationLayers = new ArrayList<>();
    relationLayers.add(Dependency.class.getName());

    // Step 1: read the reference file and write it out again.
    AnalysisEngineDescription writer = createEngineDescription(WebannoTsv3Writer.class, WebannoTsv3Writer.PARAM_TARGET_LOCATION, targetFolder, WebannoTsv3Writer.PARAM_STRIP_EXTENSION, true, WebannoTsv3Writer.PARAM_SPAN_LAYERS, spanLayers, WebannoTsv3Writer.PARAM_SLOT_FEATS, slotFeatures, WebannoTsv3Writer.PARAM_SLOT_TARGETS, slotTargets, WebannoTsv3Writer.PARAM_LINK_TYPES, linkTypes, WebannoTsv3Writer.PARAM_CHAIN_LAYERS, chainLayers, WebannoTsv3Writer.PARAM_RELATION_LAYERS, relationLayers);
    runPipeline(reader, writer);

    // Step 2: load the reference file (cas1) and the freshly written file (cas2).
    CollectionReader reader1 = CollectionReaderFactory.createReader(WebannoTsv3Reader.class, WebannoTsv3Reader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/", WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");
    CollectionReader reader2 = CollectionReaderFactory.createReader(WebannoTsv3Reader.class, WebannoTsv3Reader.PARAM_SOURCE_LOCATION, targetFolder, WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");
    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);

    // Step 3: compare annotation counts. JUnit's contract is assertEquals(expected, actual):
    // the reference file (cas1) is the expectation, the round-tripped file (cas2) is the
    // actual value, so failure messages report the numbers the right way round.
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(), JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(), JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(), JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(), JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(), JCasUtil.select(cas2.getJCas(), Sentence.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Dependency.class).size(), JCasUtil.select(cas2.getJCas(), Dependency.class).size());
}
Also used : NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) CollectionReader(org.apache.uima.collection.CollectionReader) CAS(org.apache.uima.cas.CAS) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) ArrayList(java.util.ArrayList) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) Test(org.junit.Test)

Example 37 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-lab by dkpro.

In the class UimaAsExecutionEngineTest, the method testInit.

/**
 * Runs a minimal UIMA task (test reader plus dummy analysis engine) through the execution
 * service and verifies that the binary stored under the key {@code "test"} for the resulting
 * context contains the text {@code "works"}.
 */
@Test
public void testInit() throws Exception {
    // Start from an empty repository directory.
    File repositoryDir = new File("target/repository");
    FileUtils.deleteQuietly(repositoryDir);
    ((FileSystemStorageService) storageService).setStorageRoot(repositoryDir);

    assertNotNull(executionService);
    assertNotNull(contextFactory);

    // Assemble the task: one reader, one analysis engine.
    AnalysisEngineDescription engineDescription = createEngineDescription(DummyAE.class);
    DefaultUimaTask task = new DefaultUimaTask();
    task.setReaderDescription(createReaderDescription(TestReader.class));
    task.setAnalysisEngineDescription(engineDescription);

    // Execute it and remember the id of the context it ran in.
    TaskExecutionEngine engine = executionService.createEngine(task);
    String contextId = engine.run(task);

    System.out.println("=== Experiments in repository ===");
    List<TaskContextMetadata> storedContexts = storageService.getContexts();
    for (TaskContextMetadata metadata : storedContexts) {
        System.out.println(metadata);
    }

    // Pull the stored "test" binary back out as UTF-8 text.
    final StringBuilder stored = new StringBuilder();
    storageService.retrieveBinary(contextId, "test", new StreamReader() {

        @Override
        public void read(InputStream aInputStream) throws IOException {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            Util.shoveAndClose(aInputStream, buffer);
            stored.append(buffer.toString("UTF-8"));
        }
    });
    assertEquals("works", stored.toString());
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) TaskExecutionEngine(org.dkpro.lab.engine.TaskExecutionEngine) IOException(java.io.IOException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DefaultUimaTask(org.dkpro.lab.uima.task.impl.DefaultUimaTask) TaskContextMetadata(org.dkpro.lab.task.TaskContextMetadata) StreamReader(org.dkpro.lab.storage.StreamReader) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) File(java.io.File) FileSystemStorageService(org.dkpro.lab.storage.filesystem.FileSystemStorageService) Test(org.junit.Test)

Example 38 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-lab by dkpro.

In the class UimaTaskBase, the method persist.

/**
 * Persists this task into the given context: after delegating to the superclass, the collection
 * reader description and the analysis engine description are each serialized to XML and stored
 * as binaries under {@code COLLECTION_READER_DESC_KEY} and {@code ANALYSIS_ENGINE_DESC_KEY}.
 *
 * @param aContext the task context that receives the serialized descriptors
 * @throws IOException declared by the signature; see the superclass and the storage calls
 */
@Override
public void persist(final TaskContext aContext) throws IOException {
    super.persist(aContext);

    // Reader descriptor: written as-is.
    aContext.storeBinary(COLLECTION_READER_DESC_KEY, new StreamWriter() {

        @Override
        public void write(OutputStream aStream) throws Exception {
            getCollectionReaderDescription(aContext).toXML(aStream);
        }
    });

    // Engine descriptor: imports are resolved before it is written.
    aContext.storeBinary(ANALYSIS_ENGINE_DESC_KEY, new StreamWriter() {

        @Override
        public void write(OutputStream aStream) throws Exception {
            AnalysisEngineDescription engineDescription = getAnalysisEngineDescription(aContext);
            // FIXME should use the same resource manager here
            // as the engine uses!
            engineDescription.resolveImports(UIMAFramework.newDefaultResourceManager());
            engineDescription.toXML(aStream);
        }
    });
}
Also used : StreamWriter(org.dkpro.lab.storage.StreamWriter) OutputStream(java.io.OutputStream) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) IOException(java.io.IOException) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException)

Example 39 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-lab by dkpro.

In the class SimpleExecutionEngine, the method run.

/**
 * Executes a {@link UimaTask} in-process: creates a task context, builds the collection reader
 * and the analysis engine from the task's descriptors, feeds every document from the reader
 * through the engine, and reports lifecycle events (initialize, begin, complete / fail, destroy)
 * to the context's life-cycle manager.
 *
 * @param aConfiguration the task to run; must be a {@link UimaTask}
 * @return the id of the task context the task was executed in
 * @throws ExecutionException if {@code aConfiguration} is not a {@link UimaTask}, or wrapping any
 *         failure other than a {@link LifeCycleException} raised during execution
 * @throws LifeCycleException re-thrown unchanged after the failure was reported to the life-cycle
 *         manager
 */
@Override
public String run(Task aConfiguration) throws ExecutionException, LifeCycleException {
    if (!(aConfiguration instanceof UimaTask)) {
        throw new ExecutionException("This engine can only execute [" + UimaTask.class.getName() + "]");
    }
    UimaTask configuration = (UimaTask) aConfiguration;
    // Create persistence service for injection into analysis components
    TaskContext ctx = contextFactory.createContext(aConfiguration);
    try {
        ResourceManager resMgr = newDefaultResourceManager();
        // Make sure the descriptor is fully resolved. It will be modified and
        // thus should not be modified again afterwards by UIMA.
        AnalysisEngineDescription analysisDesc = configuration.getAnalysisEngineDescription(ctx);
        analysisDesc.resolveImports(resMgr);
        if (analysisDesc.getMetaData().getName() == null) {
            analysisDesc.getMetaData().setName("Analysis for " + aConfiguration.getType());
        }
        // Scan components that accept the service and bind it to them
        bindResource(analysisDesc, TaskContext.class, TaskContextProvider.class, TaskContextProvider.PARAM_FACTORY_NAME, contextFactory.getId(), TaskContextProvider.PARAM_CONTEXT_ID, ctx.getId());
        // Set up UIMA context & logging
        Logger logger = new UimaLoggingAdapter(ctx);
        UimaContextAdmin uimaCtx = newUimaContext(logger, resMgr, newConfigurationManager());
        // Set up reader
        CollectionReaderDescription readerDesc = configuration.getCollectionReaderDescription(ctx);
        if (readerDesc.getMetaData().getName() == null) {
            readerDesc.getMetaData().setName("Reader for " + aConfiguration.getType());
        }
        Map<String, Object> addReaderParam = new HashMap<String, Object>();
        addReaderParam.put(Resource.PARAM_UIMA_CONTEXT, uimaCtx);
        addReaderParam.put(Resource.PARAM_RESOURCE_MANAGER, resMgr);
        CollectionReader reader = produceCollectionReader(readerDesc, resMgr, addReaderParam);
        // Set up analysis engine
        AnalysisEngine engine;
        if (analysisDesc.isPrimitive()) {
            engine = new PrimitiveAnalysisEngine_impl();
        } else {
            engine = new AggregateAnalysisEngine_impl();
        }
        // The engine is initialized with an empty parameter map. Earlier revisions of this
        // method put the UIMA context and resource manager into addReaderParam a second time
        // at this point (a copy-paste slip with no effect, since the reader had already been
        // produced), so the engine never received them. That effective behavior is kept.
        // NOTE(review): handing uimaCtx to the engine as well would make reader and engine
        // share one root UIMA context; confirm that UIMA configures both components correctly
        // in that setup before wiring it in here.
        Map<String, Object> addEngineParam = new HashMap<String, Object>();
        engine.initialize(analysisDesc, addEngineParam);
        // Now the setup is complete
        ctx.getLifeCycleManager().initialize(ctx, aConfiguration);
        // Start recording
        ctx.getLifeCycleManager().begin(ctx, aConfiguration);
        // Run the experiment
        // Apply the engine to all documents provided by the reader
        List<ResourceMetaData> metaData = new ArrayList<ResourceMetaData>();
        metaData.add(reader.getMetaData());
        metaData.add(engine.getMetaData());
        // A single CAS, created from the combined reader and engine metadata, is reused for
        // every document.
        CAS cas = CasCreationUtils.createCas(metaData);
        while (reader.hasNext()) {
            reader.getNext(cas);
            engine.process(cas);
            // Capture the document title (if the document annotation type has such a feature)
            // before the CAS is reset for the next document.
            String documentTitle = "";
            Feature documentTitleFeature = cas.getDocumentAnnotation().getType().getFeatureByBaseName("documentTitle");
            if (documentTitleFeature != null) {
                documentTitle = cas.getDocumentAnnotation().getFeatureValueAsString(documentTitleFeature);
            }
            cas.reset();
            // Report the reader's progress to the task context after each document.
            Progress[] progresses = reader.getProgress();
            if (progresses != null) {
                for (Progress p : progresses) {
                    ctx.message("Progress " + readerDesc.getImplementationName() + " " + p.getCompleted() + "/" + p.getTotal() + " " + p.getUnit() + " " + "(" + documentTitle + ")");
                }
            }
        }
        // Shut down engine and reader
        engine.collectionProcessComplete();
        reader.close();
        engine.destroy();
        reader.destroy();
        // End recording
        ctx.getLifeCycleManager().complete(ctx, aConfiguration);
        return ctx.getId();
    } catch (LifeCycleException e) {
        // Life-cycle failures are reported and passed on unchanged.
        ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
        throw e;
    } catch (Throwable e) {
        // Everything else is reported and wrapped, keeping the original cause.
        ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
        throw new ExecutionException(e);
    } finally {
        if (ctx != null) {
            ctx.getLifeCycleManager().destroy(ctx, aConfiguration);
        }
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LifeCycleException(org.dkpro.lab.engine.LifeCycleException) Logger(org.apache.uima.util.Logger) Feature(org.apache.uima.cas.Feature) PrimitiveAnalysisEngine_impl(org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl) UimaTask(org.dkpro.lab.uima.task.UimaTask) ExecutionException(org.dkpro.lab.engine.ExecutionException) UimaLoggingAdapter(org.dkpro.lab.uima.task.impl.UimaLoggingAdapter) Progress(org.apache.uima.util.Progress) TaskContext(org.dkpro.lab.engine.TaskContext) UIMAFramework.produceCollectionReader(org.apache.uima.UIMAFramework.produceCollectionReader) CollectionReader(org.apache.uima.collection.CollectionReader) ResourceManager(org.apache.uima.resource.ResourceManager) UIMAFramework.newDefaultResourceManager(org.apache.uima.UIMAFramework.newDefaultResourceManager) AggregateAnalysisEngine_impl(org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) CAS(org.apache.uima.cas.CAS) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) UimaContextAdmin(org.apache.uima.UimaContextAdmin) ResourceMetaData(org.apache.uima.resource.metadata.ResourceMetaData) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 40 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-lab by dkpro.

In the class UimaAsExecutionEngine, the method run.

/**
 * Executes a {@link UimaTask} through UIMA-AS: creates the task context, resolves and prepares
 * the analysis engine description, deploys the service, initializes the client, processes the
 * data, and reports lifecycle events to the context's life-cycle manager. {@code destroy()} is
 * always invoked at the end, whether or not the run succeeded.
 *
 * @param aConfiguration the task to run; must be a {@link UimaTask}
 * @return the id of the task context the task was executed in
 * @throws ExecutionException if {@code aConfiguration} is not a {@link UimaTask}, or wrapping any
 *         exception other than a {@link LifeCycleException} raised during execution
 * @throws LifeCycleException re-thrown unchanged after the failure was reported to the life-cycle
 *         manager
 */
@Override
public String run(Task aConfiguration) throws ExecutionException, LifeCycleException {
    boolean isUimaTask = aConfiguration instanceof UimaTask;
    if (!isUimaTask) {
        throw new ExecutionException("This engine can only execute [" + UimaTask.class.getName() + "]");
    }

    // Task and context are kept in fields of this engine.
    configuration = (UimaTask) aConfiguration;
    ctx = contextFactory.createContext(aConfiguration);

    try {
        ResourceManager resourceManager = newDefaultResourceManager();

        // Resolve the descriptor completely up front. It is modified below and
        // should not be modified again afterwards by UIMA.
        AnalysisEngineDescription engineDescription = configuration.getAnalysisEngineDescription(ctx);
        engineDescription.resolveImports(resourceManager);

        // Find the components that accept the task context service and bind it to them.
        bindResource(engineDescription, TaskContext.class, TaskContextProvider.class, TaskContextProvider.PARAM_FACTORY_NAME, contextFactory.getId(), TaskContextProvider.PARAM_CONTEXT_ID, ctx.getId());
        ctx.message("Bound external resources");

        // Setup is finished at this point.
        ctx.getLifeCycleManager().initialize(ctx, aConfiguration);

        // Deploy the experiment as a UIMA-AS service, then bring up the client.
        initializeService();
        initializeClient();

        // Recording starts here ...
        ctx.getLifeCycleManager().begin(ctx, aConfiguration);

        // ... the experiment runs ...
        process();

        // ... and recording ends here.
        ctx.getLifeCycleManager().complete(ctx, aConfiguration);
        return ctx.getId();
    } catch (LifeCycleException lifeCycleFailure) {
        // Report the failure and pass it on unchanged.
        ctx.getLifeCycleManager().fail(ctx, aConfiguration, lifeCycleFailure);
        throw lifeCycleFailure;
    } catch (Exception failure) {
        // Report the failure and wrap it, keeping the original cause.
        ctx.getLifeCycleManager().fail(ctx, aConfiguration, failure);
        throw new ExecutionException(failure);
    } finally {
        destroy();
    }
}
Also used : AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) ResourceManager(org.apache.uima.resource.ResourceManager) UIMAFramework.newDefaultResourceManager(org.apache.uima.UIMAFramework.newDefaultResourceManager) LifeCycleException(org.dkpro.lab.engine.LifeCycleException) UimaTask(org.dkpro.lab.uima.task.UimaTask) ExecutionException(org.dkpro.lab.engine.ExecutionException) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) ResourceProcessException(org.apache.uima.resource.ResourceProcessException) IOException(java.io.IOException) ExecutionException(org.dkpro.lab.engine.ExecutionException) LifeCycleException(org.dkpro.lab.engine.LifeCycleException)

Aggregations

AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)62 Test (org.junit.Test)32 File (java.io.File)27 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)25 ArrayList (java.util.ArrayList)22 AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine)18 JCas (org.apache.uima.jcas.JCas)16 Feature (org.dkpro.tc.api.features.Feature)13 FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature)11 ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription)10 AggregateBuilder (org.apache.uima.fit.factory.AggregateBuilder)8 ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException)8 JsonDataWriter (org.dkpro.tc.core.io.JsonDataWriter)8 TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget)7 Gson (com.google.gson.Gson)6 IOException (java.io.IOException)6 Instance (org.dkpro.tc.api.features.Instance)6 OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger)4 BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter)4 CAS (org.apache.uima.cas.CAS)4