Search in sources:

Example 21 with CollectionReader

use of org.apache.uima.collection.CollectionReader in project webanno by webanno.

the class XmiWriterReaderTest method write.

/**
 * Runs a reader/writer pipeline over {@code latin.txt} and checks that an XMI file is produced.
 *
 * <p>The text is read from {@code src/test/resources/texts} with a {@code TextReader}, written
 * with an {@code XmiWriter} into the folder returned by {@code testFolder.getRoot()}, and the
 * test then asserts that {@code latin.txt.xmi} exists in that folder.
 *
 * @throws Exception if building or running the pipeline fails
 */
public void write() throws Exception {
    // Only the single file "latin.txt" is included.
    String[] includePatterns = { ResourceCollectionReaderBase.INCLUDE_PREFIX + "latin.txt" };
    CollectionReader source = CollectionReaderFactory.createReader(
            TextReader.class,
            ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, "src/test/resources/texts",
            ResourceCollectionReaderBase.PARAM_PATTERNS, includePatterns,
            ResourceCollectionReaderBase.PARAM_LANGUAGE, "latin");

    File outputDir = testFolder.getRoot();
    AnalysisEngine sink = AnalysisEngineFactory.createEngine(
            XmiWriter.class,
            XmiWriter.PARAM_TARGET_LOCATION, outputDir.getPath());

    runPipeline(source, sink);

    File expectedXmi = new File(outputDir, "latin.txt.xmi");
    assertTrue(expectedXmi.exists());
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) File(java.io.File) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 22 with CollectionReader

use of org.apache.uima.collection.CollectionReader in project webanno by webanno.

the class WebAnnoTsv2ReaderWriterTest method test.

/**
 * Round-trip test for the TSV2 reader/writer pair.
 *
 * <p>Reads {@code example2.tsv} from the test resources, writes it to a per-test output folder
 * with {@code WebannoTsv2Writer}, then reads both the original and the written file into fresh
 * CASes and asserts that they contain the same number of tokens, POS tags, lemmas, named
 * entities and sentences.
 *
 * @throws Exception if building or running the pipeline fails
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();

    // Step 1: run reader -> writer so that the round-tripped file exists in targetFolder.
    CollectionReader reader = createCollectionReader(WebannoTsv2Reader.class, WebannoTsv2Reader.PARAM_PATH, "src/test/resources/tsv2/", WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    List<String> multipleSpans = new ArrayList<>();
    multipleSpans.add("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity");
    multipleSpans.add("de.tudarmstadt.ukp.dkpro.core.api.coref.type.Coreference");
    AnalysisEngineDescription writer = createPrimitiveDescription(WebannoTsv2Writer.class, WebannoTsv2Writer.PARAM_TARGET_LOCATION, targetFolder, WebannoTsv2Writer.PARAM_STRIP_EXTENSION, true, WebannoTsv2Writer.MULTIPLE_SPAN_ANNOTATIONS, multipleSpans);
    runPipeline(reader, writer);

    // Step 2: load the reference document (cas1) and the round-tripped document (cas2).
    CollectionReader reader1 = createCollectionReader(WebannoTsv2Reader.class, WebannoTsv2Reader.PARAM_PATH, "src/test/resources/tsv2/", WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CollectionReader reader2 = createCollectionReader(WebannoTsv2Reader.class, WebannoTsv2Reader.PARAM_PATH, targetFolder, WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);

    // Step 3: compare annotation counts. JUnit's contract is assertEquals(expected, actual):
    // the reference document (cas1) is the expectation, the round-tripped one (cas2) the
    // actual value, so that failure messages read the right way round.
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(), JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(), JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(), JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(), JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(), JCasUtil.select(cas2.getJCas(), Sentence.class).size());
}
Also used : CollectionReaderFactory.createCollectionReader(org.apache.uima.fit.factory.CollectionReaderFactory.createCollectionReader) CollectionReader(org.apache.uima.collection.CollectionReader) CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) Test(org.junit.Test)

Example 23 with CollectionReader

use of org.apache.uima.collection.CollectionReader in project webanno by webanno.

the class WebAnnoTsv3ReaderWriterTest method test.

/**
 * Round-trip test for the TSV3 reader/writer pair.
 *
 * <p>Reads {@code coref.tsv} from the test resources, writes it to a per-test output folder
 * with {@code WebannoTsv3Writer}, then reads both the original and the written file into fresh
 * CASes and asserts that they contain the same number of tokens, POS tags, lemmas, named
 * entities, sentences and dependencies.
 *
 * @throws Exception if building or running the pipeline fails
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();

    // Step 1: run reader -> writer so that the round-tripped file exists in targetFolder.
    CollectionReader reader = CollectionReaderFactory.createReader(WebannoTsv3Reader.class, WebannoTsv3Reader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/", WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");
    // Slot features, slot targets and link types are passed to the writer as empty lists.
    List<String> slotFeatures = new ArrayList<>();
    List<String> slotTargets = new ArrayList<>();
    List<String> linkTypes = new ArrayList<>();
    List<String> spanLayers = new ArrayList<>();
    spanLayers.add(NamedEntity.class.getName());
    spanLayers.add(POS.class.getName());
    spanLayers.add(Lemma.class.getName());
    List<String> chainLayers = new ArrayList<>();
    chainLayers.add("de.tudarmstadt.ukp.dkpro.core.api.coref.type.Coreference");
    List<String> relationLayers = new ArrayList<>();
    relationLayers.add(Dependency.class.getName());
    AnalysisEngineDescription writer = createEngineDescription(WebannoTsv3Writer.class, WebannoTsv3Writer.PARAM_TARGET_LOCATION, targetFolder, WebannoTsv3Writer.PARAM_STRIP_EXTENSION, true, WebannoTsv3Writer.PARAM_SPAN_LAYERS, spanLayers, WebannoTsv3Writer.PARAM_SLOT_FEATS, slotFeatures, WebannoTsv3Writer.PARAM_SLOT_TARGETS, slotTargets, WebannoTsv3Writer.PARAM_LINK_TYPES, linkTypes, WebannoTsv3Writer.PARAM_CHAIN_LAYERS, chainLayers, WebannoTsv3Writer.PARAM_RELATION_LAYERS, relationLayers);
    runPipeline(reader, writer);

    // Step 2: load the reference document (cas1) and the round-tripped document (cas2).
    CollectionReader reader1 = CollectionReaderFactory.createReader(WebannoTsv3Reader.class, WebannoTsv3Reader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/", WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");
    CollectionReader reader2 = CollectionReaderFactory.createReader(WebannoTsv3Reader.class, WebannoTsv3Reader.PARAM_SOURCE_LOCATION, targetFolder, WebannoTsv3Reader.PARAM_PATTERNS, "coref.tsv");
    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);

    // Step 3: compare annotation counts. JUnit's contract is assertEquals(expected, actual):
    // the reference document (cas1) is the expectation, the round-tripped one (cas2) the
    // actual value, so that failure messages read the right way round.
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(), JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(), JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(), JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(), JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(), JCasUtil.select(cas2.getJCas(), Sentence.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Dependency.class).size(), JCasUtil.select(cas2.getJCas(), Dependency.class).size());
}
Also used : NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) CollectionReader(org.apache.uima.collection.CollectionReader) CAS(org.apache.uima.cas.CAS) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) ArrayList(java.util.ArrayList) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) Test(org.junit.Test)

Example 24 with CollectionReader

use of org.apache.uima.collection.CollectionReader in project dkpro-lab by dkpro.

the class SimpleExecutionEngine method run.

/**
 * Executes a {@link UimaTask}: builds a collection reader and an analysis engine from the
 * task's descriptions, feeds every document from the reader through the engine, and drives
 * the task life-cycle (initialize, begin, complete / fail, destroy) around that.
 *
 * @param aConfiguration the task to run; must be a {@link UimaTask}
 * @return the id of the task context created for this run
 * @throws ExecutionException if the task is not a {@link UimaTask}, or wrapping any failure
 *         other than a {@link LifeCycleException} that occurs during setup or processing
 * @throws LifeCycleException if a life-cycle step fails; rethrown unchanged after the
 *         life-cycle manager has been notified of the failure
 */
@Override
public String run(Task aConfiguration) throws ExecutionException, LifeCycleException {
    if (!(aConfiguration instanceof UimaTask)) {
        throw new ExecutionException("This engine can only execute [" + UimaTask.class.getName() + "]");
    }
    UimaTask configuration = (UimaTask) aConfiguration;
    // Create persistence service for injection into analysis components
    TaskContext ctx = contextFactory.createContext(aConfiguration);
    try {
        ResourceManager resMgr = newDefaultResourceManager();
        // Make sure the descriptor is fully resolved. It will be modified and
        // thus should not be modified again afterwards by UIMA.
        AnalysisEngineDescription analysisDesc = configuration.getAnalysisEngineDescription(ctx);
        analysisDesc.resolveImports(resMgr);
        if (analysisDesc.getMetaData().getName() == null) {
            analysisDesc.getMetaData().setName("Analysis for " + aConfiguration.getType());
        }
        // Scan components that accept the service and bind it to them
        bindResource(analysisDesc, TaskContext.class, TaskContextProvider.class, TaskContextProvider.PARAM_FACTORY_NAME, contextFactory.getId(), TaskContextProvider.PARAM_CONTEXT_ID, ctx.getId());
        // Set up UIMA context & logging
        Logger logger = new UimaLoggingAdapter(ctx);
        UimaContextAdmin uimaCtx = newUimaContext(logger, resMgr, newConfigurationManager());
        // Set up reader
        CollectionReaderDescription readerDesc = configuration.getCollectionReaderDescription(ctx);
        if (readerDesc.getMetaData().getName() == null) {
            readerDesc.getMetaData().setName("Reader for " + aConfiguration.getType());
        }
        Map<String, Object> addReaderParam = new HashMap<String, Object>();
        addReaderParam.put(Resource.PARAM_UIMA_CONTEXT, uimaCtx);
        addReaderParam.put(Resource.PARAM_RESOURCE_MANAGER, resMgr);
        CollectionReader reader = produceCollectionReader(readerDesc, resMgr, addReaderParam);
        // Set up analysis engine
        AnalysisEngine engine;
        if (analysisDesc.isPrimitive()) {
            engine = new PrimitiveAnalysisEngine_impl();
        } else {
            engine = new AggregateAnalysisEngine_impl();
        }
        // Fix: these two entries used to be put into addReaderParam (a copy-paste slip), after
        // the reader had already been produced, so the engine was initialized with an empty
        // parameter map and received neither the logging adapter nor the resource manager
        // against which its descriptor was resolved.
        // The engine gets a context of its own, built from the same logger and resource
        // manager but with a separate configuration manager, so that reader and engine do not
        // register their configuration under the same root context.
        // NOTE(review): confirm against the UIMA version in use that a dedicated root context
        // per component is the desired setup here.
        Map<String, Object> addEngineParam = new HashMap<String, Object>();
        UimaContextAdmin engineUimaCtx = newUimaContext(logger, resMgr, newConfigurationManager());
        addEngineParam.put(Resource.PARAM_UIMA_CONTEXT, engineUimaCtx);
        addEngineParam.put(Resource.PARAM_RESOURCE_MANAGER, resMgr);
        engine.initialize(analysisDesc, addEngineParam);
        // Now the setup is complete
        ctx.getLifeCycleManager().initialize(ctx, aConfiguration);
        // Start recording
        ctx.getLifeCycleManager().begin(ctx, aConfiguration);
        // Run the experiment
        // Apply the engine to all documents provided by the reader
        List<ResourceMetaData> metaData = new ArrayList<ResourceMetaData>();
        metaData.add(reader.getMetaData());
        metaData.add(engine.getMetaData());
        // A single CAS is created from the combined metadata and reused for every document.
        CAS cas = CasCreationUtils.createCas(metaData);
        while (reader.hasNext()) {
            reader.getNext(cas);
            engine.process(cas);
            // The title must be read before cas.reset() below clears the CAS contents.
            String documentTitle = "";
            Feature documentTitleFeature = cas.getDocumentAnnotation().getType().getFeatureByBaseName("documentTitle");
            if (documentTitleFeature != null) {
                documentTitle = cas.getDocumentAnnotation().getFeatureValueAsString(documentTitleFeature);
            }
            cas.reset();
            Progress[] progresses = reader.getProgress();
            if (progresses != null) {
                for (Progress p : progresses) {
                    ctx.message("Progress " + readerDesc.getImplementationName() + " " + p.getCompleted() + "/" + p.getTotal() + " " + p.getUnit() + " " + "(" + documentTitle + ")");
                }
            }
        }
        // Shut down engine and reader
        engine.collectionProcessComplete();
        reader.close();
        engine.destroy();
        reader.destroy();
        // End recording
        ctx.getLifeCycleManager().complete(ctx, aConfiguration);
        return ctx.getId();
    } catch (LifeCycleException e) {
        // Life-cycle failures are reported and then propagated unchanged.
        ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
        throw e;
    } catch (Throwable e) {
        // Anything else is reported and wrapped, preserving the cause.
        ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
        throw new ExecutionException(e);
    } finally {
        if (ctx != null) {
            ctx.getLifeCycleManager().destroy(ctx, aConfiguration);
        }
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LifeCycleException(org.dkpro.lab.engine.LifeCycleException) Logger(org.apache.uima.util.Logger) Feature(org.apache.uima.cas.Feature) PrimitiveAnalysisEngine_impl(org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl) UimaTask(org.dkpro.lab.uima.task.UimaTask) ExecutionException(org.dkpro.lab.engine.ExecutionException) UimaLoggingAdapter(org.dkpro.lab.uima.task.impl.UimaLoggingAdapter) Progress(org.apache.uima.util.Progress) TaskContext(org.dkpro.lab.engine.TaskContext) UIMAFramework.produceCollectionReader(org.apache.uima.UIMAFramework.produceCollectionReader) CollectionReader(org.apache.uima.collection.CollectionReader) ResourceManager(org.apache.uima.resource.ResourceManager) UIMAFramework.newDefaultResourceManager(org.apache.uima.UIMAFramework.newDefaultResourceManager) AggregateAnalysisEngine_impl(org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) CAS(org.apache.uima.cas.CAS) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) UimaContextAdmin(org.apache.uima.UimaContextAdmin) ResourceMetaData(org.apache.uima.resource.metadata.ResourceMetaData) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 25 with CollectionReader

use of org.apache.uima.collection.CollectionReader in project dkpro-tc by dkpro.

the class LiblinearSaveAndLoadModelDocumentRegression method regressionLoadModel.

/**
 * Loads a stored model and applies it to the first document of the regression test data.
 *
 * <p>The document is read with a {@code LinewiseTextOutcomeReader}, segmented with a
 * {@code BreakIteratorSegmenter} and annotated by a {@code TcAnnotator} configured with the
 * given model folder. The method then asserts that exactly one
 * {@code TextClassificationOutcome} was produced and that its outcome, parsed as a number,
 * lies strictly between 0.1 and 5.
 *
 * @param modelFolder folder containing the stored model
 * @throws UIMAException if a UIMA component cannot be created or fails while processing
 * @throws IOException if reading the input fails
 */
private void regressionLoadModel(File modelFolder) throws UIMAException, IOException {
    CollectionReader source = CollectionReaderFactory.createReader(
            LinewiseTextOutcomeReader.class,
            LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
            LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
            LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, regressionTest,
            LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    AnalysisEngine tokenizer = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine classifier = AnalysisEngineFactory.createEngine(
            TcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());

    JCas document = JCasFactory.createJCas();
    // The result of hasNext() is ignored; the call is kept in case the reader relies on it
    // being invoked before getNext().
    source.hasNext();
    source.getNext(document.getCas());
    tokenizer.process(document);
    classifier.process(document);

    List<TextClassificationOutcome> predictions = new ArrayList<>(JCasUtil.select(document, TextClassificationOutcome.class));
    assertEquals(1, predictions.size());

    double predicted = Double.parseDouble(predictions.get(0).getOutcome());
    boolean withinRange = predicted > 0.1 && predicted < 5;
    assertTrue(withinRange);
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Aggregations

CollectionReader (org.apache.uima.collection.CollectionReader): 35, JCas (org.apache.uima.jcas.JCas): 28, ArrayList (java.util.ArrayList): 25, TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome): 15, AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 14, Test (org.junit.Test): 13, Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 8, CAS (org.apache.uima.cas.CAS): 7, File (java.io.File): 5, TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 5, List (java.util.List): 4, AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 4, TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence): 4, POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS): 3, Lemma (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma): 3, TypeSystemDescription (org.apache.uima.resource.metadata.TypeSystemDescription): 3, Evaluator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.Evaluator): 2, PossibleValue (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.PossibleValue): 2, ValuesGenerator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.ValuesGenerator): 2, ConstraintsGrammar (de.tudarmstadt.ukp.clarin.webanno.constraints.grammar.ConstraintsGrammar): 2