Search in sources:

Example 31 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

the class InitTask method getAnalysisEngineDescription.

// what should actually be done in this task
@Override
public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext) throws ResourceInitializationException, IOException {
    String output = isTesting ? OUTPUT_KEY_TEST : OUTPUT_KEY_TRAIN;
    AnalysisEngineDescription xmiWriter = createEngineDescription(BinaryCasWriter.class, BinaryCasWriter.PARAM_TARGET_LOCATION, aContext.getFolder(output, AccessMode.READWRITE).getPath(), BinaryCasWriter.PARAM_FORMAT, "6+");
    // special connector that just checks whether there are no instances and outputs a
    // meaningful error message then
    // should be added before preprocessing
    AnalysisEngineDescription emptyProblemChecker = createEngineDescription(PreprocessConnector.class);
    // PART_TWO views
    if (featureMode.equals(FM_PAIR)) {
        AggregateBuilder builder = new AggregateBuilder();
        builder.add(createEngineDescription(preprocessing), CAS.NAME_DEFAULT_SOFA, PART_ONE);
        builder.add(createEngineDescription(preprocessing), CAS.NAME_DEFAULT_SOFA, PART_TWO);
        preprocessing = builder.createAggregateDescription();
    } else if (operativeViews != null) {
        AggregateBuilder builder = new AggregateBuilder();
        for (String viewName : operativeViews) {
            builder.add(createEngineDescription(preprocessing), CAS.NAME_DEFAULT_SOFA, viewName);
        }
        preprocessing = builder.createAggregateDescription();
    }
    return createEngineDescription(createEngineDescription(DocumentModeAnnotator.class, DocumentModeAnnotator.PARAM_FEATURE_MODE, featureMode), // assign each CAS an unique id
    createEngineDescription(AssignIdConnector.class), // tc pre validity check
    getPreValidityCheckEngine(), emptyProblemChecker, // user preprocessing
    preprocessing, // tc post validity check
    getPostValidityCheckEngine(), // collects the outcomes
    createEngineDescription(OutcomeCollector.class, OutcomeCollector.PARAM_TARGET_FOLDER, aContext.getFolder(output, AccessMode.READWRITE)), // write CAS to HDD
    xmiWriter);
}
Also used : DocumentModeAnnotator(org.dkpro.tc.core.task.uima.DocumentModeAnnotator) AssignIdConnector(org.dkpro.tc.core.task.uima.AssignIdConnector) AggregateBuilder(org.apache.uima.fit.factory.AggregateBuilder) OutcomeCollector(org.dkpro.tc.core.task.uima.OutcomeCollector) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription)

Example 32 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.

the class TcAnnotator method initialize.

@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);
    try {
        featureExtractors = new FeatureResourceLoader(tcModelLocation).loadExternalResourceDescriptionOfFeatures();
        mlAdapter = initMachineLearningAdapter(tcModelLocation);
        featureMode = initFeatureMode(tcModelLocation);
        learningMode = initLearningMode(tcModelLocation);
        validateUimaParameter();
        AnalysisEngineDescription connector = getSaveModelConnector(tcModelLocation.getAbsolutePath(), mlAdapter, learningMode, featureMode, featureExtractors);
        engine = UIMAFramework.produceAnalysisEngine(connector, getModelFeatureAwareResourceManager(tcModelLocation), null);
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }
}
Also used : ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Example 33 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.

the class ImportExportServiceImpl method exportCasToFile.

/**
 * A new directory is created using UUID so that every exported file will reside in its own
 * directory. This is useful as the written file can have multiple extensions based on the
 * Writer class used.
 */
@Override
public File exportCasToFile(CAS cas, SourceDocument aDocument, String aFileName, @SuppressWarnings("rawtypes") Class aWriter, boolean aStripExtension) throws IOException, UIMAException {
    // Update the source file name in case it is changed for some reason. This is necessary
    // for the writers to create the files under the correct names.
    Project project = aDocument.getProject();
    File currentDocumentUri = new File(dir.getAbsolutePath() + "/" + PROJECT_FOLDER + "/" + project.getId() + "/" + DOCUMENT_FOLDER + "/" + aDocument.getId() + "/" + SOURCE_FOLDER);
    DocumentMetaData documentMetadata = DocumentMetaData.get(cas.getJCas());
    documentMetadata.setDocumentUri(new File(currentDocumentUri, aFileName).toURI().toURL().toExternalForm());
    documentMetadata.setDocumentBaseUri(currentDocumentUri.toURI().toURL().toExternalForm());
    documentMetadata.setCollectionId(currentDocumentUri.toURI().toURL().toExternalForm());
    documentMetadata.setDocumentUri(new File(dir.getAbsolutePath() + "/" + PROJECT_FOLDER + "/" + project.getId() + "/" + DOCUMENT_FOLDER + "/" + aDocument.getId() + "/" + SOURCE_FOLDER + "/" + aFileName).toURI().toURL().toExternalForm());
    // update with the correct tagset name
    List<AnnotationFeature> features = annotationService.listAnnotationFeature(project);
    for (AnnotationFeature feature : features) {
        TagSet tagSet = feature.getTagset();
        if (tagSet == null) {
            continue;
        } else if (!feature.getLayer().getType().equals(WebAnnoConst.CHAIN_TYPE)) {
            updateCasWithTagSet(cas, feature.getLayer().getName(), tagSet.getName());
        }
    }
    File exportTempDir = File.createTempFile("webanno", "export");
    try {
        exportTempDir.delete();
        exportTempDir.mkdirs();
        AnalysisEngineDescription writer;
        if (aWriter.getName().equals("de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv3Writer")) {
            List<AnnotationLayer> layers = annotationService.listAnnotationLayer(aDocument.getProject());
            List<String> slotFeatures = new ArrayList<>();
            List<String> slotTargets = new ArrayList<>();
            List<String> linkTypes = new ArrayList<>();
            Set<String> spanLayers = new HashSet<>();
            Set<String> slotLayers = new HashSet<>();
            for (AnnotationLayer layer : layers) {
                if (layer.getType().contentEquals(WebAnnoConst.SPAN_TYPE)) {
                    // TSV will not use this
                    if (!annotationExists(cas, layer.getName())) {
                        continue;
                    }
                    boolean isslotLayer = false;
                    for (AnnotationFeature f : annotationService.listAnnotationFeature(layer)) {
                        if (MultiValueMode.ARRAY.equals(f.getMultiValueMode()) && LinkMode.WITH_ROLE.equals(f.getLinkMode())) {
                            isslotLayer = true;
                            slotFeatures.add(layer.getName() + ":" + f.getName());
                            slotTargets.add(f.getType());
                            linkTypes.add(f.getLinkTypeName());
                        }
                    }
                    if (isslotLayer) {
                        slotLayers.add(layer.getName());
                    } else {
                        spanLayers.add(layer.getName());
                    }
                }
            }
            spanLayers.addAll(slotLayers);
            List<String> chainLayers = new ArrayList<>();
            for (AnnotationLayer layer : layers) {
                if (layer.getType().contentEquals(WebAnnoConst.CHAIN_TYPE)) {
                    if (!chainAnnotationExists(cas, layer.getName() + "Chain")) {
                        continue;
                    }
                    chainLayers.add(layer.getName());
                }
            }
            List<String> relationLayers = new ArrayList<>();
            for (AnnotationLayer layer : layers) {
                if (layer.getType().contentEquals(WebAnnoConst.RELATION_TYPE)) {
                    // TSV will not use this
                    if (!annotationExists(cas, layer.getName())) {
                        continue;
                    }
                    relationLayers.add(layer.getName());
                }
            }
            writer = createEngineDescription(aWriter, JCasFileWriter_ImplBase.PARAM_TARGET_LOCATION, exportTempDir, JCasFileWriter_ImplBase.PARAM_STRIP_EXTENSION, aStripExtension, "spanLayers", spanLayers, "slotFeatures", slotFeatures, "slotTargets", slotTargets, "linkTypes", linkTypes, "chainLayers", chainLayers, "relationLayers", relationLayers);
        } else {
            writer = createEngineDescription(aWriter, JCasFileWriter_ImplBase.PARAM_TARGET_LOCATION, exportTempDir, JCasFileWriter_ImplBase.PARAM_STRIP_EXTENSION, aStripExtension);
        }
        runPipeline(cas, writer);
        // If the writer produced more than one file, we package it up as a ZIP file
        File exportFile;
        if (exportTempDir.listFiles().length > 1) {
            exportFile = new File(exportTempDir.getAbsolutePath() + ".zip");
            try {
                ZipUtils.zipFolder(exportTempDir, exportFile);
            } catch (Exception e) {
                try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(project.getId()))) {
                    log.info("Unable to create zip File");
                }
            }
        } else {
            exportFile = new File(exportTempDir.getParent(), exportTempDir.listFiles()[0].getName());
            FileUtils.copyFile(exportTempDir.listFiles()[0], exportFile);
        }
        return exportFile;
    } finally {
        if (exportTempDir != null) {
            FileUtils.forceDelete(exportTempDir);
        }
    }
}
Also used : ArrayList(java.util.ArrayList) AnnotationLayer(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer) UIMAException(org.apache.uima.UIMAException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) Project(de.tudarmstadt.ukp.clarin.webanno.model.Project) TagSet(de.tudarmstadt.ukp.clarin.webanno.model.TagSet) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) File(java.io.File) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) HashSet(java.util.HashSet)

Example 34 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.

the class WebAnnoTsv3WriterTestBase method writeAndAssertEquals.

private void writeAndAssertEquals(JCas aJCas, Object... aParams) throws IOException, ResourceInitializationException, AnalysisEngineProcessException {
    assumeFalse("This test is known to fail.", isKnownToFail(testContext.getMethodName()));
    String targetFolder = "target/test-output/" + testContext.getClassName() + "/" + getSuiteName() + "/" + testContext.getMethodName();
    String referenceFolder = "src/test/resources/" + getSuiteName() + "/" + testContext.getMethodName();
    List<Object> params = new ArrayList<>();
    params.addAll(asList(aParams));
    params.add(WebannoTsv3Writer.PARAM_TARGET_LOCATION);
    params.add(targetFolder);
    AnalysisEngineDescription tsv = makeWriter();
    for (int i = 0; i < params.size(); i += 2) {
        String name = (String) params.get(i);
        Object value = params.get(i + 1);
        if (ConfigurationParameterFactory.canParameterBeSet(tsv, name)) {
            ConfigurationParameterFactory.setParameter(tsv, name, value);
        }
    }
    AnalysisEngineDescription xmi = createEngineDescription(XmiWriter.class, XmiWriter.PARAM_TARGET_LOCATION, targetFolder);
    SimplePipeline.runPipeline(aJCas, tsv, xmi);
    File referenceFile = new File(referenceFolder, "reference.tsv");
    assumeTrue("No reference data available for this test.", referenceFile.exists());
    File actualFile = new File(targetFolder, "doc.tsv");
    String reference = FileUtils.readFileToString(referenceFile, "UTF-8");
    String actual = FileUtils.readFileToString(actualFile, "UTF-8");
    assertEquals(reference, actual);
}
Also used : ArrayList(java.util.ArrayList) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) File(java.io.File)

Example 35 with AnalysisEngineDescription

use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.

the class WebAnnoTsv2ReaderWriterTest method test.

@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();
    CollectionReader reader = createCollectionReader(WebannoTsv2Reader.class, WebannoTsv2Reader.PARAM_PATH, "src/test/resources/tsv2/", WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    List<String> multipleSpans = new ArrayList<>();
    multipleSpans.add("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity");
    multipleSpans.add("de.tudarmstadt.ukp.dkpro.core.api.coref.type.Coreference");
    AnalysisEngineDescription writer = createPrimitiveDescription(WebannoTsv2Writer.class, WebannoTsv2Writer.PARAM_TARGET_LOCATION, targetFolder, WebannoTsv2Writer.PARAM_STRIP_EXTENSION, true, WebannoTsv2Writer.MULTIPLE_SPAN_ANNOTATIONS, multipleSpans);
    runPipeline(reader, writer);
    CollectionReader reader1 = createCollectionReader(WebannoTsv2Reader.class, WebannoTsv2Reader.PARAM_PATH, "src/test/resources/tsv2/", WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CollectionReader reader2 = createCollectionReader(WebannoTsv2Reader.class, WebannoTsv2Reader.PARAM_PATH, targetFolder, WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);
    assertEquals(JCasUtil.select(cas2.getJCas(), Token.class).size(), JCasUtil.select(cas1.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas2.getJCas(), POS.class).size(), JCasUtil.select(cas1.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas2.getJCas(), Lemma.class).size(), JCasUtil.select(cas1.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas2.getJCas(), NamedEntity.class).size(), JCasUtil.select(cas1.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas2.getJCas(), Sentence.class).size(), JCasUtil.select(cas1.getJCas(), Sentence.class).size());
}
Also used : CollectionReaderFactory.createCollectionReader(org.apache.uima.fit.factory.CollectionReaderFactory.createCollectionReader) CollectionReader(org.apache.uima.collection.CollectionReader) CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) Test(org.junit.Test)

Aggregations

AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 62; Test (org.junit.Test): 32; File (java.io.File): 27; CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription): 25; ArrayList (java.util.ArrayList): 22; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 18; JCas (org.apache.uima.jcas.JCas): 16; Feature (org.dkpro.tc.api.features.Feature): 13; FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature): 11; ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription): 10; AggregateBuilder (org.apache.uima.fit.factory.AggregateBuilder): 8; ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException): 8; JsonDataWriter (org.dkpro.tc.core.io.JsonDataWriter): 8; TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 7; Gson (com.google.gson.Gson): 6; IOException (java.io.IOException): 6; Instance (org.dkpro.tc.api.features.Instance): 6; OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger): 4; BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter): 4; CAS (org.apache.uima.cas.CAS): 4