use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.
the class InitTask method getAnalysisEngineDescription.
/**
 * Assembles the aggregate analysis engine that this task runs.
 *
 * <p>The returned aggregate chains, in this order: the document mode annotator, the id
 * assignment connector, the pre-validity check, the empty-problem checker, the (possibly
 * view-mapped) user preprocessing, the post-validity check, the outcome collector and
 * finally the binary CAS writer.
 *
 * <p>Note that this method reassigns the {@code preprocessing} member (declared outside
 * this method) when pair mode is active or when operative views are configured.
 *
 * @param aContext
 *            the task context used to resolve the train/test output folder
 * @return the description of the complete aggregate engine
 * @throws ResourceInitializationException
 *             if one of the engine descriptions cannot be created
 * @throws IOException
 *             declared by the overridden method
 */
@Override
public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext)
        throws ResourceInitializationException, IOException {
    // Test runs and training runs write to different storage keys.
    final String outputKey;
    if (isTesting) {
        outputKey = OUTPUT_KEY_TEST;
    } else {
        outputKey = OUTPUT_KEY_TRAIN;
    }

    // Writes each CAS in binary form (format "6+") into the output folder.
    AnalysisEngineDescription casWriter = createEngineDescription(
            BinaryCasWriter.class,
            BinaryCasWriter.PARAM_TARGET_LOCATION,
            aContext.getFolder(outputKey, AccessMode.READWRITE).getPath(),
            BinaryCasWriter.PARAM_FORMAT, "6+");

    // Special connector that just checks whether there are no instances and outputs a
    // meaningful error message then; it has to run before the preprocessing.
    AnalysisEngineDescription emptyInputGuard = createEngineDescription(PreprocessConnector.class);

    // Pair mode preprocesses the PART_ONE and PART_TWO views; otherwise every configured
    // operative view is preprocessed. In both cases the default sofa of a preprocessing
    // copy is mapped onto the respective view.
    boolean pairMode = featureMode.equals(FM_PAIR);
    if (pairMode || operativeViews != null) {
        AggregateBuilder perViewPreprocessing = new AggregateBuilder();
        if (pairMode) {
            perViewPreprocessing.add(createEngineDescription(preprocessing),
                    CAS.NAME_DEFAULT_SOFA, PART_ONE);
            perViewPreprocessing.add(createEngineDescription(preprocessing),
                    CAS.NAME_DEFAULT_SOFA, PART_TWO);
        } else {
            for (String view : operativeViews) {
                perViewPreprocessing.add(createEngineDescription(preprocessing),
                        CAS.NAME_DEFAULT_SOFA, view);
            }
        }
        preprocessing = perViewPreprocessing.createAggregateDescription();
    }

    return createEngineDescription(
            createEngineDescription(DocumentModeAnnotator.class,
                    DocumentModeAnnotator.PARAM_FEATURE_MODE, featureMode),
            // assign each CAS an unique id
            createEngineDescription(AssignIdConnector.class),
            // tc pre validity check
            getPreValidityCheckEngine(),
            emptyInputGuard,
            // user preprocessing
            preprocessing,
            // tc post validity check
            getPostValidityCheckEngine(),
            // collects the outcomes
            createEngineDescription(OutcomeCollector.class,
                    OutcomeCollector.PARAM_TARGET_FOLDER,
                    aContext.getFolder(outputKey, AccessMode.READWRITE)),
            // write CAS to HDD
            casWriter);
}
use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project dkpro-tc by dkpro.
the class TcAnnotator method initialize.
/**
 * Initializes this annotator from the model stored at {@code tcModelLocation}.
 *
 * <p>Loads the feature extractor resources, the machine learning adapter, the feature mode
 * and the learning mode from the model location, validates the UIMA parameters and then
 * produces the analysis engine from the save-model connector description.
 *
 * @param context
 *            the UIMA context passed on to the super class
 * @throws ResourceInitializationException
 *             if any initialization step fails; a {@code ResourceInitializationException}
 *             raised by a step is propagated unchanged, any other exception is wrapped as
 *             its cause
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);
    try {
        featureExtractors = new FeatureResourceLoader(tcModelLocation)
                .loadExternalResourceDescriptionOfFeatures();
        mlAdapter = initMachineLearningAdapter(tcModelLocation);
        featureMode = initFeatureMode(tcModelLocation);
        learningMode = initLearningMode(tcModelLocation);
        validateUimaParameter();

        AnalysisEngineDescription connector = getSaveModelConnector(
                tcModelLocation.getAbsolutePath(), mlAdapter, learningMode, featureMode,
                featureExtractors);
        engine = UIMAFramework.produceAnalysisEngine(connector,
                getModelFeatureAwareResourceManager(tcModelLocation), null);
    } catch (ResourceInitializationException e) {
        // Already the declared exception type: rethrow as-is instead of nesting it inside
        // a second ResourceInitializationException, which would bury the real message.
        throw e;
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }
}
use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.
the class ImportExportServiceImpl method exportCasToFile.
/**
 * Writes the given CAS to a file using the given writer class and returns that file.
 *
 * <p>A new temporary directory is created for every export so that every exported file
 * resides in its own directory. This is useful as the written file can have multiple
 * extensions based on the writer class used. If the writer produced more than one file, the
 * directory content is packaged as a ZIP file next to the temporary directory; otherwise the
 * single file is copied into the parent of the temporary directory. The temporary directory
 * itself is deleted before this method returns.
 *
 * @param cas
 *            the CAS to export; its document metadata and tagset information are updated
 * @param aDocument
 *            the source document the CAS belongs to
 * @param aFileName
 *            the file name recorded in the document URI of the CAS
 * @param aWriter
 *            the writer component class
 * @param aStripExtension
 *            value passed to the writer's strip-extension parameter
 * @return the exported file (a ZIP file if the writer produced several files)
 * @throws IOException
 *             if the writer produced no output, the ZIP file cannot be created, or a file
 *             operation fails
 * @throws UIMAException
 *             if the writer cannot be created or the pipeline fails
 */
@Override
public File exportCasToFile(CAS cas, SourceDocument aDocument, String aFileName, @SuppressWarnings("rawtypes") Class aWriter, boolean aStripExtension) throws IOException, UIMAException {
    // Update the source file name in case it is changed for some reason. This is necessary
    // for the writers to create the files under the correct names.
    Project project = aDocument.getProject();
    File currentDocumentUri = new File(dir.getAbsolutePath() + "/" + PROJECT_FOLDER + "/"
            + project.getId() + "/" + DOCUMENT_FOLDER + "/" + aDocument.getId() + "/"
            + SOURCE_FOLDER);
    String baseUri = currentDocumentUri.toURI().toURL().toExternalForm();
    DocumentMetaData documentMetadata = DocumentMetaData.get(cas.getJCas());
    documentMetadata.setDocumentUri(
            new File(currentDocumentUri, aFileName).toURI().toURL().toExternalForm());
    documentMetadata.setDocumentBaseUri(baseUri);
    documentMetadata.setCollectionId(baseUri);

    // Update with the correct tagset name (features without tagset and chain layers are
    // skipped).
    for (AnnotationFeature feature : annotationService.listAnnotationFeature(project)) {
        TagSet tagSet = feature.getTagset();
        if (tagSet != null
                && !feature.getLayer().getType().equals(WebAnnoConst.CHAIN_TYPE)) {
            updateCasWithTagSet(cas, feature.getLayer().getName(), tagSet.getName());
        }
    }

    // Created atomically as a directory; avoids the create-file/delete/mkdirs sequence in
    // which another process could claim the path in between.
    File exportTempDir = java.nio.file.Files.createTempDirectory("webanno-export").toFile();
    try {
        AnalysisEngineDescription writer;
        if (aWriter.getName().equals("de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv3Writer")) {
            writer = createTsv3WriterDescription(cas, aDocument, aWriter, exportTempDir,
                    aStripExtension);
        } else {
            writer = createEngineDescription(aWriter,
                    JCasFileWriter_ImplBase.PARAM_TARGET_LOCATION, exportTempDir,
                    JCasFileWriter_ImplBase.PARAM_STRIP_EXTENSION, aStripExtension);
        }
        runPipeline(cas, writer);

        // listFiles() yields null on I/O problems and an empty array if nothing was written;
        // fail with a clear message instead of a NullPointerException or
        // ArrayIndexOutOfBoundsException further down.
        File[] exportedFiles = exportTempDir.listFiles();
        if (exportedFiles == null || exportedFiles.length == 0) {
            throw new IOException("Writer [" + aWriter.getName()
                    + "] did not produce any output for [" + aFileName + "]");
        }

        // If the writer produced more than one file, we package it up as a ZIP file
        File exportFile;
        if (exportedFiles.length > 1) {
            exportFile = new File(exportTempDir.getAbsolutePath() + ".zip");
            try {
                ZipUtils.zipFolder(exportTempDir, exportFile);
            } catch (Exception e) {
                try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID,
                        String.valueOf(project.getId()))) {
                    log.error("Unable to create zip File", e);
                }
                // Do not hand a missing or partial archive back to the caller.
                throw new IOException("Unable to create zip file [" + exportFile + "]", e);
            }
        } else {
            exportFile = new File(exportTempDir.getParent(), exportedFiles[0].getName());
            FileUtils.copyFile(exportedFiles[0], exportFile);
        }
        return exportFile;
    } finally {
        FileUtils.forceDelete(exportTempDir);
    }
}

/**
 * Creates the WebAnno TSV3 writer description, configured with those layers of the
 * document's project for which the CAS actually contains annotations.
 */
private AnalysisEngineDescription createTsv3WriterDescription(CAS cas, SourceDocument aDocument, @SuppressWarnings("rawtypes") Class aWriter, File aTargetDir, boolean aStripExtension) throws IOException, UIMAException {
    List<String> slotFeatures = new ArrayList<>();
    List<String> slotTargets = new ArrayList<>();
    List<String> linkTypes = new ArrayList<>();
    Set<String> spanLayers = new HashSet<>();
    List<String> chainLayers = new ArrayList<>();
    List<String> relationLayers = new ArrayList<>();

    for (AnnotationLayer layer : annotationService.listAnnotationLayer(aDocument.getProject())) {
        // Span layers without annotations in the CAS are not used by TSV.
        if (layer.getType().contentEquals(WebAnnoConst.SPAN_TYPE)
                && annotationExists(cas, layer.getName())) {
            for (AnnotationFeature f : annotationService.listAnnotationFeature(layer)) {
                if (MultiValueMode.ARRAY.equals(f.getMultiValueMode())
                        && LinkMode.WITH_ROLE.equals(f.getLinkMode())) {
                    slotFeatures.add(layer.getName() + ":" + f.getName());
                    slotTargets.add(f.getType());
                    linkTypes.add(f.getLinkTypeName());
                }
            }
            // Layers with and without slot features both end up in the span layer set.
            spanLayers.add(layer.getName());
        }
        if (layer.getType().contentEquals(WebAnnoConst.CHAIN_TYPE)
                && chainAnnotationExists(cas, layer.getName() + "Chain")) {
            chainLayers.add(layer.getName());
        }
        // Relation layers without annotations in the CAS are not used by TSV.
        if (layer.getType().contentEquals(WebAnnoConst.RELATION_TYPE)
                && annotationExists(cas, layer.getName())) {
            relationLayers.add(layer.getName());
        }
    }

    return createEngineDescription(aWriter,
            JCasFileWriter_ImplBase.PARAM_TARGET_LOCATION, aTargetDir,
            JCasFileWriter_ImplBase.PARAM_STRIP_EXTENSION, aStripExtension,
            "spanLayers", spanLayers,
            "slotFeatures", slotFeatures,
            "slotTargets", slotTargets,
            "linkTypes", linkTypes,
            "chainLayers", chainLayers,
            "relationLayers", relationLayers);
}
use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.
the class WebAnnoTsv3WriterTestBase method writeAndAssertEquals.
/**
 * Writes the given JCas with the TSV writer (and additionally as XMI) into the test output
 * folder and compares the produced {@code doc.tsv} with the {@code reference.tsv} of the
 * current test method.
 *
 * <p>The test is skipped (via JUnit assumptions) if it is registered as known to fail or if
 * no reference file exists.
 *
 * @param aJCas
 *            the JCas to write
 * @param aParams
 *            alternating parameter names and values for the TSV writer; parameters the
 *            writer cannot accept are ignored
 */
private void writeAndAssertEquals(JCas aJCas, Object... aParams) throws IOException, ResourceInitializationException, AnalysisEngineProcessException {
    assumeFalse("This test is known to fail.", isKnownToFail(testContext.getMethodName()));

    String targetFolder = "target/test-output/" + testContext.getClassName() + "/"
            + getSuiteName() + "/" + testContext.getMethodName();
    String referenceFolder = "src/test/resources/" + getSuiteName() + "/"
            + testContext.getMethodName();

    // Caller-supplied settings first, then the target location as the final name/value pair.
    List<Object> writerSettings = new ArrayList<>(asList(aParams));
    writerSettings.add(WebannoTsv3Writer.PARAM_TARGET_LOCATION);
    writerSettings.add(targetFolder);

    // Apply every name/value pair that the writer description actually supports.
    AnalysisEngineDescription tsvWriter = makeWriter();
    int idx = 0;
    while (idx < writerSettings.size()) {
        String paramName = (String) writerSettings.get(idx);
        Object paramValue = writerSettings.get(idx + 1);
        idx += 2;
        if (!ConfigurationParameterFactory.canParameterBeSet(tsvWriter, paramName)) {
            continue;
        }
        ConfigurationParameterFactory.setParameter(tsvWriter, paramName, paramValue);
    }

    AnalysisEngineDescription xmiWriter = createEngineDescription(XmiWriter.class,
            XmiWriter.PARAM_TARGET_LOCATION, targetFolder);
    SimplePipeline.runPipeline(aJCas, tsvWriter, xmiWriter);

    File expectedTsv = new File(referenceFolder, "reference.tsv");
    assumeTrue("No reference data available for this test.", expectedTsv.exists());
    File producedTsv = new File(targetFolder, "doc.tsv");

    String expectedContent = FileUtils.readFileToString(expectedTsv, "UTF-8");
    String producedContent = FileUtils.readFileToString(producedTsv, "UTF-8");
    assertEquals(expectedContent, producedContent);
}
use of org.apache.uima.analysis_engine.AnalysisEngineDescription in project webanno by webanno.
the class WebAnnoTsv2ReaderWriterTest method test.
/**
 * Round-trip test: reads {@code example2.tsv}, writes it with the TSV2 writer, reads the
 * written file back and checks that both CASes contain the same number of tokens, POS tags,
 * lemmas, named entities and sentences.
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();

    // Write the example file with the writer under test.
    CollectionReader reader = createCollectionReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_PATH, "src/test/resources/tsv2/",
            WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    List<String> multipleSpans = new ArrayList<>();
    multipleSpans.add("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity");
    multipleSpans.add("de.tudarmstadt.ukp.dkpro.core.api.coref.type.Coreference");
    // NOTE(review): createPrimitiveDescription appears to be deprecated in uimaFIT 2.x in
    // favour of createEngineDescription - confirm before migrating.
    AnalysisEngineDescription writer = createPrimitiveDescription(WebannoTsv2Writer.class,
            WebannoTsv2Writer.PARAM_TARGET_LOCATION, targetFolder,
            WebannoTsv2Writer.PARAM_STRIP_EXTENSION, true,
            WebannoTsv2Writer.MULTIPLE_SPAN_ANNOTATIONS, multipleSpans);
    runPipeline(reader, writer);

    // cas1: the original input (expected); cas2: what was written and read back (actual).
    CollectionReader reader1 = createCollectionReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_PATH, "src/test/resources/tsv2/",
            WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CollectionReader reader2 = createCollectionReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_PATH, targetFolder,
            WebannoTsv2Reader.PARAM_PATTERNS, "example2.tsv");
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);

    // assertEquals takes (expected, actual); the original input is the expectation, so a
    // failure message reports the counts the right way round.
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(),
            JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(),
            JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(),
            JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(),
            JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(),
            JCasUtil.select(cas2.getJCas(), Sentence.class).size());
}
Aggregations