use of de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession in project webanno by webanno.
the class AnnotationDocumentExporter method exportAnnotationDocumentContents.
/**
 * Exports the CAS files and the formatted annotation files of all source documents of the
 * project in the given request into the staging directory.
 * <p>
 * For every source document, the initial CAS is copied first (it is created if it does not exist
 * yet). Then, for every annotation document that belongs to a known user and is neither in state
 * NEW nor IGNORE, the user's CAS file and an export in the selected format are copied. For
 * automation/correction projects, the CAS of the correction user is exported as well.
 *
 * @param aRequest
 *            the export request providing the project and the requested format.
 * @param aMonitor
 *            receives progress updates and warning messages.
 * @param aExProject
 *            the exported project model (not accessed by this method).
 * @param aStage
 *            the staging directory into which the files are written.
 * @throws UIMAException
 *             if a CAS cannot be created or exported.
 * @throws ClassNotFoundException
 *             if propagated by the document or export services.
 * @throws IOException
 *             if a file or directory cannot be created, copied or deleted.
 */
private void exportAnnotationDocumentContents(ProjectExportRequest aRequest, ProjectExportTaskMonitor aMonitor, ExportedProject aExProject, File aStage) throws UIMAException, ClassNotFoundException, IOException {
    Project project = aRequest.getProject();
    // The export process may store project-related information in this context to ensure it
    // is looked up only once during the bulk operation and the DB is not hit too often.
    Map<Pair<Project, String>, Object> bulkOperationContext = new HashMap<>();
    List<SourceDocument> documents = documentService.listSourceDocuments(project);
    int i = 1;
    int initProgress = aMonitor.getProgress();
    // Create a map containing the annotation documents for each source document. Doing this
    // as one DB access before the main processing to avoid hammering the DB in the loops
    // below.
    Map<SourceDocument, List<AnnotationDocument>> srcToAnnIdx = documentService.listAnnotationDocuments(project).stream().collect(groupingBy(doc -> doc.getDocument(), toList()));
    // Cache user lookups to avoid constantly hitting the database
    LoadingCache<String, User> usersCache = Caffeine.newBuilder().build(key -> userRepository.get(key));
    for (SourceDocument srcDoc : documents) {
        try (CasStorageSession session = CasStorageSession.openNested()) {
            // If the initial CAS does not exist yet, it must be created before export.
            if (!documentService.existsInitialCas(srcDoc)) {
                documentService.createOrReadInitialCas(srcDoc);
            }
            File targetDir = new File(aStage, ANNOTATION_CAS_FOLDER + srcDoc.getName());
            forceMkdir(targetDir);
            File initialCasFile = documentService.getCasFile(srcDoc, INITIAL_CAS_PSEUDO_USER);
            copyFileToDirectory(initialCasFile, targetDir);
            log.info("Exported annotation document content for user [" + INITIAL_CAS_PSEUDO_USER + "] for source document [" + srcDoc.getId() + "] in project [" + project.getName() + "] with id [" + project.getId() + "]");
            //
            // Export per-user annotation document
            //
            // Determine which format to use for export
            String formatId = FORMAT_AUTO.equals(aRequest.getFormat()) ? srcDoc.getFormat() : aRequest.getFormat();
            FormatSupport format = importExportService.getWritableFormatById(formatId).orElseGet(() -> {
                FormatSupport fallbackFormat = new WebAnnoTsv3FormatSupport();
                aMonitor.addMessage(LogMessage.warn(this, "Annotation: [%s] No writer " + "found for original format [%s] - exporting as [%s] " + "instead.", srcDoc.getName(), formatId, fallbackFormat.getName()));
                return fallbackFormat;
            });
            // Export annotations from regular users. A plain lookup with a default is used so
            // that the index is not modified for source documents without annotation documents.
            for (AnnotationDocument annDoc : srcToAnnIdx.getOrDefault(srcDoc, emptyList())) {
                // Skip annotation documents of unknown users and those in state NEW/IGNORE
                if (usersCache.get(annDoc.getUser()) != null && !annDoc.getState().equals(AnnotationDocumentState.NEW) && !annDoc.getState().equals(AnnotationDocumentState.IGNORE)) {
                    File annSerDir = new File(aStage.getAbsolutePath() + ANNOTATION_CAS_FOLDER + srcDoc.getName());
                    File annDocDir = new File(aStage.getAbsolutePath() + ANNOTATION_ORIGINAL_FOLDER + srcDoc.getName());
                    forceMkdir(annSerDir);
                    forceMkdir(annDocDir);
                    File annSerFile = documentService.getCasFile(srcDoc, annDoc.getUser());
                    // Check for existence only once so that the export and the copy below can
                    // never disagree about whether there is a file to handle.
                    if (annSerFile.exists()) {
                        File annFile = importExportService.exportAnnotationDocument(srcDoc, annDoc.getUser(), format, annDoc.getUser(), ANNOTATION, false, bulkOperationContext);
                        try {
                            copyFileToDirectory(annSerFile, annSerDir);
                            copyFileToDirectory(annFile, annDocDir);
                        } finally {
                            // The export is a temporary file - remove it even if copying failed
                            if (annFile != null && annFile.exists()) {
                                forceDelete(annFile);
                            }
                        }
                    }
                    log.info("Exported annotation document content for user [" + annDoc.getUser() + "] for source document [" + srcDoc.getId() + "] in project [" + project.getName() + "] with id [" + project.getId() + "]");
                }
            }
            // Additionally export the CAS of the correction user for automation and
            // correction type projects.
            if (PROJECT_TYPE_AUTOMATION.equals(project.getMode()) || PROJECT_TYPE_CORRECTION.equals(project.getMode())) {
                File corrSerFile = documentService.getCasFile(srcDoc, CORRECTION_USER);
                if (corrSerFile.exists()) {
                    // Copy CAS - this is used when importing the project again
                    // Until WebAnno 3.4.x, the CORRECTION_USER CAS was exported to 'curation'
                    // and 'curation_ser'.
                    // Since WebAnno 3.5.x, the CORRECTION_USER CAS is exported to 'annotation'
                    // and 'annotation_ser'.
                    File curationSerDir = new File(aStage + ANNOTATION_AS_SERIALISED_CAS + srcDoc.getName());
                    forceMkdir(curationSerDir);
                    copyFileToDirectory(corrSerFile, curationSerDir);
                    // Copy secondary export format for convenience - not used during import
                    File curationDir = new File(aStage + ANNOTATION_ORIGINAL_FOLDER + srcDoc.getName());
                    forceMkdir(curationDir);
                    File corrFile = importExportService.exportAnnotationDocument(srcDoc, CORRECTION_USER, format, CORRECTION_USER, CORRECTION);
                    try {
                        copyFileToDirectory(corrFile, curationDir);
                    } finally {
                        // The export is a temporary file - remove it even if copying failed
                        if (corrFile != null && corrFile.exists()) {
                            forceDelete(corrFile);
                        }
                    }
                }
            }
        }
        // This stage contributes up to 80 progress units, spread evenly over the documents
        aMonitor.setProgress(initProgress + (int) ceil(((double) i) / documents.size() * 80.0));
        i++;
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession in project webanno by webanno.
the class CasStorageServiceImplTest method testThatLayerChangeEventInvalidatesCachedCas.
/**
 * Checks that two consecutive reads of the same CAS yield the same JCas instance, and that a
 * read following a {@code LayerConfigurationChangedEvent} yields a different instance.
 */
@Test
public void testThatLayerChangeEventInvalidatesCachedCas() throws Exception {
    // Fixture: a source document with a CAS file for the test user
    SourceDocument document = makeSourceDocument(4L, 4L, "test");
    String userName = "test";
    try (CasStorageSession fixtureSession = openNested(true)) {
        createCasFile(document, userName, "This is a test");
    }

    // Two reads before the event, one read after it
    int identityFirstRead = identityOfCasReadInNewSession(document, userName);
    int identitySecondRead = identityOfCasReadInNewSession(document, userName);
    sut.beforeLayerConfigurationChanged(new LayerConfigurationChangedEvent(this, document.getProject()));
    int identityAfterEvent = identityOfCasReadInNewSession(document, userName);

    assertThat(identityFirstRead).as("Getting the CAS a second time returns the same instance from memory").isEqualTo(identitySecondRead);
    assertThat(identityFirstRead).as("After a type system change event must return a different CAS instance").isNotEqualTo(identityAfterEvent);
}

/**
 * Reads the CAS of the given document/user inside its own nested session and returns the
 * identity hash code of the JCas that was obtained.
 */
private int identityOfCasReadInNewSession(SourceDocument aDocument, String aUser) throws Exception {
    try (CasStorageSession readSession = openNested(true)) {
        JCas jcas = sut.readCas(aDocument, aUser).getJCas();
        return System.identityHashCode(jcas);
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession in project webanno by webanno.
the class CasStorageServiceImplTest method testConcurrentAccess.
/**
 * Checks that writing a CAS fails with an {@code IOException} mentioning "concurrent
 * modification" when the timestamp of the CAS file was changed after the CAS had been read, and
 * that the file and its timestamp are left as they were.
 */
@Test
public void testConcurrentAccess() throws Exception {
    // Fixture: a source document with an existing CAS file for the test user
    SourceDocument document = makeSourceDocument(5L, 5L, "test");
    String userName = "test";
    File storedCas = sut.getCasFile(document, userName);
    try (CasStorageSession fixtureSession = openNested(true)) {
        createCasFile(document, userName, "This is a test");
        assertThat(storedCas).exists();
    }

    try (CasStorageSession writeSession = openNested(true)) {
        CAS loadedCas = sut.readCas(document, userName, EXCLUSIVE_WRITE_ACCESS);

        // Move the file timestamp forward after the CAS has been read
        long timestampAtRead = storedCas.lastModified();
        storedCas.setLastModified(timestampAtRead + 10_000);
        long bumpedTimestamp = storedCas.lastModified();

        assertThatExceptionOfType(IOException.class)
                .isThrownBy(() -> sut.writeCas(document, loadedCas, userName))
                .withMessageContaining("concurrent modification");

        // The rejected write must not have touched the file
        assertThat(storedCas).exists();
        assertThat(storedCas.lastModified()).isEqualTo(bumpedTimestamp);
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession in project webanno by webanno.
the class CasStorageServiceImpl method forceActionOnCas.
/**
 * Loads the CAS of the given document/user via the given loader, applies the given action to it
 * and optionally writes it back, all inside a nested CAS storage session in which the CAS is
 * registered with exclusive write access.
 *
 * @param aDocument
 *            the document the CAS belongs to.
 * @param aUser
 *            the user the CAS belongs to.
 * @param aLoader
 *            used to load the CAS.
 * @param aAction
 *            applied to the loaded CAS.
 * @param aSave
 *            whether the CAS is written via {@code realWriteCas} after the action was applied.
 * @throws IOException
 *             if an {@code IOException} occurs (rethrown unchanged) or if any other exception
 *             occurs (wrapped in an {@code IOException}).
 */
@Override
public void forceActionOnCas(SourceDocument aDocument, String aUser, CasStorageServiceLoader aLoader, CasStorageServiceAction aAction, boolean aSave) throws IOException {
// Open a nested mini-session and register the CAS holder in it with exclusive write access
// before the CAS is loaded and modified, so that the write access is known to the session.
// NOTE(review): the meaning of the boolean passed to openNested is not visible here - confirm.
try (CasStorageSession session = CasStorageSession.openNested(true)) {
// NOTE(review): WithExclusiveAccess presumably acquires exclusive access to the CAS of this
// document/user and releases it on close - confirm against its implementation.
try (WithExclusiveAccess access = new WithExclusiveAccess(aDocument, aUser)) {
session.add(aDocument.getId(), aUser, EXCLUSIVE_WRITE_ACCESS, access.getHolder());
CAS cas = aLoader.load(aDocument, aUser);
access.setCas(cas);
aAction.apply(cas);
if (aSave) {
realWriteCas(aDocument, aUser, cas);
}
} finally {
// Runs after 'access' has been closed (try-with-resources closes its resources before the
// finally block executes) and before the session itself is closed.
session.remove(aDocument.getId(), aUser);
}
} catch (IOException e) {
// I/O problems are passed on unchanged
throw e;
} catch (Exception e) {
// Everything else is wrapped to satisfy the declared exception type
throw new IOException(e);
}
}
use of de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession in project webanno by webanno.
the class ImportExportServiceImpl method exportCasToFile.
/**
 * Exports the given CAS to a file using the given format.
 * <p>
 * The CAS is first copied into a fresh export CAS that is prepared for export, its document
 * metadata is pointed to the source folder of the document in the repository, and the tagset
 * names are updated. The writer of the format then writes into a temporary directory. If the
 * writer produced a single file, that file is copied next to the temporary directory and
 * returned; if it produced several files, they are packaged into a ZIP file which is returned.
 * The temporary directory is removed in any case.
 *
 * @param aCas
 *            the CAS to export.
 * @param aDocument
 *            the source document the CAS belongs to.
 * @param aFileName
 *            the name to use as document ID / file name for the export.
 * @param aFormat
 *            the format providing the writer.
 * @param aStripExtension
 *            passed to the writer as its strip-extension parameter.
 * @param aBulkOperationContext
 *            optional cache for project-level lookups shared across a bulk operation; may be
 *            {@code null}, in which case a local one is used.
 * @return the exported file (a ZIP file if the writer produced more than one file).
 * @throws IOException
 *             if the writer produced no file, if the ZIP file cannot be created, or if a file
 *             operation fails.
 * @throws UIMAException
 *             if the CAS cannot be prepared or written.
 */
@Override
public File exportCasToFile(CAS aCas, SourceDocument aDocument, String aFileName, FormatSupport aFormat, boolean aStripExtension, Map<Pair<Project, String>, Object> aBulkOperationContext) throws IOException, UIMAException {
    Project project = aDocument.getProject();
    Map<Pair<Project, String>, Object> bulkOperationContext = aBulkOperationContext;
    if (bulkOperationContext == null) {
        bulkOperationContext = new HashMap<>();
    }
    // Either fetch the type system from the bulk-context or fetch it from the DB and store it
    // in the bulk-context to avoid further lookups in the same bulk operation
    Pair<Project, String> exportTypeSystemKey = Pair.of(project, "exportTypeSystem");
    TypeSystemDescription exportTypeSystem = (TypeSystemDescription) bulkOperationContext.get(exportTypeSystemKey);
    if (exportTypeSystem == null) {
        exportTypeSystem = annotationService.getTypeSystemForExport(project);
        bulkOperationContext.put(exportTypeSystemKey, exportTypeSystem);
    }
    try (CasStorageSession session = CasStorageSession.openNested()) {
        // Update the type system of the CAS, compact it (remove all non-reachable feature
        // structures) and remove all internal feature structures in the process
        CAS exportCas = WebAnnoCasUtil.createCas();
        session.add(EXPORT_CAS, CasAccessMode.EXCLUSIVE_WRITE_ACCESS, exportCas);
        annotationService.prepareCasForExport(aCas, exportCas, aDocument, exportTypeSystem);
        // Update the source file name in case it is changed for some reason. This is necessary
        // for the writers to create the files under the correct names.
        File currentDocumentUri = new File(repositoryProperties.getPath().getAbsolutePath() + "/" + PROJECT_FOLDER + "/" + project.getId() + "/" + DOCUMENT_FOLDER + "/" + aDocument.getId() + "/" + SOURCE_FOLDER);
        DocumentMetaData documentMetadata = DocumentMetaData.get(exportCas.getJCas());
        documentMetadata.setDocumentBaseUri(currentDocumentUri.toURI().toURL().toExternalForm());
        documentMetadata.setDocumentUri(new File(currentDocumentUri, aFileName).toURI().toURL().toExternalForm());
        documentMetadata.setCollectionId(currentDocumentUri.toURI().toURL().toExternalForm());
        documentMetadata.setDocumentId(aFileName);
        // update with the correct tagset name
        Pair<Project, String> annotationFeaturesKey = Pair.of(project, "annotationFeatures");
        @SuppressWarnings("unchecked") List<AnnotationFeature> features = (List<AnnotationFeature>) bulkOperationContext.get(annotationFeaturesKey);
        if (features == null) {
            features = annotationService.listAnnotationFeature(project);
            bulkOperationContext.put(annotationFeaturesKey, features);
        }
        for (AnnotationFeature feature : features) {
            TagSet tagSet = feature.getTagset();
            if (tagSet == null || CHAIN_TYPE.equals(feature.getLayer().getType())) {
                continue;
            }
            updateCasWithTagSet(exportCas, feature.getLayer().getName(), tagSet.getName());
        }
        File exportTempDir = createTempFile("webanno", "export");
        try {
            // Turn the temporary file into a temporary directory of the same name
            exportTempDir.delete();
            exportTempDir.mkdirs();
            AnalysisEngineDescription writer = aFormat.getWriterDescription(aDocument.getProject(), exportTypeSystem, exportCas);
            addConfigurationParameters(writer, JCasFileWriter_ImplBase.PARAM_USE_DOCUMENT_ID, true, JCasFileWriter_ImplBase.PARAM_ESCAPE_FILENAME, false, JCasFileWriter_ImplBase.PARAM_TARGET_LOCATION, exportTempDir, JCasFileWriter_ImplBase.PARAM_STRIP_EXTENSION, aStripExtension);
            // Not using SimplePipeline.runPipeline here now because it internally works with an
            // aggregate engine which is slow due to
            // https://issues.apache.org/jira/browse/UIMA-6200
            AnalysisEngine engine = null;
            try {
                engine = createEngine(writer);
                engine.process(getRealCas(exportCas));
                collectionProcessComplete(engine);
            } finally {
                destroy(engine);
            }
            // List the output once; listFiles() returns null if the directory is not readable
            File[] exportedFiles = exportTempDir.listFiles();
            if (exportedFiles == null || exportedFiles.length == 0) {
                throw new IOException("No export file was produced for document [" + aFileName + "]");
            }
            // If the writer produced more than one file, we package it up as a ZIP file
            File exportFile;
            if (exportedFiles.length > 1) {
                exportFile = new File(exportTempDir.getAbsolutePath() + ".zip");
                try {
                    zipFolder(exportTempDir, exportFile);
                } catch (Exception e) {
                    try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(project.getId()))) {
                        log.error("Unable to create zip File", e);
                    }
                    // Do not hand a missing or incomplete ZIP file back to the caller
                    throw new IOException("Unable to create zip file [" + exportFile + "]", e);
                }
            } else {
                exportFile = new File(exportTempDir.getParent(), exportedFiles[0].getName());
                copyFile(exportedFiles[0], exportFile);
            }
            return exportFile;
        } finally {
            forceDelete(exportTempDir);
        }
    }
}
Aggregations