use of de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.INITIAL_CAS_PSEUDO_USER in project webanno by webanno.
the class AnnotationDocumentExporter method exportAnnotationDocumentContents.
/**
 * Copies the CAS files of all source documents of the project into the staging folder and
 * additionally writes every annotator's annotations in a secondary export format.
 * <p>
 * For each source document this exports: the initial CAS, the CAS and a formatted rendering for
 * every known user whose annotation document is neither NEW nor IGNORE, and - for automation
 * and correction projects - the CAS and a formatted rendering of the correction user.
 *
 * @param aRequest
 *            the export request providing the project and the requested export format.
 * @param aMonitor
 *            receives progress updates and warnings (e.g. when falling back to another format).
 * @param aExProject
 *            the exported project descriptor (not used by this method).
 * @param aStage
 *            the staging directory below which the export folders are created.
 * @throws UIMAException
 *             declared for the CAS loading/exporting calls.
 * @throws ClassNotFoundException
 *             declared for the CAS loading/exporting calls.
 * @throws IOException
 *             if creating directories, copying or deleting files fails.
 */
private void exportAnnotationDocumentContents(ProjectExportRequest aRequest,
        ProjectExportTaskMonitor aMonitor, ExportedProject aExProject, File aStage)
    throws UIMAException, ClassNotFoundException, IOException
{
    Project project = aRequest.getProject();

    // The export process may store project-related information in this context to ensure it
    // is looked up only once during the bulk operation and the DB is not hit too often.
    Map<Pair<Project, String>, Object> bulkOperationContext = new HashMap<>();

    List<SourceDocument> documents = documentService.listSourceDocuments(project);
    int i = 1;
    int initProgress = aMonitor.getProgress();

    // Create a map containing the annotation documents for each source document. Doing this
    // as one DB access before the main processing to avoid hammering the DB in the loops
    // below.
    Map<SourceDocument, List<AnnotationDocument>> srcToAnnIdx = documentService
            .listAnnotationDocuments(project).stream()
            .collect(groupingBy(AnnotationDocument::getDocument, toList()));

    // Cache user lookups to avoid constantly hitting the database
    LoadingCache<String, User> usersCache = Caffeine.newBuilder()
            .build(key -> userRepository.get(key));

    for (SourceDocument srcDoc : documents) {
        try (CasStorageSession session = CasStorageSession.openNested()) {
            // If the initial CAS does not exist yet, it must be created before export.
            if (!documentService.existsInitialCas(srcDoc)) {
                documentService.createOrReadInitialCas(srcDoc);
            }

            // All staging folders are resolved against aStage in the same way so that the
            // result does not depend on how the folder constants are delimited.
            File annSerDir = new File(aStage, ANNOTATION_CAS_FOLDER + srcDoc.getName());
            File annDocDir = new File(aStage, ANNOTATION_ORIGINAL_FOLDER + srcDoc.getName());

            forceMkdir(annSerDir);
            File initialCasFile = documentService.getCasFile(srcDoc, INITIAL_CAS_PSEUDO_USER);
            copyFileToDirectory(initialCasFile, annSerDir);
            log.info("Exported annotation document content for user [" + INITIAL_CAS_PSEUDO_USER + "] for source document [" + srcDoc.getId() + "] in project [" + project.getName() + "] with id [" + project.getId() + "]");

            //
            // Export per-user annotation document
            //

            // Determine which format to use for export
            String formatId = FORMAT_AUTO.equals(aRequest.getFormat()) ? srcDoc.getFormat()
                    : aRequest.getFormat();
            FormatSupport format = importExportService.getWritableFormatById(formatId)
                    .orElseGet(() -> {
                        FormatSupport fallbackFormat = new WebAnnoTsv3FormatSupport();
                        aMonitor.addMessage(LogMessage.warn(this, "Annotation: [%s] No writer " + "found for original format [%s] - exporting as [%s] " + "instead.", srcDoc.getName(), formatId, fallbackFormat.getName()));
                        return fallbackFormat;
                    });

            // Export annotations from regular users. getOrDefault is used instead of
            // computeIfAbsent so that the lookup does not modify the index.
            for (AnnotationDocument annDoc : srcToAnnIdx.getOrDefault(srcDoc, emptyList())) {
                // Only export for users that still exist and whose annotation document is
                // not NEW/IGNORE
                if (usersCache.get(annDoc.getUser()) != null && !annDoc.getState().equals(AnnotationDocumentState.NEW) && !annDoc.getState().equals(AnnotationDocumentState.IGNORE)) {
                    forceMkdir(annSerDir);
                    forceMkdir(annDocDir);

                    File annSerFile = documentService.getCasFile(srcDoc, annDoc.getUser());

                    // Check existence exactly once so that the formatted export and the
                    // copy operations can never get out of sync.
                    if (annSerFile.exists()) {
                        File annFile = importExportService.exportAnnotationDocument(srcDoc,
                                annDoc.getUser(), format, annDoc.getUser(), ANNOTATION, false,
                                bulkOperationContext);
                        try {
                            copyFileToDirectory(annSerFile, annSerDir);
                            copyFileToDirectory(annFile, annDocDir);
                        }
                        finally {
                            // The formatted export is a temporary file - always clean it up,
                            // even if copying failed.
                            forceDelete(annFile);
                        }

                        // Only report an export if something was actually exported
                        log.info("Exported annotation document content for user [" + annDoc.getUser() + "] for source document [" + srcDoc.getId() + "] in project [" + project.getName() + "] with id [" + project.getId() + "]");
                    }
                }
            }

            // Export the correction user's data for automation and correction type projects.
            if (PROJECT_TYPE_AUTOMATION.equals(project.getMode()) || PROJECT_TYPE_CORRECTION.equals(project.getMode())) {
                File corrSerFile = documentService.getCasFile(srcDoc, CORRECTION_USER);
                if (corrSerFile.exists()) {
                    // Copy CAS - this is used when importing the project again
                    // Until WebAnno 3.4.x, the CORRECTION_USER CAS was exported to 'curation'
                    // and 'curation_ser'.
                    // Since WebAnno 3.5.x, the CORRECTION_USER CAS is exported to 'annotation'
                    // and 'annotation_ser'.
                    File curationSerDir = new File(aStage,
                            ANNOTATION_AS_SERIALISED_CAS + srcDoc.getName());
                    forceMkdir(curationSerDir);
                    copyFileToDirectory(corrSerFile, curationSerDir);

                    // Copy secondary export format for convenience - not used during import
                    forceMkdir(annDocDir);
                    File corrFile = importExportService.exportAnnotationDocument(srcDoc,
                            CORRECTION_USER, format, CORRECTION_USER, CORRECTION);
                    try {
                        copyFileToDirectory(corrFile, annDocDir);
                    }
                    finally {
                        // Temporary file - remove it regardless of whether the copy worked
                        forceDelete(corrFile);
                    }
                }
            }
        }

        // This exporter contributes up to 80 progress points on top of the initial progress
        aMonitor.setProgress(initProgress + (int) ceil(((double) i) / documents.size() * 80.0));
        i++;
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.INITIAL_CAS_PSEUDO_USER in project webanno by webanno.
the class AnnotationDocumentsExporterTest method thatImportingCorrectionProjectWorks_3_6_1.
/**
 * Imports the bundled WebAnno 3.6.1 export of a curated correction project and checks which
 * document/user combinations are reported by {@code runImportAndFetchDocuments}.
 */
@Test
public void thatImportingCorrectionProjectWorks_3_6_1() throws Exception {
    project.setMode(PROJECT_TYPE_CORRECTION);

    // Import the pre-built export archive. The archive is closed once the import is done so
    // that the test does not leak the file handle.
    // NOTE(review): assumes the helper has fully consumed the archive when it returns.
    List<Pair<SourceDocument, String>> imported;
    try (ZipFile archive = new ZipFile("src/test/resources/exports/Export+Test+-+Curated+correction+project_3_6_1.zip")) {
        imported = runImportAndFetchDocuments(archive);
    }

    // Every imported entry belongs to the single source document of the project
    assertThat(imported).extracting(p -> p.getKey().getName()).containsExactlyInAnyOrder("example_sentence.txt", "example_sentence.txt", "example_sentence.txt");

    // Since WebAnno 3.5.x, the CORRECTION_USER CAS is stored with the annotations
    assertThat(imported).extracting(Pair::getValue).containsExactlyInAnyOrder(INITIAL_CAS_PSEUDO_USER, "admin", CORRECTION_USER);
}
use of de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.INITIAL_CAS_PSEUDO_USER in project webanno by webanno.
the class AnnotationDocumentsExporterTest method thatImportingCorrectionProjectWorks_3_4_x.
/**
 * Imports the bundled WebAnno 3.4.8 export of a curated correction project and checks which
 * document/user combinations are reported by {@code runImportAndFetchDocuments}.
 */
@Test
public void thatImportingCorrectionProjectWorks_3_4_x() throws Exception {
    project.setMode(PROJECT_TYPE_CORRECTION);

    // Import the pre-built export archive. The archive is closed once the import is done so
    // that the test does not leak the file handle.
    // NOTE(review): assumes the helper has fully consumed the archive when it returns.
    List<Pair<SourceDocument, String>> imported;
    try (ZipFile archive = new ZipFile("src/test/resources/exports/Export+Test+-+Curated+correction+project_3_4_8.zip")) {
        imported = runImportAndFetchDocuments(archive);
    }

    // Every imported entry belongs to the single source document of the project
    assertThat(imported).extracting(p -> p.getKey().getName()).containsExactlyInAnyOrder("example_sentence.txt", "example_sentence.txt");

    // Before WebAnno 3.5.x, the CORRECTION_USER CAS was stored with the curations
    assertThat(imported).extracting(Pair::getValue).containsExactlyInAnyOrder(INITIAL_CAS_PSEUDO_USER, "admin");
}
use of de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.INITIAL_CAS_PSEUDO_USER in project webanno by webanno.
the class AnnotationDocumentsExporterTest method thatImportingAnnotationProjectWorks_3_6_1.
/**
 * Imports the bundled WebAnno 3.6.1 export of a curated annotation project and checks which
 * document/user combinations are reported by {@code runImportAndFetchDocuments}.
 */
@Test
public void thatImportingAnnotationProjectWorks_3_6_1() throws Exception {
    project.setMode(PROJECT_TYPE_ANNOTATION);

    // Import the pre-built export archive. The archive is closed once the import is done so
    // that the test does not leak the file handle.
    // NOTE(review): assumes the helper has fully consumed the archive when it returns.
    List<Pair<SourceDocument, String>> imported;
    try (ZipFile archive = new ZipFile("src/test/resources/exports/Export+Test+-+Curated+annotation+project_3_6_1.zip")) {
        imported = runImportAndFetchDocuments(archive);
    }

    // Every imported entry belongs to the single source document of the project
    assertThat(imported).extracting(p -> p.getKey().getName()).containsExactlyInAnyOrder("example_sentence.txt", "example_sentence.txt");

    // Only the initial CAS and the annotations of "admin" are expected
    assertThat(imported).extracting(Pair::getValue).containsExactlyInAnyOrder(INITIAL_CAS_PSEUDO_USER, "admin");
}
use of de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.INITIAL_CAS_PSEUDO_USER in project webanno by webanno.
the class ProjectCasDoctorPanel method actionRepair.
/**
 * Runs the selected CAS Doctor repairs on every CAS of the current project and collects the
 * resulting messages in the form model, one message set per document/user combination.
 * <p>
 * Per source document, the CASes are processed in this order: initial CAS, correction user
 * (correction projects only), curation user, and finally every annotator for whom an
 * annotation CAS exists. Failures on a single CAS are recorded as messages and logged; they
 * do not abort the overall run.
 *
 * @param aTarget
 *            the AJAX request target; this panel is added to it at the end.
 * @param aForm
 *            the submitted form (not used by this method).
 */
private void actionRepair(AjaxRequestTarget aTarget, Form<?> aForm)
    throws IOException, UIMAException, ClassNotFoundException
{
    CasDoctor doctor = new CasDoctor();
    doctor.setApplicationContext(ApplicationContextProvider.getApplicationContext());
    doctor.setFatalChecks(false);
    doctor.setRepairClasses(formModel.repairs);

    Project project = getModelObject();

    formModel.messageSets = new ArrayList<>();

    for (SourceDocument sourceDoc : documentService.listSourceDocuments(project)) {
        // --- Initial CAS -------------------------------------------------------------------
        LogMessageSet initialReport = new LogMessageSet(sourceDoc.getName() + " [INITIAL]");
        try {
            casStorageService.forceActionOnCas(sourceDoc, INITIAL_CAS_PSEUDO_USER,
                    (doc, user) -> createOrReadInitialCasWithoutSaving(doc, initialReport),
                    (cas) -> doctor.repair(project, cas, initialReport.messages), true);
        }
        catch (Exception e) {
            initialReport.messages.add(new LogMessage(getClass(), LogLevel.ERROR, "Error repairing initial CAS for [" + sourceDoc.getName() + "]: " + e.getMessage()));
            LOG.error("Error repairing initial CAS for [{}]", sourceDoc.getName(), e);
        }
        noticeIfThereAreNoMessages(initialReport);
        formModel.messageSets.add(initialReport);

        // --- Correction user CAS (only relevant for correction projects) -------------------
        if (PROJECT_TYPE_CORRECTION.equals(project.getMode())) {
            LogMessageSet correctionReport = new LogMessageSet(sourceDoc.getName() + " [" + CORRECTION_USER + "]");
            try {
                casStorageService.forceActionOnCas(sourceDoc, CORRECTION_USER,
                        (doc, user) -> casStorageService.readCas(doc, user, UNMANAGED_NON_INITIALIZING_ACCESS),
                        (cas) -> doctor.repair(project, cas, correctionReport.messages), true);
            }
            catch (FileNotFoundException e) {
                // A missing correction CAS is taken to mean that correction has not been
                // started yet, which is not an error. (REC: I wonder if this assumption is
                // correct in curation mode...)
                correctionReport.messages.add(LogMessage.info(getClass(), "Correction seems to have not yet started."));
            }
            catch (Exception e) {
                correctionReport.messages.add(new LogMessage(getClass(), LogLevel.ERROR, "Error checking annotations for [" + CORRECTION_USER + "] for [" + sourceDoc.getName() + "]: " + e.getMessage()));
                LOG.error("Error checking annotations for [{}] for [{}]", CORRECTION_USER, sourceDoc.getName(), e);
            }
            noticeIfThereAreNoMessages(correctionReport);
            formModel.messageSets.add(correctionReport);
        }

        // --- Curation user CAS -------------------------------------------------------------
        LogMessageSet curationReport = new LogMessageSet(sourceDoc.getName() + " [" + CURATION_USER + "]");
        try {
            casStorageService.forceActionOnCas(sourceDoc, CURATION_USER,
                    (doc, user) -> casStorageService.readCas(doc, user, UNMANAGED_NON_INITIALIZING_ACCESS),
                    (cas) -> doctor.repair(project, cas, curationReport.messages), true);
        }
        catch (FileNotFoundException e) {
            // A missing curation CAS is only a problem if the document state says that
            // curation is already under way or done; otherwise curation simply has not
            // started yet.
            boolean curationCasExpected = asList(CURATION_IN_PROGRESS, CURATION_FINISHED).contains(sourceDoc.getState());
            curationReport.messages.add(curationCasExpected
                    ? LogMessage.error(getClass(), "Curation CAS missing.")
                    : LogMessage.info(getClass(), "Curation has not started."));
        }
        catch (Exception e) {
            curationReport.messages.add(new LogMessage(getClass(), LogLevel.ERROR, "Error checking annotations for [" + CURATION_USER + "] for [" + sourceDoc.getName() + "]: " + e.getMessage()));
            LOG.error("Error checking annotations for [{}] for [{}]", CURATION_USER, sourceDoc.getName(), e);
        }
        noticeIfThereAreNoMessages(curationReport);
        formModel.messageSets.add(curationReport);

        // --- Regular annotator CASes -------------------------------------------------------
        for (AnnotationDocument annDoc : documentService.listAnnotationDocuments(sourceDoc)) {
            // Nothing to repair if the annotator has no CAS for this document
            if (!documentService.existsAnnotationCas(annDoc)) {
                continue;
            }

            LogMessageSet annotatorReport = new LogMessageSet(sourceDoc.getName() + " [" + annDoc.getUser() + "]");
            try {
                casStorageService.forceActionOnCas(sourceDoc, annDoc.getUser(),
                        (doc, user) -> casStorageService.readCas(doc, user, UNMANAGED_NON_INITIALIZING_ACCESS),
                        (cas) -> doctor.repair(project, cas, annotatorReport.messages), true);
            }
            catch (Exception e) {
                annotatorReport.messages.add(new LogMessage(getClass(), LogLevel.ERROR, "Error repairing annotations of user [" + annDoc.getUser() + "] for [" + sourceDoc.getName() + "]: " + e.getMessage()));
                LOG.error("Error repairing annotations of user [{}] for [{}]", annDoc.getUser(), sourceDoc.getName(), e);
            }
            noticeIfThereAreNoMessages(annotatorReport);
            formModel.messageSets.add(annotatorReport);
        }
    }

    // Re-render the panel so the collected messages become visible
    aTarget.add(this);
}
Aggregations