use of de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport in project webanno by webanno.
the class AnnotationDocumentExporter method exportAnnotationDocumentContents.
/**
 * Exports the annotation data of every source document of the requested project into the
 * staging folder. For each source document this copies the initial CAS, the CAS of every
 * eligible annotator and (for automation/correction projects) the CAS of the correction user.
 * In addition, a secondary export in the requested (or the document's original) format is
 * written next to the CAS copies for convenience.
 *
 * @param aRequest
 *            the export request; provides the project and the requested export format.
 * @param aMonitor
 *            receives warnings (e.g. when no writer exists for a format) and progress updates.
 * @param aExProject
 *            the exported project model; not read or modified by this method.
 * @param aStage
 *            the staging folder below which the exported files are placed.
 * @throws UIMAException
 *             declared by the signature; propagated from the called services.
 * @throws ClassNotFoundException
 *             declared by the signature; propagated from the called services.
 * @throws IOException
 *             if creating a folder, copying a file or deleting a temporary file fails.
 */
private void exportAnnotationDocumentContents(ProjectExportRequest aRequest, ProjectExportTaskMonitor aMonitor, ExportedProject aExProject, File aStage) throws UIMAException, ClassNotFoundException, IOException {
    Project project = aRequest.getProject();

    // The export process may store project-related information in this context to ensure it
    // is looked up only once during the bulk operation and the DB is not hit too often.
    Map<Pair<Project, String>, Object> bulkOperationContext = new HashMap<>();

    List<SourceDocument> documents = documentService.listSourceDocuments(project);
    int i = 1;
    int initProgress = aMonitor.getProgress();

    // Create a map containing the annotation documents for each source document. Doing this
    // as one DB access before the main processing to avoid hammering the DB in the loops
    // below.
    Map<SourceDocument, List<AnnotationDocument>> srcToAnnIdx = documentService.listAnnotationDocuments(project).stream().collect(groupingBy(doc -> doc.getDocument(), toList()));

    // Cache user lookups to avoid constantly hitting the database
    LoadingCache<String, User> usersCache = Caffeine.newBuilder().build(key -> userRepository.get(key));

    for (SourceDocument srcDoc : documents) {
        try (CasStorageSession session = CasStorageSession.openNested()) {
            // If the initial CAS does not exist yet, it must be created before export.
            if (!documentService.existsInitialCas(srcDoc)) {
                documentService.createOrReadInitialCas(srcDoc);
            }

            File targetDir = new File(aStage, ANNOTATION_CAS_FOLDER + srcDoc.getName());
            forceMkdir(targetDir);
            File initialCasFile = documentService.getCasFile(srcDoc, INITIAL_CAS_PSEUDO_USER);
            copyFileToDirectory(initialCasFile, targetDir);
            log.info("Exported annotation document content for user [" + INITIAL_CAS_PSEUDO_USER + "] for source document [" + srcDoc.getId() + "] in project [" + project.getName() + "] with id [" + project.getId() + "]");

            //
            // Export per-user annotation document
            //

            // Determine which format to use for export. If no writer is available for it,
            // fall back to WebAnno TSV3 and tell the user via the monitor.
            String formatId = FORMAT_AUTO.equals(aRequest.getFormat()) ? srcDoc.getFormat() : aRequest.getFormat();
            FormatSupport format = importExportService.getWritableFormatById(formatId).orElseGet(() -> {
                FormatSupport fallbackFormat = new WebAnnoTsv3FormatSupport();
                aMonitor.addMessage(LogMessage.warn(this, "Annotation: [%s] No writer found for original format [%s] - exporting as [%s] instead.", srcDoc.getName(), formatId, fallbackFormat.getName()));
                return fallbackFormat;
            });

            // Export annotations from regular users
            for (AnnotationDocument annDoc : srcToAnnIdx.computeIfAbsent(srcDoc, key -> emptyList())) {
                // Only export if the user still exists and the annotation document is not
                // NEW/IGNORE
                if (usersCache.get(annDoc.getUser()) != null && !annDoc.getState().equals(AnnotationDocumentState.NEW) && !annDoc.getState().equals(AnnotationDocumentState.IGNORE)) {
                    File annSerDir = new File(aStage.getAbsolutePath() + ANNOTATION_CAS_FOLDER + srcDoc.getName());
                    File annDocDir = new File(aStage.getAbsolutePath() + ANNOTATION_ORIGINAL_FOLDER + srcDoc.getName());
                    forceMkdir(annSerDir);
                    forceMkdir(annDocDir);

                    File annSerFile = documentService.getCasFile(srcDoc, annDoc.getUser());
                    // Single existence check: the export and the copies either all happen or
                    // none of them does, so the exported file can never be null when copied.
                    if (annSerFile.exists()) {
                        File annFile = importExportService.exportAnnotationDocument(srcDoc, annDoc.getUser(), format, annDoc.getUser(), ANNOTATION, false, bulkOperationContext);
                        try {
                            copyFileToDirectory(annSerFile, annSerDir);
                            copyFileToDirectory(annFile, annDocDir);
                        } finally {
                            // Remove the temporary export even if one of the copies failed
                            forceDelete(annFile);
                        }
                    }

                    log.info("Exported annotation document content for user [" + annDoc.getUser() + "] for source document [" + srcDoc.getId() + "] in project [" + project.getName() + "] with id [" + project.getId() + "]");
                }
            }

            // Export the correction user's data for automation and correction type projects.
            if (PROJECT_TYPE_AUTOMATION.equals(project.getMode()) || PROJECT_TYPE_CORRECTION.equals(project.getMode())) {
                File corrSerFile = documentService.getCasFile(srcDoc, CORRECTION_USER);
                if (corrSerFile.exists()) {
                    // Copy CAS - this is used when importing the project again
                    // Until WebAnno 3.4.x, the CORRECTION_USER CAS was exported to 'curation'
                    // and 'curation_ser'.
                    // Since WebAnno 3.5.x, the CORRECTION_USER CAS is exported to 'annotation'
                    // and 'annotation_ser'.
                    File curationSerDir = new File(aStage + ANNOTATION_AS_SERIALISED_CAS + srcDoc.getName());
                    forceMkdir(curationSerDir);
                    copyFileToDirectory(corrSerFile, curationSerDir);

                    // Copy secondary export format for convenience - not used during import
                    File curationDir = new File(aStage + ANNOTATION_ORIGINAL_FOLDER + srcDoc.getName());
                    forceMkdir(curationDir);
                    File corrFile = importExportService.exportAnnotationDocument(srcDoc, CORRECTION_USER, format, CORRECTION_USER, CORRECTION);
                    try {
                        copyFileToDirectory(corrFile, curationDir);
                    } finally {
                        // Remove the temporary export even if the copy failed
                        forceDelete(corrFile);
                    }
                }
            }
        }

        // This exporter contributes up to 80 progress points, spread evenly over the documents
        aMonitor.setProgress(initProgress + (int) ceil(((double) i) / documents.size() * 80.0));
        i++;
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport in project webanno by webanno.
the class CuratedDocumentsExporter method exportData.
/**
 * Copy, if they exist, the curation documents to a folder that will be exported as Zip file.
 * A document is exported if its curation is finished, or if it is in progress and the request
 * asks for in-progress documents to be included.
 *
 * @param aRequest
 *            the export request; provides the project, the export format and the
 *            include-in-progress flag.
 * @param aMonitor
 *            receives warnings (e.g. when no writer exists for a format) and progress updates.
 * @param aExProject
 *            the exported project model; not read or modified by this method.
 * @param aStage
 *            The folder where curated documents are copied to be exported as Zip File
 * @throws Exception
 *             if a folder cannot be created or a CAS file cannot be copied; failures while
 *             writing the secondary export format are reported as
 *             {@link ProjectExportException} carrying the original exception as cause.
 */
@Override
public void exportData(ProjectExportRequest aRequest, ProjectExportTaskMonitor aMonitor, ExportedProject aExProject, File aStage) throws Exception {
    Project project = aRequest.getProject();

    // The export process may store project-related information in this context to ensure it
    // is looked up only once during the bulk operation and the DB is not hit too often.
    Map<Pair<Project, String>, Object> bulkOperationContext = new HashMap<>();

    // Get all the source documents from the project
    List<SourceDocument> documents = documentService.listSourceDocuments(project);

    int initProgress = aMonitor.getProgress() - 1;
    int i = 1;
    for (SourceDocument sourceDocument : documents) {
        File curationCasDir = new File(aStage, CURATION_CAS_FOLDER + sourceDocument.getName());
        forceMkdir(curationCasDir);
        File curationDir = new File(aStage, CURATION_FOLDER + sourceDocument.getName());
        forceMkdir(curationDir);

        // Export the documents whose curation is finished or also the ones that are in
        // progress (if requested)
        if ((aRequest.isIncludeInProgress() && CURATION_IN_PROGRESS.equals(sourceDocument.getState())) || CURATION_FINISHED.equals(sourceDocument.getState())) {
            File curationCasFile = documentService.getCasFile(sourceDocument, CURATION_USER);
            if (curationCasFile.exists()) {
                // Copy CAS - this is used when importing the project again
                copyFileToDirectory(curationCasFile, curationCasDir);

                // Determine which format to use for export. If no writer is available for
                // it, fall back to WebAnno TSV3 and tell the user via the monitor.
                String formatId = FORMAT_AUTO.equals(aRequest.getFormat()) ? sourceDocument.getFormat() : aRequest.getFormat();
                FormatSupport format = importExportService.getWritableFormatById(formatId).orElseGet(() -> {
                    FormatSupport fallbackFormat = new WebAnnoTsv3FormatSupport();
                    aMonitor.addMessage(LogMessage.warn(this, "Curation: [%s] No writer found for original format [%s] - exporting as [%s] instead.", sourceDocument.getName(), formatId, fallbackFormat.getName()));
                    return fallbackFormat;
                });

                // Copy secondary export format for convenience - not used during import
                try {
                    File curationFile = importExportService.exportAnnotationDocument(sourceDocument, CURATION_USER, format, CURATION_USER, CURATION, true, bulkOperationContext);
                    try {
                        copyFileToDirectory(curationFile, curationDir);
                    } finally {
                        // Remove the temporary export even if the copy failed
                        forceDelete(curationFile);
                    }
                } catch (Exception e) {
                    // Keep the original exception as cause so the reason for the abort is
                    // not lost to whoever handles or logs the export failure.
                    ProjectExportException failure = new ProjectExportException("Aborting due to unrecoverable error while exporting!");
                    failure.initCause(e);
                    throw failure;
                }
            }
        }

        // This exporter contributes up to 10 progress points, spread evenly over the documents
        aMonitor.setProgress(initProgress + (int) ceil(((double) i) / documents.size() * 10.0));
        i++;
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport in project webanno by webanno.
the class ImportExportServiceImpl method importCasFromFile.
/**
 * Reads the given file into a new CAS using the reader of the given format. If the resulting
 * CAS contains no sentences or no tokens, they are created via {@code splitSentences} and
 * {@code tokenize} respectively.
 *
 * @param aFile
 *            the file to import.
 * @param aProject
 *            the project whose type system is used when {@code aFullProjectTypeSystem} is
 *            {@code null}.
 * @param aFormatId
 *            the ID of the format used to look up the reader.
 * @param aFullProjectTypeSystem
 *            the type system to create the CAS with; may be {@code null}, in which case it is
 *            obtained from the annotation service for {@code aProject}.
 * @return the CAS holding the imported document.
 * @throws UIMAException
 *             declared by the signature; propagated from CAS/reader creation and reading.
 * @throws IOException
 *             if no reader is available for the format, the reader does not find the file, or
 *             the document still has no tokens or sentences after segmentation.
 */
@Override
public CAS importCasFromFile(File aFile, Project aProject, String aFormatId, TypeSystemDescription aFullProjectTypeSystem) throws UIMAException, IOException {
    TypeSystemDescription tsd = aFullProjectTypeSystem;
    if (tsd == null) {
        tsd = annotationService.getFullProjectTypeSystem(aProject);
    }

    // Prepare a CAS with the project type system
    CAS cas = CasFactory.createCas(tsd);

    // Convert the source document to CAS
    FormatSupport format = getReadableFormatById(aFormatId).orElseThrow(() -> new IOException("No reader available for format [" + aFormatId + "]"));

    // Resolve the parent via the absolute file: a relative file without a directory part
    // has no parent of its own and would otherwise cause a NullPointerException here.
    File sourceDir = aFile.getAbsoluteFile().getParentFile();

    CollectionReaderDescription readerDescription = format.getReaderDescription(tsd);
    addConfigurationParameters(readerDescription, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, sourceDir.getAbsolutePath(), ResourceCollectionReaderBase.PARAM_PATTERNS, "[+]" + aFile.getName());

    CollectionReader reader = createReader(readerDescription);
    try {
        if (!reader.hasNext()) {
            throw new FileNotFoundException("Source file [" + aFile.getName() + "] not found in [" + sourceDir.getPath() + "]");
        }
        reader.getNext(cas);
    } finally {
        // The reader is only needed for this single document - release it again
        reader.destroy();
    }

    // Create sentence / token annotations if they are missing. Both checks are done before
    // any segmentation runs so that creating sentences does not influence the token decision.
    boolean hasTokens = exists(cas, getType(cas, Token.class));
    boolean hasSentences = exists(cas, getType(cas, Sentence.class));
    if (!hasSentences) {
        splitSentences(cas);
    }
    if (!hasTokens) {
        tokenize(cas);
    }

    if (!exists(cas, getType(cas, Token.class)) || !exists(cas, getType(cas, Sentence.class))) {
        throw new IOException("The document appears to be empty. Unable to detect any tokens or sentences. Empty documents cannot be imported.");
    }

    return cas;
}
use of de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport in project webanno by webanno.
the class ProjectExportCuratedDocumentsTask method exportCuratedDocuments.
/**
 * Copy, if they exist, the curation documents to a folder that will be exported as Zip file.
 * A document is exported if its curation is finished, or if it is in progress and
 * {@code aIncludeInProgress} is set.
 *
 * @param aModel
 *            the export request; provides the project and the export format.
 * @param aCopyDir
 *            The folder where curated documents are copied to be exported as Zip File
 * @param aIncludeInProgress
 *            whether documents whose curation is still in progress are exported as well.
 * @param aMonitor
 *            receives progress updates.
 * @throws ProjectExportException
 *             if writing the secondary export format of a document fails; the original
 *             exception is attached as cause.
 * @throws IOException
 *             if a folder cannot be created or a CAS file cannot be copied.
 */
private void exportCuratedDocuments(ProjectExportRequest aModel, File aCopyDir, boolean aIncludeInProgress, ProjectExportTaskMonitor aMonitor) throws ProjectExportException, IOException {
    Project project = aModel.getProject();

    // Get all the source documents from the project
    List<de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument> documents = documentService.listSourceDocuments(project);

    // Determine which format to use for export. WebAnno TSV3 is used both for the "auto"
    // setting and as fallback when no writer exists for the requested format.
    FormatSupport format;
    if (FORMAT_AUTO.equals(aModel.getFormat())) {
        format = new WebAnnoTsv3FormatSupport();
    } else {
        format = importExportService.getWritableFormatById(aModel.getFormat()).orElseGet(WebAnnoTsv3FormatSupport::new);
    }

    int initProgress = aMonitor.getProgress() - 1;
    int i = 1;
    for (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument sourceDocument : documents) {
        File curationCasDir = new File(aCopyDir + CURATION_AS_SERIALISED_CAS + sourceDocument.getName());
        FileUtils.forceMkdir(curationCasDir);
        File curationDir = new File(aCopyDir + CURATION_FOLDER + sourceDocument.getName());
        FileUtils.forceMkdir(curationDir);

        // Export the documents whose curation is finished or also the ones that are in
        // progress (if requested)
        if ((aIncludeInProgress && SourceDocumentState.CURATION_IN_PROGRESS.equals(sourceDocument.getState())) || SourceDocumentState.CURATION_FINISHED.equals(sourceDocument.getState())) {
            File curationCasFile = documentService.getCasFile(sourceDocument, WebAnnoConst.CURATION_USER);
            if (curationCasFile.exists()) {
                // Copy CAS - this is used when importing the project again
                FileUtils.copyFileToDirectory(curationCasFile, curationCasDir);

                // Copy secondary export format for convenience - not used during import
                try {
                    File curationFile = importExportService.exportAnnotationDocument(sourceDocument, WebAnnoConst.CURATION_USER, format, WebAnnoConst.CURATION_USER, Mode.CURATION);
                    try {
                        FileUtils.copyFileToDirectory(curationFile, curationDir);
                    } finally {
                        // Remove the temporary export even if the copy failed
                        FileUtils.forceDelete(curationFile);
                    }
                } catch (Exception e) {
                    // Keep the original exception as cause so the reason for the abort is
                    // not lost to whoever handles or logs the export failure.
                    ProjectExportException failure = new ProjectExportException("Aborting due to unrecoverable error while exporting!");
                    failure.initCause(e);
                    throw failure;
                }
            }
        }

        // This step contributes up to 10 progress points, spread evenly over the documents
        aMonitor.setProgress(initProgress + (int) Math.ceil(((double) i) / documents.size() * 10.0));
        i++;
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport in project webanno by webanno.
the class LegacyRemoteApiController method curationDocumentRead.
/**
 * Download curated document with requested parameters
 *
 * Test when running in Eclipse: Open your browser, paste following URL with appropriate values:
 *
 * http://USERNAME:PASSWORD@localhost:8080/webanno-webapp/api/projects/{aProjectId}/curationdoc/
 * {aSourceDocumentId}?format=xmi
 *
 * The request is rejected with an error status if the current user is unknown, the project or
 * source document does not exist, the user is neither manager of the project nor
 * administrator, or the curation of the document is not finished.
 *
 * @param response
 *            HttpServletResponse.
 * @param aProjectId
 *            {@link Project} ID.
 * @param aSourceDocumentId
 *            {@link SourceDocument} ID.
 * @param aFormatId
 *            Export format. Optional; if omitted, the format of the source document is used.
 *            If the format is not writable, WebAnno TSV3 is used instead.
 * @throws Exception
 *             if there was an error.
 */
@RequestMapping(value = "/" + PROJECTS + "/{" + PARAM_PROJECT_ID + "}/" + CURATION + "/{" + PARAM_DOCUMENT_ID + "}", method = RequestMethod.GET)
public void curationDocumentRead(HttpServletResponse response, @PathVariable(PARAM_PROJECT_ID) long aProjectId, @PathVariable(PARAM_DOCUMENT_ID) long aSourceDocumentId, @RequestParam(value = PARAM_FORMAT, required = false) String aFormatId) throws Exception {
    // Get current user
    String username = SecurityContextHolder.getContext().getAuthentication().getName();
    User user = userRepository.get(username);
    if (user == null) {
        response.sendError(HttpStatus.BAD_REQUEST.value(), "User [" + username + "] not found.");
        return;
    }

    // Get project
    Project project;
    try {
        project = projectRepository.getProject(aProjectId);
    } catch (NoResultException e) {
        response.sendError(HttpStatus.NOT_FOUND.value(), "Project [" + aProjectId + "] not found.");
        return;
    }

    // Check for the access
    boolean hasAccess = projectRepository.isManager(project, user) || userRepository.isAdministrator(user);
    if (!hasAccess) {
        response.sendError(HttpStatus.FORBIDDEN.value(), "User [" + username + "] is not allowed to access project [" + aProjectId + "]");
        return;
    }

    // Get source document
    SourceDocument srcDocument;
    try {
        srcDocument = documentRepository.getSourceDocument(aProjectId, aSourceDocumentId);
    } catch (NoResultException e) {
        response.sendError(HttpStatus.NOT_FOUND.value(), "Source document [" + aSourceDocumentId + "] not found in project [" + aProjectId + "].");
        return;
    }

    // Check if curation is complete
    if (!SourceDocumentState.CURATION_FINISHED.equals(srcDocument.getState())) {
        response.sendError(HttpStatus.NOT_FOUND.value(), "Curation of source document [" + aSourceDocumentId + "] not yet complete.");
        return;
    }

    // Without an explicit format, export in the format of the source document
    String formatId;
    if (aFormatId == null) {
        formatId = srcDocument.getFormat();
    } else {
        formatId = aFormatId;
    }

    // Determine the format
    FormatSupport format = importExportService.getWritableFormatById(formatId).orElseGet(() -> {
        LOG.info("[{}] Format [{}] is not writable - exporting as WebAnno TSV3 instead.", srcDocument.getName(), formatId);
        return new WebAnnoTsv3FormatSupport();
    });

    // Temporary file of annotation document
    File downloadableFile = importExportService.exportAnnotationDocument(srcDocument, WebAnnoConst.CURATION_USER, format, srcDocument.getName(), Mode.CURATION);

    try {
        // Set mime type
        String mimeType = URLConnection.guessContentTypeFromName(downloadableFile.getName());
        if (mimeType == null) {
            LOG.info("mimetype is not detectable, will take default");
            mimeType = "application/octet-stream";
        }

        // Set response
        // NOTE(review): the second setContentType call replaces the detected mime type, so
        // the response is always sent as "application/force-download" - confirm whether the
        // detected type is still needed.
        response.setContentType(mimeType);
        response.setContentType("application/force-download");
        response.setHeader("Content-Disposition", "inline; filename=\"" + downloadableFile.getName() + "\"");
        response.setContentLength((int) downloadableFile.length());

        // try-with-resources makes sure the file handle is released even if obtaining the
        // response output stream fails before the copy starts
        try (InputStream inputStream = new BufferedInputStream(new FileInputStream(downloadableFile))) {
            FileCopyUtils.copy(inputStream, response.getOutputStream());
        }
    } catch (Exception e) {
        // Still best-effort (the exception is not propagated), but log the full exception so
        // failed downloads can be diagnosed
        LOG.error("Unable to send curation document [{}]", downloadableFile.getName(), e);
    } finally {
        // The export is a temporary file - always clean it up
        if (downloadableFile.exists()) {
            downloadableFile.delete();
        }
    }
}
Aggregations