Search in sources :

Example 1 with CasDoctorException

Use of de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException in the project webanno by webanno.

The example is taken from the class CasStorageServiceImpl, method realWriteCas.

/**
 * Checks the given CAS with the CAS doctor (if one is configured), writes it to
 * {@code <aUserName>.ser} and afterwards maintains the per-user history files
 * ({@code <aUserName>.ser.<timestamp>.bak}) in the annotation folder.
 * <p>
 * Before writing, an existing {@code <aUserName>.ser} in {@code aAnnotationFolder} is renamed to
 * {@code <aUserName>.ser.old}. If writing fails, the previous version is put back; if writing
 * succeeds, the {@code .old} file is removed. All file operations happen while holding
 * {@code lock}.
 * <p>
 * History handling is only active when {@code backupInterval > 0}. A new history file is created
 * when there is none yet or when the newest one is older than {@code backupInterval} (compared
 * against {@link File#lastModified()}, i.e. in milliseconds). Only when a new history file was
 * created is the history pruned by count ({@code backupKeepNumber}) and by age
 * ({@code backupKeepTime}).
 *
 * @param aProject
 *            the project the document belongs to (used for the CAS doctor and for logging).
 * @param aDocumentName
 *            the name of the source document (used for logging and error messages).
 * @param aDocumentId
 *            the ID of the source document (used for logging and error messages).
 * @param aJcas
 *            the CAS to be written.
 * @param aUserName
 *            the user owning the CAS; also determines the file name.
 * @param aAnnotationFolder
 *            the folder in which the current version, the backup and the history files are
 *            managed.
 * @param aTargetPath
 *            the folder into which the serialized CAS is written.
 * @throws IOException
 *             if the CAS could not be written or the history could not be maintained.
 * @throws DataRetrievalFailureException
 *             if the CAS doctor reports problems or fails while analyzing the CAS.
 */
private void realWriteCas(Project aProject, String aDocumentName, long aDocumentId, JCas aJcas, String aUserName, File aAnnotationFolder, File aTargetPath) throws IOException {
    log.debug("Writing annotation document [{}]({}) for user [{}] in project [{}]({})", aDocumentName, aDocumentId, aUserName, aProject.getName(), aProject.getId());
    try {
        if (casDoctor != null) {
            casDoctor.analyze(aProject, aJcas.getCas());
        }
    } catch (CasDoctorException e) {
        StringBuilder detailMsg = new StringBuilder();
        detailMsg.append("CAS Doctor found problems for user [").append(aUserName).append("] in source document [").append(aDocumentName).append("] (").append(aDocumentId).append(") in project[").append(aProject.getName()).append("] (").append(aProject.getId()).append(")\n");
        e.getDetails().forEach(m -> detailMsg.append(String.format("- [%s] %s%n", m.level, m.message)));
        // Keep the original exception as cause so its stack trace is not lost
        throw new DataRetrievalFailureException(detailMsg.toString(), e);
    } catch (Exception e) {
        throw new DataRetrievalFailureException("Error analyzing CAS of user [" + aUserName + "] in source document [" + aDocumentName + "] (" + aDocumentId + ") in project [" + aProject.getName() + "] (" + aProject.getId() + ")", e);
    }
    synchronized (lock) {
        FileUtils.forceMkdir(aAnnotationFolder);
        final String username = aUserName;
        File currentVersion = new File(aAnnotationFolder, username + ".ser");
        File oldVersion = new File(aAnnotationFolder, username + ".ser.old");
        // Remember how far the backup step got so that the rollback below never deletes
        // the only good copy of the data.
        final boolean hadCurrentVersion = currentVersion.exists();
        boolean backupMade = false;
        // Save current version
        try {
            // Make a backup of the current version of the file before overwriting
            if (hadCurrentVersion) {
                renameFile(currentVersion, oldVersion);
                backupMade = true;
            }
            // Now write the new version to "<username>.ser" or CURATION_USER.ser
            DocumentMetaData md;
            try {
                md = DocumentMetaData.get(aJcas);
            } catch (IllegalArgumentException e) {
                // No document metadata in the CAS yet - create it
                md = DocumentMetaData.create(aJcas);
            }
            md.setDocumentId(aUserName);
            // NOTE(review): the CAS is written below aTargetPath while backup/restore/history
            // operate on aAnnotationFolder - presumably callers always pass the same folder
            // for both; confirm.
            CasPersistenceUtils.writeSerializedCas(aJcas, new File(aTargetPath, aUserName + ".ser"));
            try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aProject.getId()))) {
                log.info("Updated annotations for user [{}] on document [{}]({}) in project [{}]({})", aUserName, aDocumentName, aDocumentId, aProject.getName(), aProject.getId());
            }
            // If the saving was successful, we delete the old version
            if (oldVersion.exists()) {
                FileUtils.forceDelete(oldVersion);
            }
        } catch (IOException e) {
            // If we could not save the new version, restore the old one. Any problem during
            // the restore is attached to the original exception instead of replacing it.
            try {
                // If the backup step itself failed and the previous version is still in place,
                // that file is the last good version and must not be touched.
                boolean previousVersionIntact = hadCurrentVersion && !backupMade && currentVersion.exists();
                if (!previousVersionIntact) {
                    // forceDelete throws if the file is missing, e.g. when writing failed
                    // before the file was even created - so check first.
                    if (currentVersion.exists()) {
                        FileUtils.forceDelete(currentVersion);
                    }
                    // If this is the first version, there is no old version, so do not
                    // restore anything
                    if (oldVersion.exists()) {
                        renameFile(oldVersion, currentVersion);
                    }
                }
            } catch (IOException restoreFailure) {
                e.addSuppressed(restoreFailure);
            }
            // Now abort anyway
            throw e;
        }
        // Manage history
        if (backupInterval > 0) {
            // Determine the reference point in time based on the current version
            long now = currentVersion.lastModified();
            // Get all history files for the current user
            final Pattern historyPattern = Pattern.compile(Pattern.quote(username) + "\\.ser\\.[0-9]+\\.bak");
            FileFilter historyFilter = file -> historyPattern.matcher(file.getName()).matches();
            File[] history = aAnnotationFolder.listFiles(historyFilter);
            if (history == null) {
                // listFiles returns null if the folder cannot be listed; the CAS itself has
                // already been saved, so continue as if there was no history.
                log.warn("Unable to list history files in [{}] - assuming there is no history yet", aAnnotationFolder);
                history = new File[0];
            }
            // Sort the files (oldest one first)
            Arrays.sort(history, LastModifiedFileComparator.LASTMODIFIED_COMPARATOR);
            // Check if we need to make a new history file
            boolean historyFileCreated = false;
            File historyFile = new File(aAnnotationFolder, username + ".ser." + now + ".bak");
            if (history.length == 0) {
                // If there is no history yet but we should keep history, then we create a
                // history file in any case.
                FileUtils.copyFile(currentVersion, historyFile);
                historyFileCreated = true;
            } else {
                // Check if the newest history file is significantly older than the current one
                File latestHistory = history[history.length - 1];
                if (latestHistory.lastModified() + backupInterval < now) {
                    FileUtils.copyFile(currentVersion, historyFile);
                    historyFileCreated = true;
                }
            }
            // Prune history based on number of backup
            if (historyFileCreated) {
                // The new version is not in the history, so we keep that in any case. That
                // means we need to keep one less.
                int toKeep = Math.max(backupKeepNumber - 1, 0);
                if ((backupKeepNumber > 0) && (toKeep < history.length)) {
                    // The oldest files come first, so the surplus is the head of the array
                    int surplus = history.length - toKeep;
                    File[] toRemove = Arrays.copyOfRange(history, 0, surplus);
                    // Restrict the history to what is left
                    history = Arrays.copyOfRange(history, surplus, history.length);
                    // Remove these old files
                    for (File file : toRemove) {
                        FileUtils.forceDelete(file);
                        try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aProject.getId()))) {
                            log.info("Removed surplus history file [{}] of user [{}] for " + "document [{}]({}) in project [{}]({})", file.getName(), aUserName, aDocumentName, aDocumentId, aProject.getName(), aProject.getId());
                        }
                    }
                }
                // Prune history based on time
                if (backupKeepTime > 0) {
                    for (File file : history) {
                        if ((file.lastModified() + backupKeepTime) < now) {
                            FileUtils.forceDelete(file);
                            try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aProject.getId()))) {
                                log.info("Removed outdated history file [{}] of user [{}] for " + "document [{}]({}) in project [{}]({})", file.getName(), aUserName, aDocumentName, aDocumentId, aProject.getName(), aProject.getId());
                            }
                        }
                    }
                }
            }
        }
    }
}
Also used : Arrays(java.util.Arrays) CasDoctorException(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException) LoggerFactory(org.slf4j.LoggerFactory) CasStorageService(de.tudarmstadt.ukp.clarin.webanno.api.CasStorageService) CAS(org.apache.uima.cas.CAS) Autowired(org.springframework.beans.factory.annotation.Autowired) HashMap(java.util.HashMap) DataRetrievalFailureException(org.springframework.dao.DataRetrievalFailureException) InitializingBean(org.springframework.beans.factory.InitializingBean) RequestCycle(org.apache.wicket.request.cycle.RequestCycle) Value(org.springframework.beans.factory.annotation.Value) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) Matcher(java.util.regex.Matcher) User(de.tudarmstadt.ukp.clarin.webanno.security.model.User) Map(java.util.Map) Project(de.tudarmstadt.ukp.clarin.webanno.model.Project) UIMAException(org.apache.uima.UIMAException) JCas(org.apache.uima.jcas.JCas) Logger(org.slf4j.Logger) PROJECT_FOLDER(de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.PROJECT_FOLDER) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) AnnotationDocument(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument) ANNOTATION_FOLDER(de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.ANNOTATION_FOLDER) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) CasCreationUtils(org.apache.uima.util.CasCreationUtils) Component(org.springframework.stereotype.Component) FileFilter(java.io.FileFilter) CasDoctor(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor) AbstractRequestCycleListener(org.apache.wicket.request.cycle.AbstractRequestCycleListener) DOCUMENT_FOLDER(de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.DOCUMENT_FOLDER) MetaDataKey(org.apache.wicket.MetaDataKey) MDC(org.slf4j.MDC) LastModifiedFileComparator(org.apache.commons.io.comparator.LastModifiedFileComparator) 
SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) Entry(java.util.Map.Entry) Pattern(java.util.regex.Pattern) Logging(de.tudarmstadt.ukp.clarin.webanno.support.logging.Logging) Matcher(java.util.regex.Matcher) IOException(java.io.IOException) MDC(org.slf4j.MDC) CasDoctorException(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException) DataRetrievalFailureException(org.springframework.dao.DataRetrievalFailureException) UIMAException(org.apache.uima.UIMAException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) CasDoctorException(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException) DataRetrievalFailureException(org.springframework.dao.DataRetrievalFailureException) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) FileFilter(java.io.FileFilter) File(java.io.File)

Aggregations

CasStorageService (de.tudarmstadt.ukp.clarin.webanno.api.CasStorageService)1 ANNOTATION_FOLDER (de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.ANNOTATION_FOLDER)1 DOCUMENT_FOLDER (de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.DOCUMENT_FOLDER)1 PROJECT_FOLDER (de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.PROJECT_FOLDER)1 CasDoctor (de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor)1 CasDoctorException (de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException)1 AnnotationDocument (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument)1 Project (de.tudarmstadt.ukp.clarin.webanno.model.Project)1 SourceDocument (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument)1 User (de.tudarmstadt.ukp.clarin.webanno.security.model.User)1 Logging (de.tudarmstadt.ukp.clarin.webanno.support.logging.Logging)1 DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData)1 File (java.io.File)1 FileFilter (java.io.FileFilter)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 Arrays (java.util.Arrays)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1