Search in sources :

Example 1 with CasDoctor

use of de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor in project webanno by webanno.

the class CasStorageServiceImpl method realWriteCas.

/**
 * Persists the given CAS as the annotation file of a user and maintains backup/history files.
 * <p>
 * If a CAS Doctor is configured, the CAS is analyzed first and any reported problem aborts the
 * write. While holding the storage lock, an existing {@code <user>.ser} in the annotation folder
 * is renamed to {@code <user>.ser.old}, the CAS is serialized, and the {@code .old} file is
 * deleted again once the write succeeded. If the write fails, the {@code .old} file is renamed
 * back. When {@code backupInterval} is positive, history files named
 * {@code <user>.ser.<lastModified>.bak} are created and pruned according to
 * {@code backupKeepNumber} and {@code backupKeepTime}.
 *
 * @param aProject
 *            project passed to the CAS Doctor and used in log/error messages
 * @param aDocumentName
 *            document name, used in log/error messages
 * @param aDocumentId
 *            document id, used in log/error messages
 * @param aJcas
 *            the CAS to write
 * @param aUserName
 *            user owning the CAS; determines the file name and is stored as the document id in
 *            the CAS document metadata
 * @param aAnnotationFolder
 *            folder in which the backup ({@code .old}) and history ({@code .bak}) files live
 * @param aTargetPath
 *            folder into which the serialized CAS is written
 * @throws IOException
 *             if writing the CAS or maintaining the backup/history files fails
 * @throws DataRetrievalFailureException
 *             if the CAS Doctor reports problems or the analysis itself fails
 */
private void realWriteCas(Project aProject, String aDocumentName, long aDocumentId, JCas aJcas, String aUserName, File aAnnotationFolder, File aTargetPath) throws IOException {
    log.debug("Writing annotation document [{}]({}) for user [{}] in project [{}]({})", aDocumentName, aDocumentId, aUserName, aProject.getName(), aProject.getId());
    try {
        if (casDoctor != null) {
            casDoctor.analyze(aProject, aJcas.getCas());
        }
    } catch (CasDoctorException e) {
        // Fold every reported problem into the exception message
        StringBuilder detailMsg = new StringBuilder();
        detailMsg.append("CAS Doctor found problems for user [").append(aUserName).append("] in source document [").append(aDocumentName).append("] (").append(aDocumentId).append(") in project[").append(aProject.getName()).append("] (").append(aProject.getId()).append(")\n");
        e.getDetails().forEach(m -> detailMsg.append(String.format("- [%s] %s%n", m.level, m.message)));
        throw new DataRetrievalFailureException(detailMsg.toString());
    } catch (Exception e) {
        throw new DataRetrievalFailureException("Error analyzing CAS of user [" + aUserName + "] in source document [" + aDocumentName + "] (" + aDocumentId + ") in project [" + aProject.getName() + "] (" + aProject.getId() + ")", e);
    }
    synchronized (lock) {
        FileUtils.forceMkdir(aAnnotationFolder);
        final String username = aUserName;
        // NOTE(review): backup/history handling works on aAnnotationFolder while the CAS itself
        // is written to aTargetPath - presumably callers pass the same folder for both; confirm.
        File currentVersion = new File(aAnnotationFolder, username + ".ser");
        File oldVersion = new File(aAnnotationFolder, username + ".ser.old");
        // Save current version
        try {
            // Make a backup of the current version of the file before overwriting
            if (currentVersion.exists()) {
                renameFile(currentVersion, oldVersion);
            }
            // Now write the new version to "<username>.ser" or CURATION_USER.ser
            DocumentMetaData md;
            try {
                md = DocumentMetaData.get(aJcas);
            } catch (IllegalArgumentException e) {
                // The CAS carries no document metadata yet, so create it
                md = DocumentMetaData.create(aJcas);
            }
            md.setDocumentId(aUserName);
            CasPersistenceUtils.writeSerializedCas(aJcas, new File(aTargetPath, aUserName + ".ser"));
            try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aProject.getId()))) {
                log.info("Updated annotations for user [{}] on document [{}]({}) in project [{}]({})", aUserName, aDocumentName, aDocumentId, aProject.getName(), aProject.getId());
            }
            // If the saving was successful, we delete the old version
            if (oldVersion.exists()) {
                FileUtils.forceDelete(oldVersion);
            }
        } catch (IOException e) {
            // If we could not save the new version, restore the old one.
            try {
                // The new file may never have been created (e.g. the write failed right away).
                // Deleting a missing file would throw, hide the original error and skip the
                // restoration below, so only delete what is actually there.
                if (currentVersion.exists()) {
                    FileUtils.forceDelete(currentVersion);
                }
                // If this is the first version, there is no old version, so do not restore
                // anything
                if (oldVersion.exists()) {
                    renameFile(oldVersion, currentVersion);
                }
            } catch (IOException restoreFailure) {
                // Keep the original failure as the primary error and attach the restore problem
                e.addSuppressed(restoreFailure);
            }
            // Now abort anyway
            throw e;
        }
        // Manage history
        if (backupInterval > 0) {
            // Determine the reference point in time based on the current version
            long now = currentVersion.lastModified();
            // Get all history files for the current user
            File[] history = aAnnotationFolder.listFiles(new FileFilter() {

                private final Matcher matcher = Pattern.compile(Pattern.quote(username) + "\\.ser\\.[0-9]+\\.bak").matcher("");

                @Override
                public boolean accept(File aFile) {
                    // Check if the filename matches the pattern given above.
                    return matcher.reset(aFile.getName()).matches();
                }
            });
            // listFiles() yields null when the folder cannot be listed; report that as an I/O
            // problem instead of failing with a NullPointerException while sorting.
            if (history == null) {
                throw new IOException("Unable to list history files in [" + aAnnotationFolder + "]");
            }
            // Sort the files (oldest one first)
            Arrays.sort(history, LastModifiedFileComparator.LASTMODIFIED_COMPARATOR);
            // Check if we need to make a new history file
            boolean historyFileCreated = false;
            File historyFile = new File(aAnnotationFolder, username + ".ser." + now + ".bak");
            if (history.length == 0) {
                // If there is no history yet but we should keep history, then we create a
                // history file in any case.
                FileUtils.copyFile(currentVersion, historyFile);
                historyFileCreated = true;
            } else {
                // Check if the newest history file is significantly older than the current one
                File latestHistory = history[history.length - 1];
                if (latestHistory.lastModified() + backupInterval < now) {
                    FileUtils.copyFile(currentVersion, historyFile);
                    historyFileCreated = true;
                }
            }
            // Prune history based on number of backup
            if (historyFileCreated) {
                // The new version is not in the history, so we keep that in any case. That
                // means we need to keep one less.
                int toKeep = Math.max(backupKeepNumber - 1, 0);
                if ((backupKeepNumber > 0) && (toKeep < history.length)) {
                    // Copy the oldest files to a new array
                    File[] toRemove = new File[history.length - toKeep];
                    System.arraycopy(history, 0, toRemove, 0, toRemove.length);
                    // Restrict the history to what is left
                    File[] newHistory = new File[toKeep];
                    if (toKeep > 0) {
                        System.arraycopy(history, toRemove.length, newHistory, 0, newHistory.length);
                    }
                    history = newHistory;
                    // Remove these old files
                    for (File file : toRemove) {
                        FileUtils.forceDelete(file);
                        try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aProject.getId()))) {
                            log.info("Removed surplus history file [{}] of user [{}] for " + "document [{}]({}) in project [{}]({})", file.getName(), aUserName, aDocumentName, aDocumentId, aProject.getName(), aProject.getId());
                        }
                    }
                }
                // Prune history based on time
                if (backupKeepTime > 0) {
                    for (File file : history) {
                        if ((file.lastModified() + backupKeepTime) < now) {
                            FileUtils.forceDelete(file);
                            try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aProject.getId()))) {
                                log.info("Removed outdated history file [{}] of user [{}] for " + "document [{}]({}) in project [{}]({})", file.getName(), aUserName, aDocumentName, aDocumentId, aProject.getName(), aProject.getId());
                            }
                        }
                    }
                }
            }
        }
    }
}
Also used : Arrays(java.util.Arrays) CasDoctorException(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException) LoggerFactory(org.slf4j.LoggerFactory) CasStorageService(de.tudarmstadt.ukp.clarin.webanno.api.CasStorageService) CAS(org.apache.uima.cas.CAS) Autowired(org.springframework.beans.factory.annotation.Autowired) HashMap(java.util.HashMap) DataRetrievalFailureException(org.springframework.dao.DataRetrievalFailureException) InitializingBean(org.springframework.beans.factory.InitializingBean) RequestCycle(org.apache.wicket.request.cycle.RequestCycle) Value(org.springframework.beans.factory.annotation.Value) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) Matcher(java.util.regex.Matcher) User(de.tudarmstadt.ukp.clarin.webanno.security.model.User) Map(java.util.Map) Project(de.tudarmstadt.ukp.clarin.webanno.model.Project) UIMAException(org.apache.uima.UIMAException) JCas(org.apache.uima.jcas.JCas) Logger(org.slf4j.Logger) PROJECT_FOLDER(de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.PROJECT_FOLDER) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) AnnotationDocument(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument) ANNOTATION_FOLDER(de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.ANNOTATION_FOLDER) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) CasCreationUtils(org.apache.uima.util.CasCreationUtils) Component(org.springframework.stereotype.Component) FileFilter(java.io.FileFilter) CasDoctor(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor) AbstractRequestCycleListener(org.apache.wicket.request.cycle.AbstractRequestCycleListener) DOCUMENT_FOLDER(de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.DOCUMENT_FOLDER) MetaDataKey(org.apache.wicket.MetaDataKey) MDC(org.slf4j.MDC) LastModifiedFileComparator(org.apache.commons.io.comparator.LastModifiedFileComparator) 
SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) Entry(java.util.Map.Entry) Pattern(java.util.regex.Pattern) Logging(de.tudarmstadt.ukp.clarin.webanno.support.logging.Logging) Matcher(java.util.regex.Matcher) IOException(java.io.IOException) MDC(org.slf4j.MDC) CasDoctorException(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException) DataRetrievalFailureException(org.springframework.dao.DataRetrievalFailureException) UIMAException(org.apache.uima.UIMAException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) CasDoctorException(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException) DataRetrievalFailureException(org.springframework.dao.DataRetrievalFailureException) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) FileFilter(java.io.FileFilter) File(java.io.File)

Example 2 with CasDoctor

use of de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor in project webanno by webanno.

the class ProjectCasDoctorPanel method actionRepair.

/**
 * Applies the repairs selected in the form to every CAS of the current project.
 * <p>
 * For each source document the initial CAS is read (or created if it does not exist), repaired
 * and written back; afterwards the same is done for every annotation document that has an
 * annotation CAS. The log messages produced for each CAS are collected as one message set in the
 * form model, and finally this component is added to the AJAX target.
 *
 * @param aTarget
 *            the AJAX target to which this component is added
 * @param aForm
 *            the submitted form (not used by this method)
 */
private void actionRepair(AjaxRequestTarget aTarget, Form<?> aForm) throws IOException, UIMAException, ClassNotFoundException {
    casStorageService.disableCache();

    // A doctor that only repairs and never aborts on a failed check
    CasDoctor doctor = new CasDoctor();
    doctor.setApplicationContext(ApplicationContextProvider.getApplicationContext());
    doctor.setFatalChecks(false);
    doctor.setRepairClasses(formModel.repairs);

    Project currentProject = getModelObject();
    formModel.messageSets = new ArrayList<>();

    for (SourceDocument sourceDoc : documentService.listSourceDocuments(currentProject)) {
        // Repair the initial CAS of the source document
        LogMessageSet initialMessages = new LogMessageSet(sourceDoc.getName() + " [INITIAL]");
        JCas initialCas;
        if (!documentService.existsInitialCas(sourceDoc)) {
            initialMessages.messages.add(new LogMessage(getClass(), LogLevel.INFO, "Created initial CAS for [" + sourceDoc.getName() + "]"));
            initialCas = documentService.createInitialCas(sourceDoc, false);
        } else {
            initialCas = documentService.readInitialCas(sourceDoc, false);
        }
        doctor.repair(currentProject, initialCas.getCas(), initialMessages.messages);
        File initialCasFile = documentService.getCasFile(sourceDoc, INITIAL_CAS_PSEUDO_USER);
        CasPersistenceUtils.writeSerializedCas(initialCas, initialCasFile);
        noticeIfThereAreNoMessages(initialMessages);
        formModel.messageSets.add(initialMessages);

        // Repair the CAS of every annotation document that actually has one
        for (AnnotationDocument annotationDoc : documentService.listAnnotationDocuments(sourceDoc)) {
            if (!documentService.existsAnnotationCas(annotationDoc)) {
                continue;
            }
            LogMessageSet userMessages = new LogMessageSet(sourceDoc.getName() + " [" + annotationDoc.getUser() + "]");
            JCas annotationCas = documentService.readAnnotationCas(annotationDoc, false);
            doctor.repair(currentProject, annotationCas.getCas(), userMessages.messages);
            File annotationCasFile = documentService.getCasFile(annotationDoc.getDocument(), annotationDoc.getUser());
            CasPersistenceUtils.writeSerializedCas(annotationCas, annotationCasFile);
            noticeIfThereAreNoMessages(userMessages);
            formModel.messageSets.add(userMessages);
        }
    }

    aTarget.add(this);
}
Also used : Project(de.tudarmstadt.ukp.clarin.webanno.model.Project) LogMessage(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage) CasDoctor(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) JCas(org.apache.uima.jcas.JCas) AnnotationDocument(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument)

Example 3 with CasDoctor

use of de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor in project webanno by webanno.

the class ProjectCasDoctorPanel method actionCheck.

/**
 * Runs all checks found by {@code CasDoctor.scanChecks()} on every CAS of the current project.
 * <p>
 * For each source document the initial CAS is read (or created if it does not exist) and
 * analyzed; any exception raised while doing so is recorded as an error message and logged
 * instead of being propagated. Afterwards every annotation document that has an annotation CAS
 * is analyzed as well. The log messages produced for each CAS are collected as one message set
 * in the form model, and finally this component is added to the AJAX target.
 *
 * @param aTarget
 *            the AJAX target to which this component is added
 * @param aForm
 *            the submitted form (not used by this method)
 */
private void actionCheck(AjaxRequestTarget aTarget, Form<?> aForm) throws IOException, UIMAException, ClassNotFoundException {
    casStorageService.disableCache();

    // A doctor that runs every available check without aborting on failures
    CasDoctor doctor = new CasDoctor();
    doctor.setApplicationContext(ApplicationContextProvider.getApplicationContext());
    doctor.setFatalChecks(false);
    doctor.setCheckClasses(CasDoctor.scanChecks());

    Project currentProject = getModelObject();
    formModel.messageSets = new ArrayList<>();

    for (SourceDocument sourceDoc : documentService.listSourceDocuments(currentProject)) {
        // Check the initial CAS of the source document
        LogMessageSet initialMessages = new LogMessageSet(sourceDoc.getName() + " [INITIAL]");
        try {
            JCas initialCas;
            if (!documentService.existsInitialCas(sourceDoc)) {
                initialMessages.messages.add(new LogMessage(getClass(), LogLevel.INFO, "No initial CAS for [" + sourceDoc.getName() + "]"));
                initialCas = documentService.createInitialCas(sourceDoc, false);
            } else {
                initialCas = documentService.readInitialCas(sourceDoc, false);
            }
            doctor.analyze(currentProject, initialCas.getCas(), initialMessages.messages);
        } catch (Exception e) {
            // Report the problem in the result list and carry on with the remaining documents
            initialMessages.messages.add(new LogMessage(getClass(), LogLevel.ERROR, "Error reading initial CAS for [" + sourceDoc.getName() + "]: " + e.getMessage()));
            LOG.error("Error reading initial CAS for [" + sourceDoc.getName() + "]", e);
        }
        noticeIfThereAreNoMessages(initialMessages);
        formModel.messageSets.add(initialMessages);

        // Check the CAS of every annotation document that actually has one
        for (AnnotationDocument annotationDoc : documentService.listAnnotationDocuments(sourceDoc)) {
            if (!documentService.existsAnnotationCas(annotationDoc)) {
                continue;
            }
            LogMessageSet userMessages = new LogMessageSet(sourceDoc.getName() + " [" + annotationDoc.getUser() + "]");
            JCas annotationCas = documentService.readAnnotationCas(annotationDoc, false);
            doctor.analyze(currentProject, annotationCas.getCas(), userMessages.messages);
            noticeIfThereAreNoMessages(userMessages);
            formModel.messageSets.add(userMessages);
        }
    }

    aTarget.add(this);
}
Also used : Project(de.tudarmstadt.ukp.clarin.webanno.model.Project) LogMessage(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage) CasDoctor(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) JCas(org.apache.uima.jcas.JCas) AnnotationDocument(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument) UIMAException(org.apache.uima.UIMAException) IOException(java.io.IOException)

Example 4 with CasDoctor

use of de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor in project webanno by webanno.

the class RemoveDanglingRelationsRepairTest method test.

/**
 * Builds a CAS in which an indexed dependency relation points to a token that was never added
 * to the indexes, runs the check and the repair on it, and expects the check to fail.
 */
@Test
public void test() throws Exception {
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("This is a test.");

    // Governor token: added to the indexes
    Token governor = new Token(jcas, 0, 4);
    governor.addToIndexes();

    // Dependent token: created but not added to the indexes
    Token dependent = new Token(jcas, 6, 8);

    // Indexed relation connecting the two tokens
    Dependency relation = new Dependency(jcas, 0, 8);
    relation.setGovernor(governor);
    relation.setDependent(dependent);
    relation.addToIndexes();

    List<LogMessage> log = new ArrayList<>();
    CasDoctor doctor = new CasDoctor(RemoveDanglingRelationsRepair.class, AllFeatureStructuresIndexedCheck.class);

    // Neither the check nor the repair needs a project, hence null
    boolean checkPassed = doctor.analyze(null, jcas.getCas(), log);
    doctor.repair(null, jcas.getCas(), log);

    assertFalse(checkPassed);

    for (LogMessage entry : log) {
        System.out.println(entry);
    }
}
Also used : LogMessage(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage) ArrayList(java.util.ArrayList) CasDoctor(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) Test(org.junit.Test)

Example 5 with CasDoctor

use of de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor in project webanno by webanno.

the class AllAnnotationsIndexedCheckTest method testOK.

/**
 * Builds a CAS in which every created annotation is added to the indexes, including one that is
 * additionally referenced through a feature of another annotation, and expects the check to
 * pass.
 */
@Test
public void testOK() throws Exception {
    // Type system with one extra annotation type that has a "ref" feature pointing to an
    // annotation
    String refTypeName = "RefType";
    TypeSystemDescription typeSystem = UIMAFramework.getResourceSpecifierFactory().createTypeSystemDescription();
    TypeDescription refTypeDescription = typeSystem.addType(refTypeName, null, CAS.TYPE_NAME_ANNOTATION);
    refTypeDescription.addFeature("ref", null, CAS.TYPE_NAME_ANNOTATION);

    CAS cas = CasCreationUtils.createCas(typeSystem, null, null);
    Type refType = cas.getTypeSystem().getType(refTypeName);

    // A plain annotation that is indexed
    AnnotationFS plain = cas.createAnnotation(cas.getAnnotationType(), 0, 1);
    cas.addFsToIndexes(plain);

    // An indexed annotation that is also the target of the reference created below
    AnnotationFS referenced = cas.createAnnotation(cas.getAnnotationType(), 0, 1);
    cas.addFsToIndexes(referenced);

    // An indexed annotation whose "ref" feature points to the annotation above
    AnnotationFS referrer = cas.createAnnotation(refType, 0, 1);
    referrer.setFeatureValue(refType.getFeatureByBaseName("ref"), referenced);
    cas.addFsToIndexes(referrer);

    List<LogMessage> log = new ArrayList<>();
    CasDoctor doctor = new CasDoctor(AllFeatureStructuresIndexedCheck.class);

    // The check does not need a project, hence null
    boolean checkPassed = doctor.analyze(null, cas, log);

    for (LogMessage entry : log) {
        System.out.println(entry);
    }

    assertTrue(checkPassed);
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) CAS(org.apache.uima.cas.CAS) LogMessage(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage) ArrayList(java.util.ArrayList) CasDoctor(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor) TypeDescription(org.apache.uima.resource.metadata.TypeDescription) Test(org.junit.Test)

Aggregations

CasDoctor (de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor)6 LogMessage (de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage)5 JCas (org.apache.uima.jcas.JCas)4 AnnotationDocument (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument)3 Project (de.tudarmstadt.ukp.clarin.webanno.model.Project)3 SourceDocument (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument)3 ArrayList (java.util.ArrayList)3 CAS (org.apache.uima.cas.CAS)3 TypeSystemDescription (org.apache.uima.resource.metadata.TypeSystemDescription)3 Test (org.junit.Test)3 IOException (java.io.IOException)2 UIMAException (org.apache.uima.UIMAException)2 Type (org.apache.uima.cas.Type)2 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)2 TypeDescription (org.apache.uima.resource.metadata.TypeDescription)2 CasStorageService (de.tudarmstadt.ukp.clarin.webanno.api.CasStorageService)1 ANNOTATION_FOLDER (de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.ANNOTATION_FOLDER)1 DOCUMENT_FOLDER (de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.DOCUMENT_FOLDER)1 PROJECT_FOLDER (de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.PROJECT_FOLDER)1 CasDoctorException (de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException)1