Search in sources :

Example 6 with CasStorageSession

use of de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession in project webanno by webanno.

the class CasStorageServiceImpl method readOrCreateCas.

/**
 * Obtains the CAS for the given document/user pair using the requested access mode.
 * <p>
 * If the current {@link CasStorageSession} already manages a CAS for this pair and the recorded
 * access mode also permits the requested one, that CAS is returned directly. Otherwise the CAS
 * is obtained depending on the access mode:
 * <ul>
 * <li>{@code EXCLUSIVE_WRITE_ACCESS}: a holder is borrowed via {@code borrowCas}; if it carries
 * no CAS yet, the CAS is loaded through {@code readOrCreateUnmanagedCas}, otherwise the CAS
 * already in the holder is handed to the current thread and repaired/upgraded if required.</li>
 * <li>{@code SHARED_READ_ONLY_ACCESS}: the holder is fetched from {@code sharedAccessCache};
 * only permitted together with {@code AUTO_CAS_UPGRADE}.</li>
 * <li>{@code UNMANAGED_ACCESS}: the CAS is obtained through {@code readOrCreateUnmanagedCas}
 * without going through the pool or the cache.</li>
 * <li>{@code UNMANAGED_NON_INITIALIZING_ACCESS}: the CAS is obtained through
 * {@code readUnmanagedCas}; the supplier is not used.</li>
 * </ul>
 * If the access mode is session-managed, the resulting CAS is registered with the current session
 * and its read count is incremented.
 *
 * @param aDocument
 *            the document whose CAS is requested.
 * @param aUsername
 *            the user whose CAS is requested.
 * @param aUpgradeMode
 *            upgrade mode passed on to the load/repair helpers.
 * @param aSupplier
 *            provider passed on to {@code readOrCreateUnmanagedCas}.
 * @param aAccessMode
 *            how the CAS is going to be accessed.
 * @return the CAS.
 * @throws IOException
 *             if the holder that was obtained carries an exception (non-IO exceptions are wrapped).
 * @throws CasSessionException
 *             declared by the interface; presumably raised by the session API when no suitable
 *             session is available (not visible in this method - verify).
 * @throws IllegalArgumentException
 *             if the access mode is unknown or if shared read-only access is requested with an
 *             upgrade mode other than {@code AUTO_CAS_UPGRADE}.
 */
@Override
public CAS readOrCreateCas(SourceDocument aDocument, String aUsername, CasUpgradeMode aUpgradeMode, CasProvider aSupplier, CasAccessMode aAccessMode) throws IOException, CasSessionException {
    CasStorageSession session = CasStorageSession.get();
    // If the CAS is already present in the current session and the access mode is compatible
    // with the requested access mode, then we can return it immediately
    // THOUGHT: As it is written now - if the access mode already recorded in the session
    // is insufficient, the access mode is upgraded because we simply continue after this
    // IF-clause. I am not entirely sure this is valid.
    // Case 1) CAS was added during the current session - the holder in the session is
    // replaced with an exclusive access CAS and when the session is closed, it is released.
    // Case 2) CAS was added during a parent session - the new exclusive access holder is added
    // to the current session and released as the current session is closed. The parent session
    // then still has the previously obtained read-only CAS - which at this point might be
    // stale if the CAS was changed during the exclusive access period
    Optional<SessionManagedCas> mCas = session.getManagedState(aDocument.getId(), aUsername);
    if (mCas.isPresent() && mCas.get().getMode().alsoPermits(aAccessMode)) {
        return mCas.get().getCas();
    }
    // If the CAS is not yet in the session, then we must get hold of it somehow...
    CasHolder casHolder;
    if (EXCLUSIVE_WRITE_ACCESS.equals(aAccessMode)) {
        // Exclusive access: check the CAS out of the exclusive access pool
        CasKey key = null;
        CasHolder holder = null;
        try {
            log.trace("CAS storage session [{}]: trying to borrow CAS [{}]@[{}]({})", session.hashCode(), aUsername, aDocument.getName(), aDocument.getId());
            key = new CasKey(aDocument, aUsername);
            holder = borrowCas(key);
            // If the holder does not know the CAS yet, then we must load it
            if (!holder.isCasSet()) {
                // Effectively-final copies for use in the release callback below
                CasKey finalKey = key;
                CasHolder finalHolder = holder;
                CAS cas;
                // Loading happens in a nested session in which the (still empty) holder is
                // registered with exclusive access for this document/user
                try (CasStorageSession loaderSession = CasStorageSession.openNested(true)) {
                    SessionManagedCas mLoaderCas = loaderSession.add(aDocument.getId(), aUsername, EXCLUSIVE_WRITE_ACCESS, holder);
                    // Do not try to release the CAS when the loader session closes because in
                    // fact we won't even have set the CAS in the holder by then
                    mLoaderCas.setReleaseOnClose(false);
                    cas = readOrCreateUnmanagedCas(aDocument, aUsername, aSupplier, aUpgradeMode);
                }
                holder.setCas(cas);
                // Hook up releasing of the CAS when CAS.release() is called via the
                // CasStorageSession
                ((CASImpl) getRealCas(cas)).setOwner(_cas -> returnBorrowedCas(_cas, finalKey, finalHolder));
                log.trace("CAS storage session [{}]: borrowed CAS [{}] for [{}]@[{}]({}) loaded from storage", session.hashCode(), holder.getCasHashCode(), aUsername, aDocument.getName(), aDocument.getId());
            } else {
                log.trace("CAS storage session [{}]: borrowed CAS [{}] for [{}]@[{}]({}) was already in memory", session.hashCode(), holder.getCasHashCode(), aUsername, aDocument.getName(), aDocument.getId());
                transferCasOwnershipToCurrentThread(holder.getCas());
                repairAndUpgradeCasIfRequired(aDocument, aUsername, holder.getCas(), aUpgradeMode, ISOLATED_SESSION);
            }
            casHolder = holder;
        } catch (Exception e) {
            // If there was an exception, we need to return the CAS to the pool
            if (key != null && holder != null) {
                log.trace("CAS storage session [{}]: returning borrowed CAS [{}] for [{}]@[{}]({}) after failure to load CAS", session.hashCode(), holder.getCasHashCode(), aUsername, aDocument.getName(), aDocument.getId());
                try {
                    exclusiveAccessPool.returnObject(key, holder);
                    logExclusiveAccessHolders();
                } catch (Exception e1) {
                    log.error("Unable to return CAS to exclusive access pool", e1);
                }
            }
            // Carry the failure in a holder so it gets reported by the common check below
            casHolder = new CasHolder(key, e);
        }
    } else if (SHARED_READ_ONLY_ACCESS.equals(aAccessMode)) {
        // Shared read access: try fetching the CAS from the shared cache
        if (!AUTO_CAS_UPGRADE.equals(aUpgradeMode)) {
            // The condition checks the *upgrade* mode, so the message must say so
            throw new IllegalArgumentException("When requesting a shared read-only CAS, the upgrade mode must be " + AUTO_CAS_UPGRADE);
        }
        // The cache lookup (and a potential load) is wrapped in a WithExclusiveAccess scope
        try (WithExclusiveAccess access = new WithExclusiveAccess(aDocument, aUsername)) {
            // Since we promise to only read the CAS, we don't have to worry about it being
            // locked to a particular thread...
            casHolder = sharedAccessCache.get(new CasKey(aDocument, aUsername), (key) -> CasHolder.of(key, () -> getRealCas(readOrCreateUnmanagedCas(aDocument, aUsername, aSupplier, aUpgradeMode))));
        }
    } else if (UNMANAGED_ACCESS.equals(aAccessMode)) {
        // Special bypass mode: fetch (or create) the CAS directly, skipping pool and cache
        try (WithExclusiveAccess access = new WithExclusiveAccess(aDocument, aUsername)) {
            casHolder = CasHolder.of(new CasKey(aDocument, aUsername), () -> readOrCreateUnmanagedCas(aDocument, aUsername, aSupplier, aUpgradeMode));
        }
    } else if (UNMANAGED_NON_INITIALIZING_ACCESS.equals(aAccessMode)) {
        // Special bypass mode which only reads: the supplier is not consulted here
        try (WithExclusiveAccess access = new WithExclusiveAccess(aDocument, aUsername)) {
            casHolder = CasHolder.of(new CasKey(aDocument, aUsername), () -> readUnmanagedCas(aDocument, aUsername));
        }
    } else {
        throw new IllegalArgumentException("Unknown CAS access mode [" + aAccessMode + "]");
    }
    // If there was a problem retrieving the CAS, then we throw an exception
    if (casHolder.getException() != null) {
        if (casHolder.getException() instanceof IOException) {
            throw (IOException) casHolder.getException();
        }
        throw new IOException(casHolder.getException());
    }
    CAS cas = casHolder.getCas();
    // Only session-managed access modes are tracked in the session
    if (aAccessMode.isSessionManaged()) {
        session.add(aDocument.getId(), aUsername, aAccessMode, cas).incrementReadCount();
    }
    return cas;
}
Also used : SessionManagedCas(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.SessionManagedCas) CasHolder(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasHolder) CAS(org.apache.uima.cas.CAS) CASImpl(org.apache.uima.cas.impl.CASImpl) CasKey(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasKey) IOException(java.io.IOException) CasStorageSession(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession) CasDoctorException(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException) UIMAException(org.apache.uima.UIMAException) CasSessionException(de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasSessionException) FileNotFoundException(java.io.FileNotFoundException) DataRetrievalFailureException(org.springframework.dao.DataRetrievalFailureException) IOException(java.io.IOException)

Example 7 with CasStorageSession

use of de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession in project webanno by webanno.

the class CasStorageServiceImpl method writeCas.

/**
 * Writes the given CAS to storage under the given document/user pair.
 * <p>
 * A CAS that is not part of the current session is written inside a {@code WithExclusiveAccess}
 * scope for the target pair. A CAS that is part of the session is written directly, provided the
 * session permits writing it and no <em>different</em> CAS is managed under the target pair.
 * Afterwards the entry for the pair is invalidated in the shared access cache and, if the CAS is
 * managed by the session, its write count is incremented.
 *
 * @param aDocument
 *            the document the CAS belongs to.
 * @param aCas
 *            the CAS to write.
 * @param aUserName
 *            the user the CAS belongs to.
 * @throws IOException
 *             if writing is not permitted, if another managed CAS would be overridden, or if
 *             writing fails (non-IO exceptions from the exclusive-access branch are wrapped).
 * @throws CasSessionException
 *             declared by the interface; presumably raised by the session API (not visible in
 *             this method - verify).
 */
@Override
public void writeCas(SourceDocument aDocument, CAS aCas, String aUserName) throws IOException, CasSessionException {
    CasStorageSession session = CasStorageSession.get();
    if (!session.contains(aCas)) {
        // The CAS is unknown to the session: write it while holding exclusive access to the
        // target document/user pair
        try (WithExclusiveAccess exclusive = new WithExclusiveAccess(aDocument, aUserName)) {
            realWriteCas(aDocument, aUserName, aCas);
            // If the exclusive access scope already carries a different CAS for this pair,
            // replace it with the one that has just been written
            boolean carriesOtherCas = exclusive.isCasSet() && exclusive.getCas() != aCas;
            if (carriesOtherCas) {
                exclusive.setCas(aCas);
            }
        } catch (Exception e) {
            // IOExceptions pass through untouched, anything else gets wrapped
            if (e instanceof IOException) {
                throw (IOException) e;
            }
            throw new IOException(e);
        }
    } else {
        // The CAS is part of the session: the session must allow writing it
        if (!session.isWritingPermitted(aCas)) {
            throw new IOException("Session does not permit the CAS for user [" + aUserName + "] on document [" + aDocument.getName() + "](" + aDocument.getId() + ") in project [" + aDocument.getProject().getName() + "](" + aDocument.getProject().getId() + ") to be written");
        }
        // Refuse to overwrite the stored CAS of this document/user pair with a different
        // CAS while the session manages a CAS under exactly that pair - otherwise one CAS
        // could end up being reachable via two different username/docId pairs.
        Optional<SessionManagedCas> registered = session.getManagedState(aDocument.getId(), aUserName);
        if (registered.isPresent()) {
            SessionManagedCas managed = registered.get();
            if (managed.getCas() != aCas) {
                throw new IOException("Cannot override managed CAS [" + aUserName + "] on document [" + aDocument.getName() + "](" + aDocument.getId() + ") in project [" + aDocument.getProject().getName() + "](" + aDocument.getProject().getId() + ") with another managed CAS for user [" + managed.getUserId() + "] on document [" + managed.getSourceDocumentId() + "]");
            }
        }
        realWriteCas(aDocument, aUserName, aCas);
    }
    // Drop the CAS from the shared cache so it gets re-loaded on the next access - this has no
    // effect if the CAS is not present in the shared cache
    CasKey cacheKey = new CasKey(aDocument, aUserName);
    sharedAccessCache.invalidate(cacheKey);
    // Record the write on the session-managed state, if there is one for this CAS
    session.getManagedState(aCas).ifPresent(managed -> managed.incrementWriteCount());
}
Also used : SessionManagedCas(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.SessionManagedCas) IOException(java.io.IOException) CasKey(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasKey) CasStorageSession(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession) CasDoctorException(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException) UIMAException(org.apache.uima.UIMAException) CasSessionException(de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasSessionException) FileNotFoundException(java.io.FileNotFoundException) DataRetrievalFailureException(org.springframework.dao.DataRetrievalFailureException) IOException(java.io.IOException)

Example 8 with CasStorageSession

use of de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession in project webanno by webanno.

the class CasStorageServiceImplTest method testHighConcurrencyWithoutDeletion.

/**
 * Runs several groups of concurrently reading/writing threads against a single pre-existing CAS
 * and expects that none of them flags an exception.
 */
@Test
public void testHighConcurrencyWithoutDeletion() throws Exception {
    // Provider used once up-front to create the CAS that all threads will work with
    CasProvider initializer = () -> {
        CAS cas;
        try {
            cas = createCas(mergeTypeSystems(asList(createTypeSystemDescription(), getInternalTypeSystem())));
        } catch (ResourceInitializationException e) {
            throw new IOException(e);
        }
        cas.setDocumentText(repeat("This is a test.\n", 100_000));
        return cas;
    };
    // Provider handed to the threads: it fails loudly should anybody ever invoke it
    CasProvider badSeed = () -> {
        throw new IOException("This initializer should never be called!");
    };
    SourceDocument doc = makeSourceDocument(8L, 8L, "doc");
    String user = "annotator";
    // Make sure the CAS exists so that the threads should never be forced to call the
    // initializer
    try (CasStorageSession session = openNested()) {
        sut.readOrCreateCas(doc, user, FORCE_CAS_UPGRADE, initializer, EXCLUSIVE_WRITE_ACCESS);
    }
    int threadGroupCount = 4;
    int iterations = 100;
    int expectedWrites = threadGroupCount * iterations;
    // All tasks in start order: per group one primary task followed by its secondary tasks.
    // Primary tasks run for a fixed number of iterations, secondary tasks run for as long as
    // any primary task is still running.
    List<Thread> tasks = new ArrayList<>();
    List<Thread> primaryTasks = new ArrayList<>();
    List<Thread> secondaryTasks = new ArrayList<>();
    for (int group = 0; group < threadGroupCount; group++) {
        ExclusiveReadWriteTask readWrite = new ExclusiveReadWriteTask(group, doc, user, badSeed, iterations);
        Thread sharedRead = new SharedReadOnlyTask(group, doc, user, badSeed);
        Thread unmanaged = new UnmanagedTask(group, doc, user, badSeed);
        Thread unmanagedNonInit = new UnmanagedNonInitializingTask(group, doc, user);
        primaryTasks.add(readWrite);
        tasks.add(readWrite);
        for (Thread secondary : asList(sharedRead, unmanaged, unmanagedNonInit)) {
            secondaryTasks.add(secondary);
            tasks.add(secondary);
        }
    }
    log.info("---- Starting all threads ----");
    for (Thread task : tasks) {
        task.start();
    }
    log.info("---- Wait for primary threads to complete ----");
    long running;
    do {
        // Poll once per second and report progress until no primary task is alive anymore
        running = primaryTasks.stream().filter(Thread::isAlive).count();
        sleep(1000);
        int percentComplete = (writeCounter.get() * 100) / expectedWrites;
        log.info("running {}  complete {}%  rw {}  ro {}  un {}  uni {}", running, percentComplete, writeCounter, managedReadCounter, unmanagedReadCounter, unmanagedNonInitializingReadCounter);
    } while (running != 0L);
    log.info("---- Wait for threads secondary threads to wrap up ----");
    // Signal the secondary tasks that the primary tasks have finished, then wait for them
    rwTasksCompleted.set(true);
    for (Thread secondary : secondaryTasks) {
        secondary.join();
    }
    log.info("---- Test is done ----");
    assertThat(exception).isFalse();
}
Also used : SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) ArrayList(java.util.ArrayList) IOException(java.io.IOException) CasProvider(de.tudarmstadt.ukp.clarin.webanno.api.CasProvider) CAS(org.apache.uima.cas.CAS) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) CasStorageSession(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession) Test(org.junit.Test)

Example 9 with CasStorageSession

use of de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession in project webanno by webanno.

the class CasStorageServiceImplTest method testCasMetadataGetsCreated.

/**
 * Writes a freshly created CAS and checks that the CAS read back carries exactly one
 * {@code CASMetadata} annotation matching the project, the document and the storage timestamp.
 */
@Test
public void testCasMetadataGetsCreated() throws Exception {
    try (CasStorageSession storageSession = openNested(true)) {
        // Create a CAS whose type system includes the internal types and register it with the
        // session for exclusive write access
        List<TypeSystemDescription> descriptions = asList(createTypeSystemDescription(), CasMetadataUtils.getInternalTypeSystem());
        JCas written = JCasFactory.createJCas(mergeTypeSystems(descriptions));
        storageSession.add("cas", EXCLUSIVE_WRITE_ACCESS, written.getCas());
        SourceDocument doc = makeSourceDocument(2L, 2L, "test");
        String user = "test";
        // Round-trip through the storage service
        sut.writeCas(doc, written.getCas(), user);
        JCas reloaded = sut.readCas(doc, user).getJCas();
        List<CASMetadata> metadata = new ArrayList<>(select(reloaded, CASMetadata.class));
        assertThat(metadata).hasSize(1);
        CASMetadata only = metadata.get(0);
        assertThat(only.getProjectId()).isEqualTo(doc.getProject().getId());
        assertThat(only.getSourceDocumentId()).isEqualTo(doc.getId());
        assertThat(only.getLastChangedOnDisk()).isEqualTo(sut.getCasTimestamp(doc, user).get());
    }
}
Also used : TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) TypeSystemDescriptionFactory.createTypeSystemDescription(org.apache.uima.fit.factory.TypeSystemDescriptionFactory.createTypeSystemDescription) ArrayList(java.util.ArrayList) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) JCas(org.apache.uima.jcas.JCas) CasStorageSession(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession) CASMetadata(de.tudarmstadt.ukp.clarin.webanno.api.type.CASMetadata) Test(org.junit.Test)

Example 10 with CasStorageSession

use of de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession in project webanno by webanno.

the class CasStorageServiceImplTest method testRestorationOfCasWhenSaveFails.

/**
 * Checks that a failing save leaves the CAS file that already exists untouched (same size, same
 * modification time) and leaves no {@code .ser.old} file behind.
 */
@Test
public void testRestorationOfCasWhenSaveFails() throws Exception {
    try (CasStorageSession outerSession = openNested(true)) {
        // Setup fixture: create a CAS file and remember its size and modification time
        SourceDocument doc = makeSourceDocument(6L, 6L, "test");
        String user = "test";
        File casFile = sut.getCasFile(doc, user);
        long sizeBeforeSave;
        long lastModifiedBeforeSave;
        try (CasStorageSession fixtureSession = openNested(true)) {
            createCasFile(doc, user, "This is a test");
            assertThat(casFile).exists();
            sizeBeforeSave = casFile.length();
            lastModifiedBeforeSave = casFile.lastModified();
        }
        CAS realCas = sut.readCas(doc, user, EXCLUSIVE_WRITE_ACCESS);
        // Wrap the CAS in a proxy so that UIMA cannot serialize it
        ClassLoader loader = getClass().getClassLoader();
        Class<?>[] proxiedInterfaces = { CAS.class };
        CAS guardedCas = (CAS) Proxy.newProxyInstance(loader, proxiedInterfaces, (proxy, method, args) -> method.invoke(realCas, args));
        // Exercise: saving the proxied CAS must fail
        assertThatExceptionOfType(IOException.class)
                .as("Saving fails because UIMA cannot cast the proxied CAS to something serializable")
                .isThrownBy(() -> sut.writeCas(doc, guardedCas, user))
                .withRootCauseInstanceOf(ClassCastException.class);
        // Verify: the file on disk is unchanged and no backup file is left over
        assertThat(casFile).exists();
        assertThat(casFile).hasSize(sizeBeforeSave);
        assertThat(casFile.lastModified()).isEqualTo(lastModifiedBeforeSave);
        File leftoverBackup = new File(casFile.getParentFile(), user + ".ser.old");
        assertThat(leftoverBackup).doesNotExist();
    }
}
Also used : CasStorageSession.openNested(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession.openNested) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) LoggerFactory(org.slf4j.LoggerFactory) Random(java.util.Random) UNMANAGED_NON_INITIALIZING_ACCESS(de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasAccessMode.UNMANAGED_NON_INITIALIZING_ACCESS) UNMANAGED_ACCESS(de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasAccessMode.UNMANAGED_ACCESS) RepositoryProperties(de.tudarmstadt.ukp.clarin.webanno.api.RepositoryProperties) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) CASException(org.apache.uima.cas.CASException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) LayerConfigurationChangedEvent(de.tudarmstadt.ukp.clarin.webanno.api.event.LayerConfigurationChangedEvent) Arrays.asList(java.util.Arrays.asList) Thread.sleep(java.lang.Thread.sleep) CasFactory.createCas(org.apache.uima.fit.factory.CasFactory.createCas) TypeSystemDescriptionFactory.createTypeSystemDescription(org.apache.uima.fit.factory.TypeSystemDescriptionFactory.createTypeSystemDescription) JCas(org.apache.uima.jcas.JCas) CasStorageSession(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession) CasSessionException(de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasSessionException) StringUtils.repeat(org.apache.commons.lang3.StringUtils.repeat) FORCE_CAS_UPGRADE(de.tudarmstadt.ukp.clarin.webanno.api.CasUpgradeMode.FORCE_CAS_UPGRADE) FileNotFoundException(java.io.FileNotFoundException) AUTO_CAS_UPGRADE(de.tudarmstadt.ukp.clarin.webanno.api.CasUpgradeMode.AUTO_CAS_UPGRADE) List(java.util.List) EXCLUSIVE_WRITE_ACCESS(de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasAccessMode.EXCLUSIVE_WRITE_ACCESS) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) CasMetadataUtils.getInternalTypeSystem(de.tudarmstadt.ukp.clarin.webanno.api.dao.CasMetadataUtils.getInternalTypeSystem) 
CasProvider(de.tudarmstadt.ukp.clarin.webanno.api.CasProvider) Proxy(java.lang.reflect.Proxy) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) CASMetadata(de.tudarmstadt.ukp.clarin.webanno.api.type.CASMetadata) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) CAS(org.apache.uima.cas.CAS) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) NO_CAS_UPGRADE(de.tudarmstadt.ukp.clarin.webanno.api.CasUpgradeMode.NO_CAS_UPGRADE) ArrayList(java.util.ArrayList) CasFactory(org.apache.uima.fit.factory.CasFactory) CasCreationUtils.mergeTypeSystems(org.apache.uima.util.CasCreationUtils.mergeTypeSystems) Assertions.assertThatExceptionOfType(org.assertj.core.api.Assertions.assertThatExceptionOfType) JCasFactory(org.apache.uima.fit.factory.JCasFactory) Project(de.tudarmstadt.ukp.clarin.webanno.model.Project) INITIAL_CAS_PSEUDO_USER(de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.INITIAL_CAS_PSEUDO_USER) SHARED_READ_ONLY_ACCESS(de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasAccessMode.SHARED_READ_ONLY_ACCESS) Before(org.junit.Before) Logger(org.slf4j.Logger) IOException(java.io.IOException) Test(org.junit.Test) File(java.io.File) Rule(org.junit.Rule) JCasUtil.select(org.apache.uima.fit.util.JCasUtil.select) TemporaryFolder(org.junit.rules.TemporaryFolder) CAS(org.apache.uima.cas.CAS) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) IOException(java.io.IOException) CasStorageSession(de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession) File(java.io.File) Test(org.junit.Test)

Aggregations

CasStorageSession (de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasStorageSession)15 SourceDocument (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument)10 Test (org.junit.Test)9 CAS (org.apache.uima.cas.CAS)8 IOException (java.io.IOException)7 JCas (org.apache.uima.jcas.JCas)6 File (java.io.File)5 FileNotFoundException (java.io.FileNotFoundException)5 ArrayList (java.util.ArrayList)5 UIMAException (org.apache.uima.UIMAException)5 CasSessionException (de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasSessionException)4 Project (de.tudarmstadt.ukp.clarin.webanno.model.Project)4 CasDoctorException (de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException)3 List (java.util.List)3 CasProvider (de.tudarmstadt.ukp.clarin.webanno.api.CasProvider)2 RepositoryProperties (de.tudarmstadt.ukp.clarin.webanno.api.RepositoryProperties)2 INITIAL_CAS_PSEUDO_USER (de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.INITIAL_CAS_PSEUDO_USER)2 CasKey (de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.CasKey)2 SessionManagedCas (de.tudarmstadt.ukp.clarin.webanno.api.dao.casstorage.SessionManagedCas)2 LayerConfigurationChangedEvent (de.tudarmstadt.ukp.clarin.webanno.api.event.LayerConfigurationChangedEvent)2