Example 21 with AlreadyClosedException

Use of org.apache.lucene.store.AlreadyClosedException in project lucene-solr by apache.

From the class TestIndexWriterWithThreads, method _testSingleThreadFailure:

// Runs the test with one thread, using the given failure
// to trigger an IOException
public void _testSingleThreadFailure(MockDirectoryWrapper.Failure failure) throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
        .setMaxBufferedDocs(2)
        .setMergeScheduler(new ConcurrentMergeScheduler())
        .setCommitOnClose(false);
    if (iwc.getMergeScheduler() instanceof ConcurrentMergeScheduler) {
        iwc.setMergeScheduler(new SuppressingConcurrentMergeScheduler() {

            @Override
            protected boolean isOK(Throwable th) {
                return th instanceof AlreadyClosedException || (th instanceof IllegalStateException && th.getMessage().contains("this writer hit an unrecoverable error"));
            }
        });
    }
    IndexWriter writer = new IndexWriter(dir, iwc);
    final Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(true);
    doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
    for (int i = 0; i < 6; i++) writer.addDocument(doc);
    dir.failOn(failure);
    failure.setDoFail();
    expectThrows(IOException.class, () -> {
        writer.addDocument(doc);
        writer.addDocument(doc);
        writer.commit();
    });
    failure.clearDoFail();
    expectThrows(AlreadyClosedException.class, () -> {
        writer.addDocument(doc);
        writer.commit();
        writer.close();
    });
    assertTrue(writer.deleter.isClosed());
    dir.close();
}
Also used: MockDirectoryWrapper (org.apache.lucene.store.MockDirectoryWrapper), AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException), Document (org.apache.lucene.document.Document), FieldType (org.apache.lucene.document.FieldType), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)
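
The pattern this test exercises also matters to application code: after a tragic failure, any further use of the writer throws AlreadyClosedException, and IndexWriter.getTragicException() exposes the root cause. A minimal sketch, assuming an open IndexWriter writer and a Document doc (handleTragedy is a hypothetical helper):

try {
    writer.addDocument(doc);
    writer.commit();
} catch (AlreadyClosedException ace) {
    // a tragic event (e.g. disk full) closed the writer; it cannot be reused
    Throwable tragedy = writer.getTragicException();
    if (tragedy != null) {
        handleTragedy(tragedy); // hypothetical: log and rebuild the writer
    }
}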

Example 22 with AlreadyClosedException

Use of org.apache.lucene.store.AlreadyClosedException in project lucene-solr by apache.

From the class TestIndexWriterWithThreads, method _testMultipleThreadsFailure:

// Runs the test with multiple threads, using the given
// failure to trigger an IOException
public void _testMultipleThreadsFailure(MockDirectoryWrapper.Failure failure) throws Exception {
    int NUM_THREADS = 3;
    for (int iter = 0; iter < 2; iter++) {
        if (VERBOSE) {
            System.out.println("TEST: iter=" + iter);
        }
        MockDirectoryWrapper dir = newMockDirectory();
        IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
            .setMaxBufferedDocs(2)
            .setMergeScheduler(new ConcurrentMergeScheduler())
            .setMergePolicy(newLogMergePolicy(4))
            .setCommitOnClose(false));
        ((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions();
        IndexerThread[] threads = new IndexerThread[NUM_THREADS];
        for (int i = 0; i < NUM_THREADS; i++) threads[i] = new IndexerThread(writer, true);
        for (int i = 0; i < NUM_THREADS; i++) threads[i].start();
        Thread.sleep(10);
        dir.failOn(failure);
        failure.setDoFail();
        for (int i = 0; i < NUM_THREADS; i++) {
            threads[i].join();
            assertTrue("hit unexpected Throwable", threads[i].error == null);
        }
        boolean success = false;
        try {
            writer.commit();
            writer.close();
            success = true;
        } catch (AlreadyClosedException ace) {
            // OK: abort closes the writer
            assertTrue(writer.deleter.isClosed());
        } catch (IOException ioe) {
            writer.rollback();
            failure.clearDoFail();
        }
        if (VERBOSE) {
            System.out.println("TEST: success=" + success);
        }
        if (success) {
            IndexReader reader = DirectoryReader.open(dir);
            final Bits delDocs = MultiFields.getLiveDocs(reader);
            for (int j = 0; j < reader.maxDoc(); j++) {
                if (delDocs == null || !delDocs.get(j)) {
                    reader.document(j);
                    reader.getTermVectors(j);
                }
            }
            reader.close();
        }
        dir.close();
    }
}
Also used: MockDirectoryWrapper (org.apache.lucene.store.MockDirectoryWrapper), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), Bits (org.apache.lucene.util.Bits), AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException), IOException (java.io.IOException)
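
The IndexerThread used here is defined elsewhere in TestIndexWriterWithThreads. As a rough illustrative stand-in (class name, field content, and loop count are invented), such a worker treats AlreadyClosedException as the expected outcome of the injected failure and records anything else for the test to assert on:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.AlreadyClosedException;

class SimpleIndexerThread extends Thread {
    private final IndexWriter writer;
    volatile Throwable error; // inspected by the test after join()

    SimpleIndexerThread(IndexWriter writer) {
        this.writer = writer;
    }

    @Override
    public void run() {
        try {
            Document doc = new Document();
            doc.add(new TextField("field", "aaa bbb ccc", Field.Store.YES));
            for (int i = 0; i < 100; i++) {
                writer.addDocument(doc);
            }
        } catch (AlreadyClosedException expected) {
            // the injected failure aborted the writer; this path is fine
        } catch (Throwable t) {
            error = t; // anything else is an unexpected Throwable
        }
    }
}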

Example 23 with AlreadyClosedException

Use of org.apache.lucene.store.AlreadyClosedException in project crate by crate.

From the class IndexShard, method bumpPrimaryTerm:

private <E extends Exception> void bumpPrimaryTerm(long newPrimaryTerm, final CheckedRunnable<E> onBlocked, @Nullable ActionListener<Releasable> combineWithAction) {
    assert Thread.holdsLock(mutex);
    assert newPrimaryTerm > pendingPrimaryTerm || (newPrimaryTerm >= pendingPrimaryTerm && combineWithAction != null);
    assert getOperationPrimaryTerm() <= pendingPrimaryTerm;
    final CountDownLatch termUpdated = new CountDownLatch(1);
    asyncBlockOperations(new ActionListener<Releasable>() {

        @Override
        public void onFailure(final Exception e) {
            try {
                innerFail(e);
            } finally {
                if (combineWithAction != null) {
                    combineWithAction.onFailure(e);
                }
            }
        }

        private void innerFail(final Exception e) {
            try {
                failShard("exception during primary term transition", e);
            } catch (AlreadyClosedException ace) {
            // ignore, shard is already closed
            }
        }

        @Override
        public void onResponse(final Releasable releasable) {
            final RunOnce releaseOnce = new RunOnce(releasable::close);
            try {
                assert getOperationPrimaryTerm() <= pendingPrimaryTerm;
                termUpdated.await();
                // submissions to asyncBlockOperations are not guaranteed to execute
                // in the order submitted. We need to guard against another term bump
                if (getOperationPrimaryTerm() < newPrimaryTerm) {
                    replicationTracker.setOperationPrimaryTerm(newPrimaryTerm);
                    onBlocked.run();
                }
            } catch (final Exception e) {
                if (combineWithAction == null) {
                    // otherwise leave it to combineWithAction to release the permit
                    releaseOnce.run();
                }
                innerFail(e);
            } finally {
                if (combineWithAction != null) {
                    combineWithAction.onResponse(releasable);
                } else {
                    releaseOnce.run();
                }
            }
        }
    }, 30, TimeUnit.MINUTES);
    pendingPrimaryTerm = newPrimaryTerm;
    termUpdated.countDown();
}
Also used: RunOnce (org.elasticsearch.common.util.concurrent.RunOnce), Releasable (org.elasticsearch.common.lease.Releasable), AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException), CountDownLatch (java.util.concurrent.CountDownLatch), WriteStateException (org.elasticsearch.gateway.WriteStateException), IndexNotFoundException (org.elasticsearch.index.IndexNotFoundException), ClosedByInterruptException (java.nio.channels.ClosedByInterruptException), ThreadInterruptedException (org.apache.lucene.util.ThreadInterruptedException), RecoveryFailedException (org.elasticsearch.indices.recovery.RecoveryFailedException), EngineException (org.elasticsearch.index.engine.EngineException), IOException (java.io.IOException), ElasticsearchException (org.elasticsearch.ElasticsearchException), TimeoutException (java.util.concurrent.TimeoutException), RefreshFailedEngineException (org.elasticsearch.index.engine.RefreshFailedEngineException)
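
RunOnce (org.elasticsearch.common.util.concurrent.RunOnce) is what lets the code call releaseOnce.run() on both the error path and the finally path without double-releasing the permit: the wrapped Runnable executes at most once. A minimal illustrative equivalent, not the actual implementation:

import java.util.concurrent.atomic.AtomicBoolean;

final class RunOnceSketch implements Runnable {
    private final Runnable delegate;
    private final AtomicBoolean hasRun = new AtomicBoolean(false);

    RunOnceSketch(Runnable delegate) {
        this.delegate = delegate;
    }

    @Override
    public void run() {
        // only the first caller wins the compare-and-set; later calls are no-ops
        if (hasRun.compareAndSet(false, true)) {
            delegate.run();
        }
    }
}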

Example 24 with AlreadyClosedException

Use of org.apache.lucene.store.AlreadyClosedException in project crate by crate.

From the class IndexShard, method updateShardState:

@Override
public void updateShardState(final ShardRouting newRouting, final long newPrimaryTerm, final BiConsumer<IndexShard, ActionListener<ResyncTask>> primaryReplicaSyncer, final long applyingClusterStateVersion, final Set<String> inSyncAllocationIds, final IndexShardRoutingTable routingTable) throws IOException {
    final ShardRouting currentRouting;
    synchronized (mutex) {
        currentRouting = this.shardRouting;
        assert currentRouting != null : "shardRouting must not be null";
        if (!newRouting.shardId().equals(shardId())) {
            throw new IllegalArgumentException("Trying to set a routing entry with shardId " + newRouting.shardId() + " on a shard with shardId " + shardId());
        }
        if (newRouting.isSameAllocation(currentRouting) == false) {
            throw new IllegalArgumentException("Trying to set a routing entry with a different allocation. Current " + currentRouting + ", new " + newRouting);
        }
        if (currentRouting.primary() && newRouting.primary() == false) {
            throw new IllegalArgumentException("illegal state: trying to move shard from primary mode to replica mode. Current " + currentRouting + ", new " + newRouting);
        }
        if (newRouting.primary()) {
            replicationTracker.updateFromMaster(applyingClusterStateVersion, inSyncAllocationIds, routingTable);
        }
        if (state == IndexShardState.POST_RECOVERY && newRouting.active()) {
            assert currentRouting.active() == false : "we are in POST_RECOVERY, but our shard routing is active " + currentRouting;
            assert currentRouting.isRelocationTarget() == false || currentRouting.primary() == false || replicationTracker.isPrimaryMode() : "a primary relocation is completed by the master, but primary mode is not active " + currentRouting;
            changeState(IndexShardState.STARTED, "global state is [" + newRouting.state() + "]");
        } else if (currentRouting.primary() && currentRouting.relocating() && replicationTracker.isRelocated() && (newRouting.relocating() == false || newRouting.equalsIgnoringMetadata(currentRouting) == false)) {
            // if the shard is not in primary mode anymore (after a primary relocation), any further routing change must
            // fail the shard (e.g. due to recovery failure / cancellation), because we cannot safely reactivate primary
            // mode without risking two active primaries.
            throw new IndexShardRelocatedException(shardId(), "Shard is marked as relocated, cannot safely move to state " + newRouting.state());
        }
        assert newRouting.active() == false || state == IndexShardState.STARTED || state == IndexShardState.CLOSED : "routing is active, but local shard state isn't. routing: " + newRouting + ", local state: " + state;
        persistMetadata(path, indexSettings, newRouting, currentRouting, logger);
        final CountDownLatch shardStateUpdated = new CountDownLatch(1);
        if (newRouting.primary()) {
            if (newPrimaryTerm == pendingPrimaryTerm) {
                if (currentRouting.initializing() && newRouting.active()) {
                    if (currentRouting.isRelocationTarget() == false) {
                        // the master started a recovering primary, activate primary mode.
                        replicationTracker.activatePrimaryMode(getLocalCheckpoint());
                    }
                }
            } else {
                assert currentRouting.primary() == false : "term is only increased as part of primary promotion";
                /* Note that due to cluster state batching, an initializing primary shard can be failed and re-assigned
                 * in one state, causing its term to be incremented. Note that if both the current shard state and the new
                 * shard state are initializing, we could replace the current shard and reinitialize it. It is however
                 * possible that this shard is being started. This can happen if:
                 * 1) Shard is post recovery and sends shard started to the master
                 * 2) Node gets disconnected and rejoins
                 * 3) Master assigns the shard back to the node
                 * 4) Master processes the shard started and starts the shard
                 * 5) The node processes the cluster state where the shard is both started and its primary term is incremented.
                 *
                 * We could fail the shard in that case, but this will cause it to be removed from the in-sync allocations list,
                 * potentially preventing re-allocation.
                 */
                assert newRouting.initializing() == false : "a started primary shard should never update its term; " + "shard " + newRouting + ", " + "current term [" + pendingPrimaryTerm + "], " + "new term [" + newPrimaryTerm + "]";
                assert newPrimaryTerm > pendingPrimaryTerm : "primary terms can only go up; current term [" + pendingPrimaryTerm + "], new term [" + newPrimaryTerm + "]";
                /*
                 * Before this call returns, we are guaranteed that all future operations are delayed and so this happens before we
                 * increment the primary term. The latch is needed to ensure that we do not unblock operations before the primary term is
                 * incremented.
                 */
                // to prevent primary relocation handoff while resync is not completed
                boolean resyncStarted = primaryReplicaResyncInProgress.compareAndSet(false, true);
                if (resyncStarted == false) {
                    throw new IllegalStateException("cannot start resync while it's already in progress");
                }
                bumpPrimaryTerm(newPrimaryTerm, () -> {
                    shardStateUpdated.await();
                    assert pendingPrimaryTerm == newPrimaryTerm : "shard term changed on primary. expected [" + newPrimaryTerm + "] but was [" + pendingPrimaryTerm + "]" + ", current routing: " + currentRouting + ", new routing: " + newRouting;
                    assert getOperationPrimaryTerm() == newPrimaryTerm;
                    try {
                        replicationTracker.activatePrimaryMode(getLocalCheckpoint());
                        ensurePeerRecoveryRetentionLeasesExist();
                        /*
                         * If this shard was serving as a replica shard when another shard was promoted to primary then
                         * its Lucene index was reset during the primary term transition. In particular, the Lucene index
                         * on this shard was reset to the global checkpoint and the operations above the local checkpoint
                         * were reverted. If the other shard that was promoted to primary subsequently fails before the
                         * primary/replica re-sync completes successfully and we are now being promoted, we have to restore
                         * the reverted operations on this shard by replaying the translog to avoid losing acknowledged writes.
                         */
                        final Engine engine = getEngine();
                        engine.restoreLocalHistoryFromTranslog((resettingEngine, snapshot) -> runTranslogRecovery(resettingEngine, snapshot, Engine.Operation.Origin.LOCAL_RESET, () -> {
                        }));
                        if (indexSettings.getIndexVersionCreated().onOrBefore(Version.V_3_0_1)) {
                            // an index that was created before sequence numbers were introduced may contain operations in its
                            // translog that do not have a sequence number. We want to make sure those operations will never
                            // be replayed as part of peer recovery to avoid an arbitrary mixture of operations with seq# (due
                            // to active indexing) and operations without a seq# coming from the translog. We therefore flush
                            // to create a Lucene commit point pointing to an empty translog file.
                            engine.flush(false, true);
                        }
                        /* Rolling the translog generation is not strictly needed here (as we will never have collisions between
                         * sequence numbers in a translog generation in a new primary as it takes the last known sequence number
                         * as a starting point), but it simplifies reasoning about the relationship between primary terms and
                         * translog generations.
                         */
                        engine.rollTranslogGeneration();
                        engine.fillSeqNoGaps(newPrimaryTerm);
                        replicationTracker.updateLocalCheckpoint(currentRouting.allocationId().getId(), getLocalCheckpoint());
                        primaryReplicaSyncer.accept(this, new ActionListener<ResyncTask>() {

                            @Override
                            public void onResponse(ResyncTask resyncTask) {
                                logger.info("primary-replica resync completed with {} operations", resyncTask.getResyncedOperations());
                                boolean resyncCompleted = primaryReplicaResyncInProgress.compareAndSet(true, false);
                                assert resyncCompleted : "primary-replica resync finished but was not started";
                            }

                            @Override
                            public void onFailure(Exception e) {
                                boolean resyncCompleted = primaryReplicaResyncInProgress.compareAndSet(true, false);
                                assert resyncCompleted : "primary-replica resync finished but was not started";
                                if (state == IndexShardState.CLOSED) {
                                // ignore, shutting down
                                } else {
                                    failShard("exception during primary-replica resync", e);
                                }
                            }
                        });
                    } catch (final AlreadyClosedException e) {
                    // okay, the index was deleted
                    }
                }, null);
            }
        }
        // set this last, once we finished updating all internal state.
        this.shardRouting = newRouting;
        assert this.shardRouting.primary() == false || // note that we use started and not active to avoid relocating shards
        this.shardRouting.started() == false || // if permits are blocked, we are still transitioning
        this.indexShardOperationPermits.isBlocked() || this.replicationTracker.isPrimaryMode() : "a started primary with non-pending operation term must be in primary mode " + this.shardRouting;
        shardStateUpdated.countDown();
    }
    if (currentRouting.active() == false && newRouting.active()) {
        indexEventListener.afterIndexShardStarted(this);
    }
    if (newRouting.equals(currentRouting) == false) {
        indexEventListener.shardRoutingChanged(this, currentRouting, newRouting);
    }
    if (indexSettings.isSoftDeleteEnabled() && useRetentionLeasesInPeerRecovery == false) {
        final RetentionLeases retentionLeases = replicationTracker.getRetentionLeases();
        final Set<ShardRouting> shardRoutings = new HashSet<>(routingTable.getShards());
        // include relocation targets
        shardRoutings.addAll(routingTable.assignedShards());
        if (shardRoutings.stream().allMatch(shr -> shr.assignedToNode() && retentionLeases.contains(ReplicationTracker.getPeerRecoveryRetentionLeaseId(shr)))) {
            useRetentionLeasesInPeerRecovery = true;
            turnOffTranslogRetention();
        }
    }
}
Also used: AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException), CountDownLatch (java.util.concurrent.CountDownLatch), WriteStateException (org.elasticsearch.gateway.WriteStateException), IndexNotFoundException (org.elasticsearch.index.IndexNotFoundException), ClosedByInterruptException (java.nio.channels.ClosedByInterruptException), ThreadInterruptedException (org.apache.lucene.util.ThreadInterruptedException), RecoveryFailedException (org.elasticsearch.indices.recovery.RecoveryFailedException), EngineException (org.elasticsearch.index.engine.EngineException), IOException (java.io.IOException), ElasticsearchException (org.elasticsearch.ElasticsearchException), TimeoutException (java.util.concurrent.TimeoutException), RefreshFailedEngineException (org.elasticsearch.index.engine.RefreshFailedEngineException), RetentionLeases (org.elasticsearch.index.seqno.RetentionLeases), ShardRouting (org.elasticsearch.cluster.routing.ShardRouting), ResyncTask (org.elasticsearch.index.shard.PrimaryReplicaSyncer.ResyncTask), ReadOnlyEngine (org.elasticsearch.index.engine.ReadOnlyEngine), Engine (org.elasticsearch.index.engine.Engine), HashSet (java.util.HashSet)
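
The shardStateUpdated latch follows the same hand-off discipline as termUpdated in Example 23: finish mutating internal state under the mutex, then open the latch so the blocked callback can safely read it. A stripped-down, runnable illustration with invented names (the real code routes the callback through asyncBlockOperations):

import java.util.concurrent.CountDownLatch;

public class LatchHandoffSketch {
    public static void main(String[] args) throws InterruptedException {
        CountDownLatch stateUpdated = new CountDownLatch(1);
        Thread callback = new Thread(() -> {
            try {
                stateUpdated.await(); // must not observe half-updated state
                System.out.println("applying new primary term");
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        });
        callback.start();
        System.out.println("updating routing, trackers, and pending term"); // happens-before countDown()
        stateUpdated.countDown(); // publish: the callback may now proceed
        callback.join();
    }
}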

Example 25 with AlreadyClosedException

Use of org.apache.lucene.store.AlreadyClosedException in project crate by crate.

From the class InternalEngine, method deleteInLucene:

private DeleteResult deleteInLucene(Delete delete, DeletionStrategy plan) throws IOException {
    assert assertMaxSeqNoOfUpdatesIsAdvanced(delete.uid(), delete.seqNo(), false, false);
    try {
        if (softDeleteEnabled) {
            final ParsedDocument tombstone = engineConfig.getTombstoneDocSupplier().newDeleteTombstoneDoc(delete.id());
            assert tombstone.docs().size() == 1 : "Tombstone doc should have single doc [" + tombstone + "]";
            tombstone.updateSeqID(delete.seqNo(), delete.primaryTerm());
            tombstone.version().setLongValue(plan.versionOfDeletion);
            final ParseContext.Document doc = tombstone.docs().get(0);
            assert doc.getField(SeqNoFieldMapper.TOMBSTONE_NAME) != null : "Delete tombstone document but _tombstone field is not set [" + doc + " ]";
            doc.add(softDeletesField);
            if (plan.addStaleOpToLucene || plan.currentlyDeleted) {
                indexWriter.addDocument(doc);
            } else {
                indexWriter.softUpdateDocument(delete.uid(), doc, softDeletesField);
            }
        } else if (plan.currentlyDeleted == false) {
            // any exception that comes from this is either an ACE or a fatal exception; there
            // can't be any document failures coming from this
            indexWriter.deleteDocuments(delete.uid());
        }
        if (plan.deleteFromLucene) {
            numDocDeletes.inc();
            versionMap.putDeleteUnderLock(delete.uid().bytes(), new DeleteVersionValue(plan.versionOfDeletion, delete.seqNo(), delete.primaryTerm(), engineConfig.getThreadPool().relativeTimeInMillis()));
        }
        return new DeleteResult(plan.versionOfDeletion, delete.primaryTerm(), delete.seqNo(), plan.currentlyDeleted == false);
    } catch (final Exception ex) {
        /*
         * Document-level failures when deleting are unexpected; we likely
         * hit something fatal such as the Lucene index being corrupt, or
         * the Lucene document limit. We have already issued a sequence number
         * here, so this is fatal: fail the engine.
         */
        if (ex instanceof AlreadyClosedException == false && indexWriter.getTragicException() == null) {
            final String reason = String.format(Locale.ROOT, "delete id[%s] origin [%s] seq#[%d] failed at the document level", delete.id(), delete.origin(), delete.seqNo());
            failEngine(reason, ex);
        }
        throw ex;
    }
}
Also used: ParsedDocument (org.elasticsearch.index.mapper.ParsedDocument), ParseContext (org.elasticsearch.index.mapper.ParseContext), AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException), LockObtainFailedException (org.apache.lucene.store.LockObtainFailedException), TranslogCorruptedException (org.elasticsearch.index.translog.TranslogCorruptedException), IOException (java.io.IOException)
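
The soft-delete branch relies on Lucene's IndexWriter.softUpdateDocument, which marks the previous version of the document with the soft-deletes field instead of hard-deleting it, atomically with indexing the tombstone. A minimal sketch against plain Lucene APIs, assuming an open IndexWriter writer; the field and term names are illustrative, not CrateDB's:

Field softDeletesField = new NumericDocValuesField("__soft_deletes", 1);

Document tombstone = new Document();
tombstone.add(new StringField("_id", "doc-1", Field.Store.NO));
tombstone.add(softDeletesField);

// soft-deletes the live copy of doc-1 and indexes the tombstone in one atomic operation
writer.softUpdateDocument(new Term("_id", "doc-1"), tombstone, softDeletesField);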

Aggregations

AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException): 79 usages
IOException (java.io.IOException): 53 usages
LockObtainFailedException (org.apache.lucene.store.LockObtainFailedException): 16 usages
CountDownLatch (java.util.concurrent.CountDownLatch): 15 usages
MockDirectoryWrapper (org.apache.lucene.store.MockDirectoryWrapper): 14 usages
TranslogCorruptedException (org.elasticsearch.index.translog.TranslogCorruptedException): 13 usages
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 12 usages
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 11 usages
Document (org.apache.lucene.document.Document): 11 usages
ElasticsearchException (org.elasticsearch.ElasticsearchException): 11 usages
ReleasableLock (org.elasticsearch.common.util.concurrent.ReleasableLock): 10 usages
UncheckedIOException (java.io.UncheckedIOException): 9 usages
ParsedDocument (org.elasticsearch.index.mapper.ParsedDocument): 9 usages
EOFException (java.io.EOFException): 8 usages
ArrayList (java.util.ArrayList): 7 usages
FileNotFoundException (java.io.FileNotFoundException): 6 usages
FileAlreadyExistsException (java.nio.file.FileAlreadyExistsException): 6 usages
NoSuchFileException (java.nio.file.NoSuchFileException): 6 usages
BrokenBarrierException (java.util.concurrent.BrokenBarrierException): 6 usages
CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList): 6 usages