Examples with RecoveryFailedException - org.elasticsearch.indices.recovery.RecoveryFailedException

Example 1 with RecoveryFailedException

use of org.elasticsearch.indices.recovery.RecoveryFailedException in project elasticsearch by elastic.

the class IndexShard method internalPerformTranslogRecovery.

private void internalPerformTranslogRecovery(boolean skipTranslogRecovery, boolean indexExists, long maxUnsafeAutoIdTimestamp) throws IOException {
    if (state != IndexShardState.RECOVERING) {
        throw new IndexShardNotRecoveringException(shardId, state);
    }
    recoveryState.setStage(RecoveryState.Stage.VERIFY_INDEX);
    // also check here, before we apply the translog
    if (Booleans.isTrue(checkIndexOnStartup)) {
        try {
            checkIndex();
        } catch (IOException ex) {
            throw new RecoveryFailedException(recoveryState, "check index failed", ex);
        }
    }
    recoveryState.setStage(RecoveryState.Stage.TRANSLOG);
    final EngineConfig.OpenMode openMode;
    /* by default we recover and index and replay the translog but if the index
         * doesn't exist we create everything from the scratch. Yet, if the index
         * doesn't exist we don't need to worry about the skipTranslogRecovery since
         * there is no translog on a non-existing index.
         * The skipTranslogRecovery invariant is used if we do remote recovery since
         * there the translog isn't local but on the remote host, hence we can skip it.
         */
    if (indexExists == false) {
        openMode = EngineConfig.OpenMode.CREATE_INDEX_AND_TRANSLOG;
    } else if (skipTranslogRecovery) {
        openMode = EngineConfig.OpenMode.OPEN_INDEX_CREATE_TRANSLOG;
    } else {
        openMode = EngineConfig.OpenMode.OPEN_INDEX_AND_TRANSLOG;
    }
    final EngineConfig config = newEngineConfig(openMode, maxUnsafeAutoIdTimestamp);
    // we disable deletes since we allow for operations to be executed against the shard while recovering
    // but we need to make sure we don't loose deletes until we are done recovering
    config.setEnableGcDeletes(false);
    Engine newEngine = createNewEngine(config);
    verifyNotClosed();
    if (openMode == EngineConfig.OpenMode.OPEN_INDEX_AND_TRANSLOG) {
        // We set active because we are now writing operations to the engine; this way, if we go idle after some time and become inactive,
        // we still give sync'd flush a chance to run:
        active.set(true);
        newEngine.recoverFromTranslog();
    }
}

Also used : RecoveryFailedException(org.elasticsearch.indices.recovery.RecoveryFailedException) IOException(java.io.IOException) EngineConfig(org.elasticsearch.index.engine.EngineConfig) Engine(org.elasticsearch.index.engine.Engine)

Example 2 with RecoveryFailedException

use of org.elasticsearch.indices.recovery.RecoveryFailedException in project elasticsearch by elastic.

the class IndexShard method startRecovery.

public void startRecovery(RecoveryState recoveryState, PeerRecoveryTargetService recoveryTargetService, PeerRecoveryTargetService.RecoveryListener recoveryListener, RepositoriesService repositoriesService, BiConsumer<String, MappingMetaData> mappingUpdateConsumer, IndicesService indicesService) {
    // }
    assert recoveryState.getRecoverySource().equals(shardRouting.recoverySource());
    switch(recoveryState.getRecoverySource().getType()) {
        case EMPTY_STORE:
        case EXISTING_STORE:
            // mark the shard as recovering on the cluster state thread
            markAsRecovering("from store", recoveryState);
            threadPool.generic().execute(() -> {
                try {
                    if (recoverFromStore()) {
                        recoveryListener.onRecoveryDone(recoveryState);
                    }
                } catch (Exception e) {
                    recoveryListener.onRecoveryFailure(recoveryState, new RecoveryFailedException(recoveryState, null, e), true);
                }
            });
            break;
        case PEER:
            try {
                markAsRecovering("from " + recoveryState.getSourceNode(), recoveryState);
                recoveryTargetService.startRecovery(this, recoveryState.getSourceNode(), recoveryListener);
            } catch (Exception e) {
                failShard("corrupted preexisting index", e);
                recoveryListener.onRecoveryFailure(recoveryState, new RecoveryFailedException(recoveryState, null, e), true);
            }
            break;
        case SNAPSHOT:
            // mark the shard as recovering on the cluster state thread
            markAsRecovering("from snapshot", recoveryState);
            SnapshotRecoverySource recoverySource = (SnapshotRecoverySource) recoveryState.getRecoverySource();
            threadPool.generic().execute(() -> {
                try {
                    final Repository repository = repositoriesService.repository(recoverySource.snapshot().getRepository());
                    if (restoreFromRepository(repository)) {
                        recoveryListener.onRecoveryDone(recoveryState);
                    }
                } catch (Exception e) {
                    recoveryListener.onRecoveryFailure(recoveryState, new RecoveryFailedException(recoveryState, null, e), true);
                }
            });
            break;
        case LOCAL_SHARDS:
            final IndexMetaData indexMetaData = indexSettings().getIndexMetaData();
            final Index mergeSourceIndex = indexMetaData.getMergeSourceIndex();
            final List<IndexShard> startedShards = new ArrayList<>();
            final IndexService sourceIndexService = indicesService.indexService(mergeSourceIndex);
            final int numShards = sourceIndexService != null ? sourceIndexService.getIndexSettings().getNumberOfShards() : -1;
            if (sourceIndexService != null) {
                for (IndexShard shard : sourceIndexService) {
                    if (shard.state() == IndexShardState.STARTED) {
                        startedShards.add(shard);
                    }
                }
            }
            if (numShards == startedShards.size()) {
                // mark the shard as recovering on the cluster state thread
                markAsRecovering("from local shards", recoveryState);
                threadPool.generic().execute(() -> {
                    try {
                        final Set<ShardId> shards = IndexMetaData.selectShrinkShards(shardId().id(), sourceIndexService.getMetaData(), +indexMetaData.getNumberOfShards());
                        if (recoverFromLocalShards(mappingUpdateConsumer, startedShards.stream().filter((s) -> shards.contains(s.shardId())).collect(Collectors.toList()))) {
                            recoveryListener.onRecoveryDone(recoveryState);
                        }
                    } catch (Exception e) {
                        recoveryListener.onRecoveryFailure(recoveryState, new RecoveryFailedException(recoveryState, null, e), true);
                    }
                });
            } else {
                final RuntimeException e;
                if (numShards == -1) {
                    e = new IndexNotFoundException(mergeSourceIndex);
                } else {
                    e = new IllegalStateException("not all shards from index " + mergeSourceIndex + " are started yet, expected " + numShards + " found " + startedShards.size() + " can't recover shard " + shardId());
                }
                throw e;
            }
            break;
        default:
            throw new IllegalArgumentException("Unknown recovery source " + recoveryState.getRecoverySource());
    }
}

Also used : IndexService(org.elasticsearch.index.IndexService) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) CheckIndex(org.apache.lucene.index.CheckIndex) Index(org.elasticsearch.index.Index) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) IndexNotFoundException(org.elasticsearch.index.IndexNotFoundException) ClosedByInterruptException(java.nio.channels.ClosedByInterruptException) ThreadInterruptedException(org.apache.lucene.util.ThreadInterruptedException) RecoveryFailedException(org.elasticsearch.indices.recovery.RecoveryFailedException) EngineException(org.elasticsearch.index.engine.EngineException) IOException(java.io.IOException) ElasticsearchException(org.elasticsearch.ElasticsearchException) NoSuchFileException(java.nio.file.NoSuchFileException) TimeoutException(java.util.concurrent.TimeoutException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) RefreshFailedEngineException(org.elasticsearch.index.engine.RefreshFailedEngineException) FileNotFoundException(java.io.FileNotFoundException) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData) Repository(org.elasticsearch.repositories.Repository) SnapshotRecoverySource(org.elasticsearch.cluster.routing.RecoverySource.SnapshotRecoverySource) RecoveryFailedException(org.elasticsearch.indices.recovery.RecoveryFailedException) IndexNotFoundException(org.elasticsearch.index.IndexNotFoundException)

Example 3 with RecoveryFailedException

use of org.elasticsearch.indices.recovery.RecoveryFailedException in project elasticsearch by elastic.

the class RecoveriesCollectionTests method testRecoveryTimeout.

public void testRecoveryTimeout() throws Exception {
    try (ReplicationGroup shards = createGroup(0)) {
        final RecoveriesCollection collection = new RecoveriesCollection(logger, threadPool, v -> {
        });
        final AtomicBoolean failed = new AtomicBoolean();
        final CountDownLatch latch = new CountDownLatch(1);
        final long recoveryId = startRecovery(collection, shards.getPrimaryNode(), shards.addReplica(), new PeerRecoveryTargetService.RecoveryListener() {

            @Override
            public void onRecoveryDone(RecoveryState state) {
                latch.countDown();
            }

            @Override
            public void onRecoveryFailure(RecoveryState state, RecoveryFailedException e, boolean sendShardFailure) {
                failed.set(true);
                latch.countDown();
            }
        }, TimeValue.timeValueMillis(100));
        try {
            latch.await(30, TimeUnit.SECONDS);
            assertTrue("recovery failed to timeout", failed.get());
        } finally {
            collection.cancelRecovery(recoveryId, "meh");
        }
    }
}

Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) PeerRecoveryTargetService(org.elasticsearch.indices.recovery.PeerRecoveryTargetService) RecoveryFailedException(org.elasticsearch.indices.recovery.RecoveryFailedException) RecoveriesCollection(org.elasticsearch.indices.recovery.RecoveriesCollection) CountDownLatch(java.util.concurrent.CountDownLatch) RecoveryState(org.elasticsearch.indices.recovery.RecoveryState)

Aggregations

RecoveryFailedException (org.elasticsearch.indices.recovery.RecoveryFailedException)3 IOException (java.io.IOException)2 FileNotFoundException (java.io.FileNotFoundException)1 ClosedByInterruptException (java.nio.channels.ClosedByInterruptException)1 NoSuchFileException (java.nio.file.NoSuchFileException)1 ArrayList (java.util.ArrayList)1 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 TimeoutException (java.util.concurrent.TimeoutException)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 CheckIndex (org.apache.lucene.index.CheckIndex)1 CorruptIndexException (org.apache.lucene.index.CorruptIndexException)1 IndexFormatTooNewException (org.apache.lucene.index.IndexFormatTooNewException)1 IndexFormatTooOldException (org.apache.lucene.index.IndexFormatTooOldException)1 AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException)1 ThreadInterruptedException (org.apache.lucene.util.ThreadInterruptedException)1 ElasticsearchException (org.elasticsearch.ElasticsearchException)1 IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData)1 SnapshotRecoverySource (org.elasticsearch.cluster.routing.RecoverySource.SnapshotRecoverySource)1 Index (org.elasticsearch.index.Index)1