Search in sources :

Example 16 with RecoveryState

use of org.elasticsearch.indices.recovery.RecoveryState in project elasticsearch by elastic.

the class StoreRecovery method internalRecoverFromStore.

/**
 * Recovers the state of the shard from its local store.
 * <p>
 * The method reads the last committed segments info (if any), records its version on the
 * recovery state, and then either skips or performs translog recovery depending on the
 * recovery source type, before finalizing and marking the shard as post-recovery.
 * The store is ref-counted for the duration of the call.
 *
 * @param indexShard the shard to recover
 * @throws IndexShardRecoveryException if reading the commit fails, or if an
 *         {@link EngineException} or {@link IOException} is raised during recovery
 */
private void internalRecoverFromStore(IndexShard indexShard) throws IndexShardRecoveryException {
    final RecoveryState recoveryState = indexShard.recoveryState();
    // Any recovery source other than EMPTY_STORE means an index is expected to be present already.
    final boolean indexShouldExists = recoveryState.getRecoverySource().getType() != RecoverySource.Type.EMPTY_STORE;
    indexShard.prepareForIndexRecovery();
    // Stays -1 when no commit is read (or when a leftover index is wiped below).
    long version = -1;
    SegmentInfos si = null;
    final Store store = indexShard.store();
    // Hold a reference on the store for the whole recovery; released in the finally block.
    store.incRef();
    try {
        try {
            store.failIfCorrupted();
            try {
                si = store.readLastCommittedSegmentsInfo();
            } catch (Exception e) {
                // Best effort: collect the directory listing so the error message shows what is on disk.
                String files = "_unknown_";
                try {
                    files = Arrays.toString(store.directory().listAll());
                } catch (Exception inner) {
                    // Attach the original read failure to the listing failure before rendering it.
                    inner.addSuppressed(e);
                    files += " (failure=" + ExceptionsHelper.detailedMessage(inner) + ")";
                }
                // A missing commit is only an error when an index is expected; otherwise si stays null.
                if (indexShouldExists) {
                    throw new IndexShardRecoveryException(shardId, "shard allocated for local recovery (post api), should exist, but doesn't, current files: " + files, e);
                }
            }
            if (si != null) {
                if (indexShouldExists) {
                    version = si.getVersion();
                } else {
                    // It exists in the directory but shouldn't exist on the FS; it's a leftover (possibly dangling).
                    // This is a "new index create" API call, so we have to do something: better to clean it
                    // than to reuse the same data.
                    logger.trace("cleaning existing shard, shouldn't exists");
                    Lucene.cleanLuceneIndex(store.directory());
                    si = null;
                }
            }
        } catch (Exception e) {
            // Note: this also catches (and re-wraps) the IndexShardRecoveryException thrown above.
            throw new IndexShardRecoveryException(shardId, "failed to fetch index version after copying it over", e);
        }
        recoveryState.getIndex().updateVersion(version);
        if (recoveryState.getRecoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS) {
            assert indexShouldExists;
            // Recovering from local shards: no translog replay is performed.
            indexShard.skipTranslogRecovery(IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP);
        } else {
            // Since we recover from local, just fill in the files and sizes.
            try {
                final RecoveryState.Index index = recoveryState.getIndex();
                if (si != null) {
                    addRecoveredFileDetails(si, store, index);
                }
            } catch (IOException e) {
                // File details are informational only; a failure here does not abort the recovery.
                logger.debug("failed to list file details", e);
            }
            indexShard.performTranslogRecovery(indexShouldExists);
        }
        indexShard.finalizeRecovery();
        indexShard.postRecovery("post recovery from shard_store");
    } catch (EngineException | IOException e) {
        throw new IndexShardRecoveryException(shardId, "failed to recover from gateway", e);
    } finally {
        store.decRef();
    }
}
Also used : SegmentInfos(org.apache.lucene.index.SegmentInfos) EngineException(org.elasticsearch.index.engine.EngineException) Store(org.elasticsearch.index.store.Store) IOException(java.io.IOException) RecoveryState(org.elasticsearch.indices.recovery.RecoveryState) IndexShardRestoreFailedException(org.elasticsearch.index.snapshots.IndexShardRestoreFailedException) EngineException(org.elasticsearch.index.engine.EngineException) IOException(java.io.IOException)

Example 17 with RecoveryState

use of org.elasticsearch.indices.recovery.RecoveryState in project elasticsearch by elastic.

the class StoreRecovery method executeRecovery.

/**
 * Runs the given recovery task for the shard and reports whether it completed.
 * <p>
 * On success the recovery statistics are logged (detailed at trace level, summary at debug
 * level) and {@code true} is returned. If the recovery failed because the shard was closed
 * or not started in the meantime, the failure is ignored and {@code false} is returned.
 *
 * @param indexShard       the shard being recovered
 * @param recoveryRunnable the task performing the actual recovery
 * @return {@code true} if the recovery ran to completion, {@code false} if it was abandoned
 *         because the shard got closed or was not started
 * @throws IndexShardRecoveryException if the recovery failed for any other reason
 */
private boolean executeRecovery(final IndexShard indexShard, Runnable recoveryRunnable) throws IndexShardRecoveryException {
    try {
        recoveryRunnable.run();

        // The recovery itself is responsible for calling post recovery, so by now the shard
        // must have left both the init (CREATED) and the RECOVERING stage.
        final IndexShardState stateAfterRecovery = indexShard.state();
        final RecoveryState recoveryState = indexShard.recoveryState();
        assert !(stateAfterRecovery == IndexShardState.CREATED || stateAfterRecovery == IndexShardState.RECOVERING)
            : "recovery process of " + shardId + " didn't get to post_recovery. shardState [" + stateAfterRecovery + "]";

        if (logger.isTraceEnabled()) {
            final RecoveryState.Index indexStats = recoveryState.getIndex();
            final StringBuilder details = new StringBuilder();

            // Index files: totals, recovered and reused.
            details.append("    index    : files           [").append(indexStats.totalFileCount());
            details.append("] with total_size [").append(new ByteSizeValue(indexStats.totalBytes()));
            details.append("], took[").append(TimeValue.timeValueMillis(indexStats.time())).append("]\n");
            details.append("             : recovered_files [").append(indexStats.recoveredFileCount());
            details.append("] with total_size [").append(new ByteSizeValue(indexStats.recoveredBytes())).append("]\n");
            details.append("             : reusing_files   [").append(indexStats.reusedFileCount());
            details.append("] with total_size [").append(new ByteSizeValue(indexStats.reusedBytes())).append("]\n");

            // Index verification timings.
            details.append("    verify_index    : took [").append(TimeValue.timeValueMillis(recoveryState.getVerifyIndex().time()));
            details.append("], check_index [").append(timeValueMillis(recoveryState.getVerifyIndex().checkIndexTime())).append("]\n");

            // Translog replay.
            details.append("    translog : number_of_operations [").append(recoveryState.getTranslog().recoveredOperations());
            details.append("], took [").append(TimeValue.timeValueMillis(recoveryState.getTranslog().time())).append("]");

            logger.trace("recovery completed from [shard_store], took [{}]\n{}", timeValueMillis(recoveryState.getTimer().time()), details);
        } else if (logger.isDebugEnabled()) {
            logger.debug("recovery completed from [shard_store], took [{}]", timeValueMillis(recoveryState.getTimer().time()));
        }
        return true;
    } catch (IndexShardRecoveryException e) {
        // The shard got closed on us (directly, or as the underlying cause): just ignore this recovery.
        if (indexShard.state() == IndexShardState.CLOSED
            || e.getCause() instanceof IndexShardClosedException
            || e.getCause() instanceof IndexShardNotStartedException) {
            return false;
        }
        throw e;
    } catch (IndexShardClosedException | IndexShardNotStartedException e) {
        // The shard is gone or never started; there is nothing to recover.
        return false;
    } catch (Exception e) {
        if (indexShard.state() == IndexShardState.CLOSED) {
            // Got closed on us, just ignore this recovery.
            return false;
        }
        throw new IndexShardRecoveryException(shardId, "failed recovery", e);
    }
}
Also used : ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) RecoveryState(org.elasticsearch.indices.recovery.RecoveryState) IndexShardRestoreFailedException(org.elasticsearch.index.snapshots.IndexShardRestoreFailedException) EngineException(org.elasticsearch.index.engine.EngineException) IOException(java.io.IOException)

Example 18 with RecoveryState

use of org.elasticsearch.indices.recovery.RecoveryState in project elasticsearch by elastic.

the class OldIndexBackwardsCompatibilityIT method assertIndexSanity.

/**
 * Sanity-checks an index that was restored from an older version.
 * <p>
 * Verifies that the index exists with the expected creation version, that exactly one
 * primary was recovered from the existing store with a non-empty translog, that its
 * segments are a mix of old-version and current-version Lucene segments, and that a
 * search returns at least one hit.
 *
 * @param indexName    name of the index to check
 * @param indexCreated version the index is expected to have been created with
 */
void assertIndexSanity(String indexName, Version indexCreated) {
    // The index must be the only one returned, and must carry the expected creation version.
    final GetIndexResponse indexResponse = client().admin().indices().prepareGetIndex().addIndices(indexName).get();
    assertEquals(1, indexResponse.indices().length);
    assertEquals(indexName, indexResponse.indices()[0]);
    final Version createdVersion = Version.indexCreated(indexResponse.getSettings().get(indexName));
    assertEquals(indexCreated, createdVersion);
    ensureYellow(indexName);

    // Exactly one finished primary recovery from the existing store must have replayed translog operations.
    final RecoveryResponse recoveries = client().admin().indices().prepareRecoveries(indexName).setDetailed(true).setActiveOnly(false).get();
    boolean translogRecovered = false;
    for (List<RecoveryState> shardStates : recoveries.shardRecoveryStates().values()) {
        for (RecoveryState shardState : shardStates) {
            final boolean finishedPrimaryFromStore = shardState.getStage() == RecoveryState.Stage.DONE
                && shardState.getPrimary()
                && shardState.getRecoverySource().getType() == RecoverySource.Type.EXISTING_STORE;
            if (finishedPrimaryFromStore == false) {
                continue;
            }
            assertFalse("more than one primary recoverd?", translogRecovered);
            assertNotEquals(0, shardState.getTranslog().recoveredOperations());
            translogRecovered = true;
        }
    }
    assertTrue("expected translog but nothing was recovered", translogRecovered);

    // Classify every segment by Lucene version: written by the old version, the current one, or both
    // (when the index was created with the current Lucene version). Anything else is unexpected.
    final IndicesSegmentResponse segmentsResponse = client().admin().indices().prepareSegments(indexName).get();
    final IndexSegments indexSegments = segmentsResponse.getIndices().get(indexName);
    int currentSegments = 0;
    int oldSegments = 0;
    for (IndexShardSegments perShard : indexSegments) {
        for (ShardSegments perCopy : perShard) {
            for (Segment segment : perCopy) {
                final boolean writtenByOld = indexCreated.luceneVersion.equals(segment.version);
                final boolean writtenByCurrent = Version.CURRENT.luceneVersion.equals(segment.version);
                if (writtenByOld == false && writtenByCurrent == false) {
                    fail("unexpected version " + segment.version);
                }
                if (writtenByOld) {
                    oldSegments++;
                }
                if (writtenByCurrent) {
                    currentSegments++;
                }
            }
        }
    }
    assertNotEquals("expected at least 1 current segment after translog recovery", 0, currentSegments);
    assertNotEquals("expected at least 1 old segment", 0, oldSegments);

    // Finally, the recovered index must be searchable and contain data.
    final SearchResponse searchResponse = client().prepareSearch(indexName).get();
    assertThat(searchResponse.getHits().getTotalHits(), greaterThanOrEqualTo(1L));
}
Also used : IndicesSegmentResponse(org.elasticsearch.action.admin.indices.segments.IndicesSegmentResponse) IndexShardSegments(org.elasticsearch.action.admin.indices.segments.IndexShardSegments) IndexSegments(org.elasticsearch.action.admin.indices.segments.IndexSegments) Segment(org.elasticsearch.index.engine.Segment) RecoveryResponse(org.elasticsearch.action.admin.indices.recovery.RecoveryResponse) SearchResponse(org.elasticsearch.action.search.SearchResponse) Version(org.elasticsearch.Version) GetIndexResponse(org.elasticsearch.action.admin.indices.get.GetIndexResponse) ShardSegments(org.elasticsearch.action.admin.indices.segments.ShardSegments) IndexShardSegments(org.elasticsearch.action.admin.indices.segments.IndexShardSegments) RecoveryState(org.elasticsearch.indices.recovery.RecoveryState)

Example 19 with RecoveryState

use of org.elasticsearch.indices.recovery.RecoveryState in project elasticsearch by elastic.

the class IndicesClusterStateService method createShard.

/**
 * Creates the local shard for an initializing shard routing.
 * <p>
 * For peer recoveries the source node is resolved first; if none can be found the shard
 * is skipped. Any failure while creating the shard is reported through
 * {@code failAndRemoveShard}.
 *
 * @param nodes        the nodes of the cluster state being applied
 * @param routingTable the routing table used to locate a peer recovery source
 * @param shardRouting the initializing shard to create
 * @param state        the cluster state passed along when the shard has to be failed
 */
private void createShard(DiscoveryNodes nodes, RoutingTable routingTable, ShardRouting shardRouting, ClusterState state) {
    assert shardRouting.initializing() : "only allow shard creation for initializing shard but was " + shardRouting;

    // Only peer recoveries need a source node; every other recovery source runs without one.
    final DiscoveryNode recoverFrom;
    if (shardRouting.recoverySource().getType() == Type.PEER) {
        recoverFrom = findSourceNodeForPeerRecovery(logger, routingTable, nodes, shardRouting);
        if (recoverFrom == null) {
            logger.trace("ignoring initializing shard {} - no source node can be found.", shardRouting.shardId());
            return;
        }
    } else {
        recoverFrom = null;
    }

    try {
        logger.debug("{} creating shard", shardRouting.shardId());
        final RecoveryState initialState = new RecoveryState(shardRouting, nodes.getLocalNode(), recoverFrom);
        indicesService.createShard(shardRouting, initialState, recoveryTargetService, new RecoveryListener(shardRouting), repositoriesService, failedShardHandler);
    } catch (Exception e) {
        failAndRemoveShard(shardRouting, true, "failed to create shard", e, state);
    }
}
Also used : DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) RecoveryState(org.elasticsearch.indices.recovery.RecoveryState) ShardNotFoundException(org.elasticsearch.index.shard.ShardNotFoundException) ShardLockObtainFailedException(org.elasticsearch.env.ShardLockObtainFailedException) LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) IndexShardRelocatedException(org.elasticsearch.index.shard.IndexShardRelocatedException) RecoveryFailedException(org.elasticsearch.indices.recovery.RecoveryFailedException) ResourceAlreadyExistsException(org.elasticsearch.ResourceAlreadyExistsException) IOException(java.io.IOException)

Example 20 with RecoveryState

use of org.elasticsearch.indices.recovery.RecoveryState in project elasticsearch by elastic.

the class ReusePeerRecoverySharedTest method testCase.

/**
 * Test peer reuse on recovery. This is shared between RecoverFromGatewayIT
 * and RecoveryBackwardsCompatibilityIT.
 * <p>
 * The test indexes documents into a {@code test} index, performs one (with sync ids)
 * or two (without) full cluster restarts while allocation is disabled, and then checks
 * the recovery statistics: replicas recovered without sync ids may only re-copy the
 * segments file, every other shard must reuse all of its files.
 *
 * @param indexSettings
 *            settings for the index to test
 * @param restartCluster
 *            runnable that will restart the cluster under test
 * @param logger
 *            logger for logging
 * @param useSyncIds
 *            should this use synced flush? can't use synced flush in the bwc
 *            tests
 */
public static void testCase(Settings indexSettings, Runnable restartCluster, Logger logger, boolean useSyncIds) {
    /*
     * prevent any rebalance actions during the peer recovery if we run into
     * a relocation the reuse count will be 0 and this fails the test. We
     * are testing here if we reuse the files on disk after full restarts
     * for replicas.
     */
    assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder().put(indexSettings).put(EnableAllocationDecider.INDEX_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), EnableAllocationDecider.Rebalance.NONE)));
    client().admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("30s").get();
    logger.info("--> indexing docs");
    for (int i = 0; i < 1000; i++) {
        client().prepareIndex("test", "type").setSource("field", "value").execute().actionGet();
        // flush periodically so that the index ends up with several segments
        if ((i % 200) == 0) {
            client().admin().indices().prepareFlush().execute().actionGet();
        }
    }
    if (randomBoolean()) {
        client().admin().indices().prepareFlush().execute().actionGet();
    }
    logger.info("--> running cluster health");
    client().admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("30s").get();
    // just wait for merges
    client().admin().indices().prepareForceMerge("test").setMaxNumSegments(100).get();
    client().admin().indices().prepareFlush().setForce(true).get();
    if (useSyncIds == false) {
        logger.info("--> disabling allocation while the cluster is shut down");
        // Disable allocations while we are closing nodes
        client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), EnableAllocationDecider.Allocation.NONE)).get();
        logger.info("--> full cluster restart");
        restartCluster.run();
        logger.info("--> waiting for cluster to return to green after first shutdown");
        client().admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("30s").get();
    } else {
        logger.info("--> trying to sync flush");
        // expected value first, so that a failure reports "expected 0 but was N"
        assertEquals(0, client().admin().indices().prepareSyncedFlush("test").get().failedShards());
        assertSyncIdsNotNull();
    }
    logger.info("--> disabling allocation while the cluster is shut down{}", useSyncIds ? "" : " a second time");
    // Disable allocations while we are closing nodes
    client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), EnableAllocationDecider.Allocation.NONE)).get();
    logger.info("--> full cluster restart");
    restartCluster.run();
    logger.info("--> waiting for cluster to return to green after {}shutdown", useSyncIds ? "" : "second ");
    client().admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("30s").get();
    if (useSyncIds) {
        assertSyncIdsNotNull();
    }
    RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries("test").get();
    for (RecoveryState recoveryState : recoveryResponse.shardRecoveryStates().get("test")) {
        // total size of the files whose name starts with "segments"; only those may be re-copied
        long recovered = 0;
        for (RecoveryState.File file : recoveryState.getIndex().fileDetails()) {
            if (file.name().startsWith("segments")) {
                recovered += file.length();
            }
        }
        if (!recoveryState.getPrimary() && (useSyncIds == false)) {
            logger.info("--> replica shard {} recovered from {} to {}, recovered {}, reuse {}", recoveryState.getShardId().getId(), recoveryState.getSourceNode().getName(), recoveryState.getTargetNode().getName(), recoveryState.getIndex().recoveredBytes(), recoveryState.getIndex().reusedBytes());
            assertThat("no bytes should be recovered", recoveryState.getIndex().recoveredBytes(), equalTo(recovered));
            assertThat("data should have been reused", recoveryState.getIndex().reusedBytes(), greaterThan(0L));
            // we have to recover the segments file since we commit the translog ID on engine startup
            assertThat("all bytes should be reused except of the segments file", recoveryState.getIndex().reusedBytes(), equalTo(recoveryState.getIndex().totalBytes() - recovered));
            assertThat("no files should be recovered except of the segments file", recoveryState.getIndex().recoveredFileCount(), equalTo(1));
            assertThat("all files should be reused except of the segments file", recoveryState.getIndex().reusedFileCount(), equalTo(recoveryState.getIndex().totalFileCount() - 1));
            assertThat("> 0 files should be reused", recoveryState.getIndex().reusedFileCount(), greaterThan(0));
        } else {
            if (useSyncIds && !recoveryState.getPrimary()) {
                logger.info("--> replica shard {} recovered from {} to {} using sync id, recovered {}, reuse {}", recoveryState.getShardId().getId(), recoveryState.getSourceNode().getName(), recoveryState.getTargetNode().getName(), recoveryState.getIndex().recoveredBytes(), recoveryState.getIndex().reusedBytes());
            }
            // primaries, and replicas recovered via sync id, must not copy anything at all
            assertThat(recoveryState.getIndex().recoveredBytes(), equalTo(0L));
            assertThat(recoveryState.getIndex().reusedBytes(), equalTo(recoveryState.getIndex().totalBytes()));
            assertThat(recoveryState.getIndex().recoveredFileCount(), equalTo(0));
            assertThat(recoveryState.getIndex().reusedFileCount(), equalTo(recoveryState.getIndex().totalFileCount()));
        }
    }
}
Also used : RecoveryState(org.elasticsearch.indices.recovery.RecoveryState) RecoveryResponse(org.elasticsearch.action.admin.indices.recovery.RecoveryResponse)

Aggregations

RecoveryState (org.elasticsearch.indices.recovery.RecoveryState)29 DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)16 ShardRouting (org.elasticsearch.cluster.routing.ShardRouting)9 Settings (org.elasticsearch.common.settings.Settings)7 Store (org.elasticsearch.index.store.Store)6 IOException (java.io.IOException)5 TestShardRouting (org.elasticsearch.cluster.routing.TestShardRouting)5 RecoveryResponse (org.elasticsearch.action.admin.indices.recovery.RecoveryResponse)4 UnassignedInfo (org.elasticsearch.cluster.routing.UnassignedInfo)4 EngineException (org.elasticsearch.index.engine.EngineException)4 Matchers.containsString (org.hamcrest.Matchers.containsString)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 List (java.util.List)3 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 Version (org.elasticsearch.Version)3 IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData)3 IndexService (org.elasticsearch.index.IndexService)3 IndexShard (org.elasticsearch.index.shard.IndexShard)3 Path (java.nio.file.Path)2