Search in sources:

Example 1 with StoreFileMetadata

Use of org.elasticsearch.index.store.StoreFileMetadata in project elasticsearch by elastic.

From the class RecoverySourceHandler, method phase1.

/**
 * Perform phase1 of the recovery operations. Once this {@link IndexCommit}
 * snapshot has been performed, no commit operations (files being fsync'd)
 * are effectively allowed on this index until all recovery phases are done.
 * <p>
 * Phase1 examines the segment files on the target node and copies over the
 * segments that are missing. Only segments that have the same size and
 * checksum can be reused.
 */
public void phase1(final IndexCommit snapshot, final Translog.View translogView) {
    cancellableThreads.checkForCancel();
    // Total size of segment files that are recovered
    long totalSize = 0;
    // Total size of segment files that were able to be re-used
    long existingTotalSize = 0;
    final Store store = shard.store();
    store.incRef();
    try {
        StopWatch stopWatch = new StopWatch().start();
        final Store.MetadataSnapshot recoverySourceMetadata;
        try {
            recoverySourceMetadata = store.getMetadata(snapshot);
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            shard.failShard("recovery", ex);
            throw ex;
        }
        for (String name : snapshot.getFileNames()) {
            final StoreFileMetaData md = recoverySourceMetadata.get(name);
            if (md == null) {
                logger.info("Snapshot differs from actual index for file: {} meta: {}", name, recoverySourceMetadata.asMap());
                throw new CorruptIndexException("Snapshot differs from actual index - maybe index was removed metadata has " + recoverySourceMetadata.asMap().size() + " files", name);
            }
        }
        // Generate a "diff" of all the identical, different, and missing
        // segment files on the target node, using the existing files on
        // the source node
        String recoverySourceSyncId = recoverySourceMetadata.getSyncId();
        String recoveryTargetSyncId = request.metadataSnapshot().getSyncId();
        final boolean recoverWithSyncId = recoverySourceSyncId != null && recoverySourceSyncId.equals(recoveryTargetSyncId);
        if (recoverWithSyncId) {
            final long numDocsTarget = request.metadataSnapshot().getNumDocs();
            final long numDocsSource = recoverySourceMetadata.getNumDocs();
            if (numDocsTarget != numDocsSource) {
                throw new IllegalStateException("try to recover " + request.shardId() + " from primary shard with sync id but number " + "of docs differ: " + numDocsSource + " (" + request.sourceNode().getName() + ", primary) vs " + numDocsTarget + "(" + request.targetNode().getName() + ")");
            }
            // we shortcut recovery here because we have nothing to copy. but we must still start the engine on the target.
            // so we don't return here
            logger.trace("skipping [phase1]- identical sync id [{}] found on both source and target", recoverySourceSyncId);
        } else {
            final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
            for (StoreFileMetaData md : diff.identical) {
                response.phase1ExistingFileNames.add(md.name());
                response.phase1ExistingFileSizes.add(md.length());
                existingTotalSize += md.length();
                if (logger.isTraceEnabled()) {
                    logger.trace("recovery [phase1]: not recovering [{}], exist in local store and has checksum [{}]," + " size [{}]", md.name(), md.checksum(), md.length());
                }
                totalSize += md.length();
            }
            List<StoreFileMetaData> phase1Files = new ArrayList<>(diff.different.size() + diff.missing.size());
            phase1Files.addAll(diff.different);
            phase1Files.addAll(diff.missing);
            for (StoreFileMetaData md : phase1Files) {
                if (request.metadataSnapshot().asMap().containsKey(md.name())) {
                    logger.trace("recovery [phase1]: recovering [{}], exists in local store, but is different: remote [{}], local [{}]", md.name(), request.metadataSnapshot().asMap().get(md.name()), md);
                } else {
                    logger.trace("recovery [phase1]: recovering [{}], does not exist in remote", md.name());
                }
                response.phase1FileNames.add(md.name());
                response.phase1FileSizes.add(md.length());
                totalSize += md.length();
            }
            response.phase1TotalSize = totalSize;
            response.phase1ExistingTotalSize = existingTotalSize;
            logger.trace("recovery [phase1]: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]", response.phase1FileNames.size(), new ByteSizeValue(totalSize), response.phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSize));
            cancellableThreads.execute(() -> recoveryTarget.receiveFileInfo(response.phase1FileNames, response.phase1FileSizes, response.phase1ExistingFileNames, response.phase1ExistingFileSizes, translogView.totalOperations()));
            // How many bytes we've copied since we last called RateLimiter.pause
            final Function<StoreFileMetaData, OutputStream> outputStreamFactories = md -> new BufferedOutputStream(new RecoveryOutputStream(md, translogView), chunkSizeInBytes);
            sendFiles(store, phase1Files.toArray(new StoreFileMetaData[phase1Files.size()]), outputStreamFactories);
            // Send the CLEAN_FILES request, which takes all of the files that
            // were transferred and renames them from their temporary file
            // names to the actual file names. It also writes checksums for
            // the files after they have been renamed.
            //
            // Once the files have been renamed, any other files that are not
            // part of this recovery (out of date segments, for example)
            // are deleted
            try {
                cancellableThreads.executeIO(() -> recoveryTarget.cleanFiles(translogView.totalOperations(), recoverySourceMetadata));
            } catch (RemoteTransportException | IOException targetException) {
                final IOException corruptIndexException;
                // we realized that after the index was copied and we wanted to finalize the recovery
                // the index was corrupted:
                //   - maybe due to a broken segments file on an empty index (transferred with no checksum)
                //   - maybe due to old segments without checksums or length only checks
                if ((corruptIndexException = ExceptionsHelper.unwrapCorruption(targetException)) != null) {
                    try {
                        final Store.MetadataSnapshot recoverySourceMetadata1 = store.getMetadata(snapshot);
                        StoreFileMetaData[] metadata = StreamSupport.stream(recoverySourceMetadata1.spliterator(), false).toArray(size -> new StoreFileMetaData[size]);
                        ArrayUtil.timSort(metadata, (o1, o2) -> {
                            // check small files first
                            return Long.compare(o1.length(), o2.length());
                        });
                        for (StoreFileMetaData md : metadata) {
                            cancellableThreads.checkForCancel();
                            logger.debug("checking integrity for file {} after remove corruption exception", md);
                            if (store.checkIntegrityNoException(md) == false) {
                                // we are corrupted on the primary -- fail!
                                shard.failShard("recovery", corruptIndexException);
                                logger.warn("Corrupted file detected {} checksum mismatch", md);
                                throw corruptIndexException;
                            }
                        }
                    } catch (IOException ex) {
                        targetException.addSuppressed(ex);
                        throw targetException;
                    }
                    // corruption has happened on the way to replica
                    RemoteTransportException exception = new RemoteTransportException("File corruption occurred on recovery but " + "checksums are ok", null);
                    exception.addSuppressed(targetException);
                    logger.warn((org.apache.logging.log4j.util.Supplier<?>) () -> new ParameterizedMessage("{} Remote file corruption during finalization of recovery on node {}. local checksum OK", shard.shardId(), request.targetNode()), corruptIndexException);
                    throw exception;
                } else {
                    throw targetException;
                }
            }
        }
        logger.trace("recovery [phase1]: took [{}]", stopWatch.totalTime());
        response.phase1Time = stopWatch.totalTime().millis();
    } catch (Exception e) {
        throw new RecoverFilesRecoveryException(request.shardId(), response.phase1FileNames.size(), new ByteSizeValue(totalSize), e);
    } finally {
        store.decRef();
    }
}
Also used : IndexCommit(org.apache.lucene.index.IndexCommit) CancellableThreads(org.elasticsearch.common.util.CancellableThreads) IndexShardClosedException(org.elasticsearch.index.shard.IndexShardClosedException) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) Nullable(org.elasticsearch.common.Nullable) IndexShardRelocatedException(org.elasticsearch.index.shard.IndexShardRelocatedException) RecoveryEngineException(org.elasticsearch.index.engine.RecoveryEngineException) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) Function(java.util.function.Function) Supplier(java.util.function.Supplier) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) BufferedOutputStream(java.io.BufferedOutputStream) ArrayList(java.util.ArrayList) BytesArray(org.elasticsearch.common.bytes.BytesArray) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData) Settings(org.elasticsearch.common.settings.Settings) SequenceNumbersService(org.elasticsearch.index.seqno.SequenceNumbersService) Store(org.elasticsearch.index.store.Store) LocalCheckpointTracker(org.elasticsearch.index.seqno.LocalCheckpointTracker) Streams(org.elasticsearch.common.io.Streams) StreamSupport(java.util.stream.StreamSupport) IOContext(org.apache.lucene.store.IOContext) Releasable(org.elasticsearch.common.lease.Releasable) Loggers(org.elasticsearch.common.logging.Loggers) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) OutputStream(java.io.OutputStream) ArrayUtil(org.apache.lucene.util.ArrayUtil) IndexShardState(org.elasticsearch.index.shard.IndexShardState) IndexInput(org.apache.lucene.store.IndexInput) IndexShard(org.elasticsearch.index.shard.IndexShard) IOUtils(org.apache.lucene.util.IOUtils) IOException(java.io.IOException) StopWatch(org.elasticsearch.common.StopWatch) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) ExceptionsHelper(org.elasticsearch.ExceptionsHelper) RemoteTransportException(org.elasticsearch.transport.RemoteTransportException) List(java.util.List) Logger(org.apache.logging.log4j.Logger) InputStreamIndexInput(org.elasticsearch.common.lucene.store.InputStreamIndexInput) Translog(org.elasticsearch.index.translog.Translog) RateLimiter(org.apache.lucene.store.RateLimiter)
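
The heart of phase1 is the recoveryDiff call, which buckets segment files into identical, different, and missing sets. The sketch below is not from the Elasticsearch source (Phase1Plan is a hypothetical helper); it shows how a Store.RecoveryDiff computed from two metadata snapshots maps onto the reuse/copy decision made above:

import java.util.ArrayList;
import java.util.List;

import org.elasticsearch.index.store.Store;
import org.elasticsearch.index.store.StoreFileMetaData;

// Hypothetical helper: given source and target metadata snapshots,
// split files into those phase1 can skip and those it must transfer.
final class Phase1Plan {
    final List<StoreFileMetaData> reusable = new ArrayList<>();
    final List<StoreFileMetaData> toCopy = new ArrayList<>();

    Phase1Plan(Store.MetadataSnapshot source, Store.MetadataSnapshot target) {
        // recoveryDiff compares files by name, length and checksum
        // (plus content hashes for per-commit files such as segments_N)
        Store.RecoveryDiff diff = source.recoveryDiff(target);
        reusable.addAll(diff.identical); // same size and checksum: reuse on target
        toCopy.addAll(diff.different);   // same name, different content: re-send
        toCopy.addAll(diff.missing);     // absent on the target: send
    }
}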

Example 2 with StoreFileMetadata

Use of org.elasticsearch.index.store.StoreFileMetadata in project elasticsearch by elastic.

From the class BlobStoreRepository, method maybeRecalculateMetadataHash.

/**
 * This is a BWC layer to ensure we update the snapshots metadata with the corresponding hashes before we compare them.
 * The new logic for StoreFileMetaData reads the entire <tt>.si</tt> and <tt>segments.n</tt> files to strengthen the
 * comparison of the files on a per-segment / per-commit level.
 */
private static void maybeRecalculateMetadataHash(final BlobContainer blobContainer, final BlobStoreIndexShardSnapshot.FileInfo fileInfo, Store.MetadataSnapshot snapshot) throws Exception {
    final StoreFileMetaData metadata;
    if (fileInfo != null && (metadata = snapshot.get(fileInfo.physicalName())) != null) {
        if (metadata.hash().length > 0 && fileInfo.metadata().hash().length == 0) {
            // we might have multiple parts even though the file is small... make sure we read all of it.
            try (InputStream stream = new PartSliceStream(blobContainer, fileInfo)) {
                BytesRefBuilder builder = new BytesRefBuilder();
                Store.MetadataSnapshot.hashFile(builder, stream, fileInfo.length());
                // reset the file infos metadata hash
                BytesRef hash = fileInfo.metadata().hash();
                assert hash.length == 0;
                hash.bytes = builder.bytes();
                hash.offset = 0;
                hash.length = builder.length();
            }
        }
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData) RateLimitingInputStream(org.elasticsearch.index.snapshots.blobstore.RateLimitingInputStream) FilterInputStream(java.io.FilterInputStream) SlicedInputStream(org.elasticsearch.index.snapshots.blobstore.SlicedInputStream) InputStream(java.io.InputStream) BytesRef(org.apache.lucene.util.BytesRef)
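
The BWC recalculation above ultimately delegates to Store.MetadataSnapshot.hashFile, which consumes a stream and leaves the hash in a BytesRefBuilder. A minimal standalone sketch of that call, assuming only an in-memory stream (the input bytes are made up for illustration):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.index.store.Store;

public final class HashFileSketch {
    public static void main(String[] args) throws IOException {
        byte[] content = "segments_1 contents".getBytes(StandardCharsets.UTF_8);
        BytesRefBuilder builder = new BytesRefBuilder();
        try (InputStream in = new ByteArrayInputStream(content)) {
            // reads the whole stream and accumulates the hash in the builder
            Store.MetadataSnapshot.hashFile(builder, in, content.length);
        }
        BytesRef hash = builder.get();
        System.out.println("hash bytes: " + hash.length);
    }
}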

Example 3 with StoreFileMetadata

Use of org.elasticsearch.index.store.StoreFileMetadata in project elasticsearch by elastic.

From the class RecoverySourceHandlerTests, method testHandleExceptinoOnSendSendFiles.

public void testHandleExceptinoOnSendSendFiles() throws Throwable {
    Settings settings = Settings.builder().put("indices.recovery.concurrent_streams", 1).put("indices.recovery.concurrent_small_file_streams", 1).build();
    final RecoverySettings recoverySettings = new RecoverySettings(settings, service);
    final StartRecoveryRequest request = new StartRecoveryRequest(shardId,
        new DiscoveryNode("b", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT),
        new DiscoveryNode("b", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT),
        null, randomBoolean(), randomNonNegativeLong(),
        randomBoolean() ? SequenceNumbersService.UNASSIGNED_SEQ_NO : 0L);
    Path tempDir = createTempDir();
    Store store = newStore(tempDir, false);
    AtomicBoolean failedEngine = new AtomicBoolean(false);
    RecoverySourceHandler handler = new RecoverySourceHandler(null, null, request, () -> 0L, e -> () -> {
    }, recoverySettings.getChunkSize().bytesAsInt(), Settings.EMPTY) {

        @Override
        protected void failEngine(IOException cause) {
            assertFalse(failedEngine.get());
            failedEngine.set(true);
        }
    };
    Directory dir = store.directory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
    int numDocs = randomIntBetween(10, 100);
    for (int i = 0; i < numDocs; i++) {
        Document document = new Document();
        document.add(new StringField("id", Integer.toString(i), Field.Store.YES));
        document.add(newField("field", randomUnicodeOfCodepointLengthBetween(1, 10), TextField.TYPE_STORED));
        writer.addDocument(document);
    }
    writer.commit();
    writer.close();
    Store.MetadataSnapshot metadata = store.getMetadata(null);
    List<StoreFileMetaData> metas = new ArrayList<>();
    for (StoreFileMetaData md : metadata) {
        metas.add(md);
    }
    final boolean throwCorruptedIndexException = randomBoolean();
    Store targetStore = newStore(createTempDir(), false);
    try {
        handler.sendFiles(store, metas.toArray(new StoreFileMetaData[0]), (md) -> {
            if (throwCorruptedIndexException) {
                throw new RuntimeException(new CorruptIndexException("foo", "bar"));
            } else {
                throw new RuntimeException("boom");
            }
        });
        fail("exception index");
    } catch (RuntimeException ex) {
        assertNull(ExceptionsHelper.unwrapCorruption(ex));
        if (throwCorruptedIndexException) {
            assertEquals(ex.getMessage(), "[File corruption occurred on recovery but checksums are ok]");
        } else {
            assertEquals(ex.getMessage(), "boom");
        }
    } catch (CorruptIndexException ex) {
        fail("not expected here");
    }
    assertFalse(failedEngine.get());
    IOUtils.close(store, targetStore);
}
Also used : Path(java.nio.file.Path) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) ArrayList(java.util.ArrayList) Store(org.elasticsearch.index.store.Store) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) ParsedDocument(org.elasticsearch.index.mapper.ParsedDocument) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData) StringField(org.apache.lucene.document.StringField) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)
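
The assertions above hinge on ExceptionsHelper.unwrapCorruption, which returns the corruption exception nested in a cause chain, or null if there is none; the test expects null in both branches because sendFiles re-wraps the corruption rather than propagating it as a cause. A small standalone sketch of the helper's general behavior (not part of the test):

import org.apache.lucene.index.CorruptIndexException;
import org.elasticsearch.ExceptionsHelper;

public final class UnwrapCorruptionSketch {
    public static void main(String[] args) {
        // a corruption nested inside a generic wrapper is found...
        RuntimeException wrapped = new RuntimeException(new CorruptIndexException("foo", "bar"));
        System.out.println(ExceptionsHelper.unwrapCorruption(wrapped)); // the CorruptIndexException

        // ...while an unrelated failure yields null
        System.out.println(ExceptionsHelper.unwrapCorruption(new RuntimeException("boom"))); // null
    }
}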

Example 4 with StoreFileMetadata

Use of org.elasticsearch.index.store.StoreFileMetadata in project elasticsearch by elastic.

From the class RecoveryStatusTests, method testRenameTempFiles.

public void testRenameTempFiles() throws IOException {
    IndexService service = createIndex("foo");
    IndexShard indexShard = service.getShardOrNull(0);
    DiscoveryNode node = new DiscoveryNode("foo", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT);
    RecoveryTarget status = new RecoveryTarget(indexShard, node, new PeerRecoveryTargetService.RecoveryListener() {

        @Override
        public void onRecoveryDone(RecoveryState state) {
        }

        @Override
        public void onRecoveryFailure(RecoveryState state, RecoveryFailedException e, boolean sendShardFailure) {
        }
    }, version -> {
    });
    try (IndexOutput indexOutput = status.openAndPutIndexOutput("foo.bar", new StoreFileMetaData("foo.bar", 8 + CodecUtil.footerLength(), "9z51nw"), status.store())) {
        indexOutput.writeInt(1);
        IndexOutput openIndexOutput = status.getOpenIndexOutput("foo.bar");
        assertSame(openIndexOutput, indexOutput);
        openIndexOutput.writeInt(1);
        CodecUtil.writeFooter(indexOutput);
    }
    try {
        status.openAndPutIndexOutput("foo.bar", new StoreFileMetaData("foo.bar", 8 + CodecUtil.footerLength(), "9z51nw"), status.store());
        fail("file foo.bar is already opened and registered");
    } catch (IllegalStateException ex) {
        assertEquals("output for file [foo.bar] has already been created", ex.getMessage());
    // all well = it's already registered
    }
    status.removeOpenIndexOutputs("foo.bar");
    Set<String> strings = Sets.newHashSet(status.store().directory().listAll());
    String expectedFile = null;
    for (String file : strings) {
        if (Pattern.compile("recovery[.][\\w-]+[.]foo[.]bar").matcher(file).matches()) {
            expectedFile = file;
            break;
        }
    }
    assertNotNull(expectedFile);
    // we have to close it here otherwise rename fails since the write.lock is held by the engine
    indexShard.close("foo", false);
    status.renameAllTempFiles();
    strings = Sets.newHashSet(status.store().directory().listAll());
    assertTrue(strings.toString(), strings.contains("foo.bar"));
    assertFalse(strings.toString(), strings.contains(expectedFile));
    // we must fail the recovery because marking it as done will try to move the shard to POST_RECOVERY, which will fail because it's started
    status.fail(new RecoveryFailedException(status.state(), "end of test. OK.", null), false);
}
Also used : DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) IndexService(org.elasticsearch.index.IndexService) IndexShard(org.elasticsearch.index.shard.IndexShard) IndexOutput(org.apache.lucene.store.IndexOutput) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData)
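
RecoveryTarget writes incoming files under temporary names of the form recovery.<recovery id>.<original name>, which is what the regular expression in the test matches before renameAllTempFiles strips the prefix. A tiny sketch of that pattern (the recovery id shown is invented):

import java.util.regex.Pattern;

public final class TempFileNameSketch {
    public static void main(String[] args) {
        Pattern temp = Pattern.compile("recovery[.][\\w-]+[.]foo[.]bar");
        System.out.println(temp.matcher("recovery.AVq5Tz-k.foo.bar").matches()); // true
        System.out.println(temp.matcher("foo.bar").matches());                   // false
    }
}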

Example 5 with StoreFileMetadata

Use of org.elasticsearch.index.store.StoreFileMetadata in project elasticsearch by elastic.

From the class IndexShard, method doCheckIndex.

private void doCheckIndex() throws IOException {
    long timeNS = System.nanoTime();
    if (!Lucene.indexExists(store.directory())) {
        return;
    }
    BytesStreamOutput os = new BytesStreamOutput();
    PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name());
    if ("checksum".equals(checkIndexOnStartup)) {
        // physical verification only: verify all checksums for the latest commit
        IOException corrupt = null;
        MetadataSnapshot metadata = snapshotStoreMetadata();
        for (Map.Entry<String, StoreFileMetaData> entry : metadata.asMap().entrySet()) {
            try {
                Store.checkIntegrity(entry.getValue(), store.directory());
                out.println("checksum passed: " + entry.getKey());
            } catch (IOException exc) {
                out.println("checksum failed: " + entry.getKey());
                exc.printStackTrace(out);
                corrupt = exc;
            }
        }
        out.flush();
        if (corrupt != null) {
            logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
            throw corrupt;
        }
    } else {
        // full checkindex
        try (CheckIndex checkIndex = new CheckIndex(store.directory())) {
            checkIndex.setInfoStream(out);
            CheckIndex.Status status = checkIndex.checkIndex();
            out.flush();
            if (!status.clean) {
                if (state == IndexShardState.CLOSED) {
                    // ignore if closed....
                    return;
                }
                logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
                if ("fix".equals(checkIndexOnStartup)) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("fixing index, writing new segments file ...");
                    }
                    checkIndex.exorciseIndex(status);
                    if (logger.isDebugEnabled()) {
                        logger.debug("index fixed, wrote new segments file \"{}\"", status.segmentsFileName);
                    }
                } else {
                    // only throw a failure if we are not going to fix the index
                    throw new IllegalStateException("index check failure but can't fix it");
                }
            }
        }
    }
    if (logger.isDebugEnabled()) {
        logger.debug("check index [success]\n{}", os.bytes().utf8ToString());
    }
    recoveryState.getVerifyIndex().checkIndexTime(Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - timeNS)));
}
Also used : PrintStream(java.io.PrintStream) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData) IOException(java.io.IOException) Map(java.util.Map) BytesStreamOutput(org.elasticsearch.common.io.stream.BytesStreamOutput) CheckIndex(org.apache.lucene.index.CheckIndex) MetadataSnapshot(org.elasticsearch.index.store.Store.MetadataSnapshot)
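
The checksum branch above delegates to Store.checkIntegrity, which throws when a file's stored checksum does not match its contents. A minimal sketch (verifyAll is a hypothetical helper; metadata and directory are assumed to come from an open Store) of recording failures while still checking every file, as doCheckIndex does:

import java.io.IOException;
import java.util.Map;

import org.apache.lucene.store.Directory;
import org.elasticsearch.index.store.Store;
import org.elasticsearch.index.store.StoreFileMetaData;

final class ChecksumSketch {
    // returns the last corruption seen, or null if every file verified
    static IOException verifyAll(Store.MetadataSnapshot metadata, Directory directory) {
        IOException corrupt = null;
        for (Map.Entry<String, StoreFileMetaData> entry : metadata.asMap().entrySet()) {
            try {
                Store.checkIntegrity(entry.getValue(), directory);
            } catch (IOException exc) {
                corrupt = exc; // remember the failure but keep checking the rest
            }
        }
        return corrupt;
    }
}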

Aggregations

StoreFileMetadata (org.elasticsearch.index.store.StoreFileMetadata) 34
RoutingAllocation (org.elasticsearch.cluster.routing.allocation.RoutingAllocation) 30
StoreFileMetaData (org.elasticsearch.index.store.StoreFileMetaData) 25
IOException (java.io.IOException) 19
ArrayList (java.util.ArrayList) 18
Store (org.elasticsearch.index.store.Store) 17
DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode) 16
CorruptIndexException (org.apache.lucene.index.CorruptIndexException) 13
Directory (org.apache.lucene.store.Directory) 10
RetentionLease (org.elasticsearch.index.seqno.RetentionLease) 10
IndexShardRelocatedException (org.elasticsearch.index.shard.IndexShardRelocatedException) 10
List (java.util.List) 9
CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList) 9
Document (org.apache.lucene.document.Document) 9
StringField (org.apache.lucene.document.StringField) 9
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter) 9
BytesArray (org.elasticsearch.common.bytes.BytesArray) 9
RecoveryEngineException (org.elasticsearch.index.engine.RecoveryEngineException) 9
ParsedDocument (org.elasticsearch.index.mapper.ParsedDocument) 9
Settings (org.elasticsearch.common.settings.Settings) 8