Search in sources :

Example 1 with StopWatch

use of org.elasticsearch.common.StopWatch in project elasticsearch by elastic.

the class RecoverySourceHandler method phase2.

/**
     * Runs the second phase of recovery: shipping translog operations to the target node.
     * <p>
     * The supplied snapshot is a point-in-time view of the translog that was taken *without* holding the write lock. Every
     * operation in it is sent to the target node so that it can be replayed into the new shard. The elapsed time and the number
     * of operations sent are recorded on the recovery response.
     *
     * @param startingSeqNo the sequence number to start recovery from, or {@link SequenceNumbersService#UNASSIGNED_SEQ_NO} if all
     *                      ops should be sent
     * @param snapshot      a snapshot of the translog
     * @throws IOException if sending the snapshot fails
     */
void phase2(final long startingSeqNo, final Translog.Snapshot snapshot) throws IOException {
    final boolean shardIsClosed = IndexShardState.CLOSED == shard.state();
    if (shardIsClosed) {
        // nothing can be replayed from a closed shard
        throw new IndexShardClosedException(request.shardId());
    }
    cancellableThreads.checkForCancel();

    final StopWatch timer = new StopWatch().start();
    logger.trace("recovery [phase2]: sending transaction log operations");

    // push every operation of the translog snapshot over to the target
    final int sentOperations = sendSnapshot(startingSeqNo, snapshot);

    timer.stop();
    logger.trace("recovery [phase2]: took [{}]", timer.totalTime());

    // publish the phase statistics on the response
    response.phase2Operations = sentOperations;
    response.phase2Time = timer.totalTime().millis();
}
Also used : IndexShardClosedException(org.elasticsearch.index.shard.IndexShardClosedException) StopWatch(org.elasticsearch.common.StopWatch)

Example 2 with StopWatch

use of org.elasticsearch.common.StopWatch in project elasticsearch by elastic.

the class RecoverySourceHandler method phase1.

/**
     * Perform phase1 of the recovery operations. Once this {@link IndexCommit}
     * snapshot has been performed no commit operations (files being fsync'd)
     * are effectively allowed on this index until all recovery phases are done
     * <p>
     * Phase1 examines the segment files on the target node and copies over the
     * segments that are missing. Only segments that have the same size and
     * checksum can be reused
     * <p>
     * When source and target share the same sync id (and the same number of
     * documents) no files are copied at all.
     *
     * @param snapshot     the index commit whose files are recovered
     * @param translogView translog view; its operation count is reported to the target
     * @throws RecoverFilesRecoveryException wrapping any exception raised while the phase runs
     */
public void phase1(final IndexCommit snapshot, final Translog.View translogView) {
    cancellableThreads.checkForCancel();
    // Total size of segment files that are recovered
    long totalSize = 0;
    // Total size of segment files that were able to be re-used
    long existingTotalSize = 0;
    final Store store = shard.store();
    // hold a store reference for the whole phase; released in the finally block below
    store.incRef();
    try {
        final StopWatch stopWatch = new StopWatch().start();
        final Store.MetadataSnapshot recoverySourceMetadata;
        try {
            recoverySourceMetadata = store.getMetadata(snapshot);
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // the local copy cannot be read: fail the shard before propagating
            shard.failShard("recovery", ex);
            throw ex;
        }
        // every file of the commit must be present in the metadata snapshot
        for (String name : snapshot.getFileNames()) {
            final StoreFileMetaData md = recoverySourceMetadata.get(name);
            if (md == null) {
                logger.info("Snapshot differs from actual index for file: {} meta: {}", name, recoverySourceMetadata.asMap());
                throw new CorruptIndexException("Snapshot differs from actual index - maybe index was removed metadata has " + recoverySourceMetadata.asMap().size() + " files", name);
            }
        }
        // Generate a "diff" of all the identical, different, and missing
        // segment files on the target node, using the existing files on
        // the source node
        String recoverySourceSyncId = recoverySourceMetadata.getSyncId();
        String recoveryTargetSyncId = request.metadataSnapshot().getSyncId();
        final boolean recoverWithSyncId = recoverySourceSyncId != null && recoverySourceSyncId.equals(recoveryTargetSyncId);
        if (recoverWithSyncId) {
            final long numDocsTarget = request.metadataSnapshot().getNumDocs();
            final long numDocsSource = recoverySourceMetadata.getNumDocs();
            if (numDocsTarget != numDocsSource) {
                throw new IllegalStateException("try to recover " + request.shardId() + " from primary shard with sync id but number " + "of docs differ: " + numDocsSource + " (" + request.sourceNode().getName() + ", primary) vs " + numDocsTarget + "(" + request.targetNode().getName() + ")");
            }
            // we shortcut recovery here because we have nothing to copy. but we must still start the engine on the target.
            // so we don't return here
            logger.trace("skipping [phase1]- identical sync id [{}] found on both source and target", recoverySourceSyncId);
        } else {
            final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
            // identical files are only reported to the target, not copied
            for (StoreFileMetaData md : diff.identical) {
                response.phase1ExistingFileNames.add(md.name());
                response.phase1ExistingFileSizes.add(md.length());
                existingTotalSize += md.length();
                if (logger.isTraceEnabled()) {
                    logger.trace("recovery [phase1]: not recovering [{}], exist in local store and has checksum [{}]," + " size [{}]", md.name(), md.checksum(), md.length());
                }
                totalSize += md.length();
            }
            // different and missing files are the ones that get copied
            List<StoreFileMetaData> phase1Files = new ArrayList<>(diff.different.size() + diff.missing.size());
            phase1Files.addAll(diff.different);
            phase1Files.addAll(diff.missing);
            for (StoreFileMetaData md : phase1Files) {
                if (request.metadataSnapshot().asMap().containsKey(md.name())) {
                    logger.trace("recovery [phase1]: recovering [{}], exists in local store, but is different: remote [{}], local [{}]", md.name(), request.metadataSnapshot().asMap().get(md.name()), md);
                } else {
                    logger.trace("recovery [phase1]: recovering [{}], does not exist in remote", md.name());
                }
                response.phase1FileNames.add(md.name());
                response.phase1FileSizes.add(md.length());
                totalSize += md.length();
            }
            response.phase1TotalSize = totalSize;
            response.phase1ExistingTotalSize = existingTotalSize;
            logger.trace("recovery [phase1]: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]", response.phase1FileNames.size(), new ByteSizeValue(totalSize), response.phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSize));
            // tell the target which files it will receive and which ones it can keep
            cancellableThreads.execute(() -> recoveryTarget.receiveFileInfo(response.phase1FileNames, response.phase1FileSizes, response.phase1ExistingFileNames, response.phase1ExistingFileSizes, translogView.totalOperations()));
            // each file is written through a buffered recovery stream of chunkSizeInBytes
            final Function<StoreFileMetaData, OutputStream> outputStreamFactories = md -> new BufferedOutputStream(new RecoveryOutputStream(md, translogView), chunkSizeInBytes);
            sendFiles(store, phase1Files.toArray(new StoreFileMetaData[phase1Files.size()]), outputStreamFactories);
            // once all files have been sent, ask the target to clean up its files
            try {
                cancellableThreads.executeIO(() -> recoveryTarget.cleanFiles(translogView.totalOperations(), recoverySourceMetadata));
            } catch (RemoteTransportException | IOException targetException) {
                final IOException corruptIndexException;
                // the target reported a failure while cleaning files; if it is a corruption,
                // find out whether the local files are corrupted or the damage happened in transit
                //   - maybe due to old segments without checksums or length only checks
                if ((corruptIndexException = ExceptionsHelper.unwrapCorruption(targetException)) != null) {
                    try {
                        final Store.MetadataSnapshot recoverySourceMetadata1 = store.getMetadata(snapshot);
                        StoreFileMetaData[] metadata = StreamSupport.stream(recoverySourceMetadata1.spliterator(), false).toArray(size -> new StoreFileMetaData[size]);
                        ArrayUtil.timSort(metadata, (o1, o2) -> {
                            // check small files first
                            return Long.compare(o1.length(), o2.length());
                        });
                        for (StoreFileMetaData md : metadata) {
                            cancellableThreads.checkForCancel();
                            logger.debug("checking integrity for file {} after remove corruption exception", md);
                            if (store.checkIntegrityNoException(md) == false) {
                                // we are corrupted on the primary -- fail!
                                shard.failShard("recovery", corruptIndexException);
                                logger.warn("Corrupted file detected {} checksum mismatch", md);
                                throw corruptIndexException;
                            }
                        }
                    } catch (IOException ex) {
                        targetException.addSuppressed(ex);
                        throw targetException;
                    }
                    // corruption has happened on the way to replica
                    RemoteTransportException exception = new RemoteTransportException("File corruption occurred on recovery but " + "checksums are ok", null);
                    exception.addSuppressed(targetException);
                    logger.warn((org.apache.logging.log4j.util.Supplier<?>) () -> new ParameterizedMessage("{} Remote file corruption during finalization of recovery on node {}. local checksum OK", shard.shardId(), request.targetNode()), corruptIndexException);
                    throw exception;
                } else {
                    throw targetException;
                }
            }
        }
        // stop the watch before reading it, as the other recovery phases do;
        // otherwise the reported phase1 time does not reflect the elapsed time
        stopWatch.stop();
        logger.trace("recovery [phase1]: took [{}]", stopWatch.totalTime());
        response.phase1Time = stopWatch.totalTime().millis();
    } catch (Exception e) {
        throw new RecoverFilesRecoveryException(request.shardId(), response.phase1FileNames.size(), new ByteSizeValue(totalSize), e);
    } finally {
        store.decRef();
    }
}
Also used : IndexCommit(org.apache.lucene.index.IndexCommit) CancellableThreads(org.elasticsearch.common.util.CancellableThreads) IndexShardClosedException(org.elasticsearch.index.shard.IndexShardClosedException) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) Nullable(org.elasticsearch.common.Nullable) IndexShardRelocatedException(org.elasticsearch.index.shard.IndexShardRelocatedException) RecoveryEngineException(org.elasticsearch.index.engine.RecoveryEngineException) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) Function(java.util.function.Function) Supplier(java.util.function.Supplier) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) BufferedOutputStream(java.io.BufferedOutputStream) ArrayList(java.util.ArrayList) BytesArray(org.elasticsearch.common.bytes.BytesArray) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData) Settings(org.elasticsearch.common.settings.Settings) SequenceNumbersService(org.elasticsearch.index.seqno.SequenceNumbersService) Store(org.elasticsearch.index.store.Store) LocalCheckpointTracker(org.elasticsearch.index.seqno.LocalCheckpointTracker) Streams(org.elasticsearch.common.io.Streams) StreamSupport(java.util.stream.StreamSupport) IOContext(org.apache.lucene.store.IOContext) Releasable(org.elasticsearch.common.lease.Releasable) Loggers(org.elasticsearch.common.logging.Loggers) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) OutputStream(java.io.OutputStream) ArrayUtil(org.apache.lucene.util.ArrayUtil) IndexShardState(org.elasticsearch.index.shard.IndexShardState) IndexInput(org.apache.lucene.store.IndexInput) IndexShard(org.elasticsearch.index.shard.IndexShard) IOUtils(org.apache.lucene.util.IOUtils) IOException(java.io.IOException) StopWatch(org.elasticsearch.common.StopWatch) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) ExceptionsHelper(org.elasticsearch.ExceptionsHelper) 
RemoteTransportException(org.elasticsearch.transport.RemoteTransportException) List(java.util.List) Logger(org.apache.logging.log4j.Logger) InputStreamIndexInput(org.elasticsearch.common.lucene.store.InputStreamIndexInput) Translog(org.elasticsearch.index.translog.Translog) RateLimiter(org.apache.lucene.store.RateLimiter) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) Store(org.elasticsearch.index.store.Store) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData) Supplier(java.util.function.Supplier) BufferedOutputStream(java.io.BufferedOutputStream) RemoteTransportException(org.elasticsearch.transport.RemoteTransportException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) IOException(java.io.IOException) IndexShardClosedException(org.elasticsearch.index.shard.IndexShardClosedException) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) IndexShardRelocatedException(org.elasticsearch.index.shard.IndexShardRelocatedException) RecoveryEngineException(org.elasticsearch.index.engine.RecoveryEngineException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) IOException(java.io.IOException) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) RemoteTransportException(org.elasticsearch.transport.RemoteTransportException) StopWatch(org.elasticsearch.common.StopWatch) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException)

Example 3 with StopWatch

use of org.elasticsearch.common.StopWatch in project crate by crate.

the class BlobRecoverySourceHandler method finalizeRecovery.

/**
     * Finalizes the recovery process: sends the FINALIZE request to the target node and,
     * if the recovery request asks for it, marks the local shard as relocated.
     */
public void finalizeRecovery() {
    if (IndexShardState.CLOSED == shard.state()) {
        throw new IndexShardClosedException(request.shardId());
    }
    cancellableThreads.checkForCancel();

    final StopWatch timer = new StopWatch().start();
    logger.trace("[{}][{}] finalizing recovery to {}", indexName, shardId, request.targetNode());

    final Interruptable sendFinalize = new Interruptable() {

        @Override
        public void run() throws InterruptedException {
            // Send the FINALIZE request to the target node. The finalize request
            // clears unreferenced translog files, refreshes the engine now that
            // new segments are available, and enables garbage collection of
            // tombstone files. The shard is also moved to the POST_RECOVERY phase
            // during this time
            final RecoveryFinalizeRecoveryRequest finalizeRequest = new RecoveryFinalizeRecoveryRequest(request.recoveryId(), request.shardId());
            final TransportRequestOptions longTimeoutOptions = TransportRequestOptions.builder().withTimeout(recoverySettings.internalActionLongTimeout()).build();
            transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FINALIZE, finalizeRequest, longTimeoutOptions, EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
        }
    };
    cancellableThreads.execute(sendFinalize);

    final boolean relocating = request.markAsRelocated();
    if (relocating) {
        // TODO what happens if the recovery process fails afterwards, we need to mark this back to started
        try {
            shard.relocated("to " + request.targetNode());
        } catch (IllegalIndexShardStateException ignored) {
            // Deliberately ignored: when the other node moves to phase3 it also sends
            // shard started, which may cause the index shard we work against to be
            // closed by the time we reach the relocated method.
        }
    }

    timer.stop();
    logger.trace("[{}][{}] finalizing recovery to {}: took [{}]", indexName, shardId, request.targetNode(), timer.totalTime());
}
Also used : IndexShardClosedException(org.elasticsearch.index.shard.IndexShardClosedException) Interruptable(org.elasticsearch.common.util.CancellableThreads.Interruptable) IllegalIndexShardStateException(org.elasticsearch.index.shard.IllegalIndexShardStateException) StopWatch(org.elasticsearch.common.StopWatch)

Example 4 with StopWatch

use of org.elasticsearch.common.StopWatch in project crate by crate.

the class BlobRecoverySourceHandler method phase1.

/**
     * Perform phase1 of the recovery operations. Once this {@link SnapshotIndexCommit}
     * snapshot has been performed no commit operations (files being fsync'd)
     * are effectively allowed on this index until all recovery phases are done
     * <p>
     * Phase1 examines the segment files on the target node and copies over the
     * segments that are missing. Only segments that have the same size and
     * checksum can be reused
     * <p>
     * When source and target share the same sync id (and the same number of
     * documents) no files are copied. In both cases the target is prepared for
     * translog operations before the phase ends.
     *
     * @param snapshot     the index commit whose files are recovered
     * @param translogView translog view; its operation count is reported to the target
     * @throws RecoverFilesRecoveryException wrapping any throwable raised while the phase runs
     */
public void phase1(final SnapshotIndexCommit snapshot, final Translog.View translogView) {
    cancellableThreads.checkForCancel();
    // Total size of segment files that are recovered
    long totalSize = 0;
    // Total size of segment files that were able to be re-used
    long existingTotalSize = 0;
    final Store store = shard.store();
    // hold a store reference for the whole phase; released in the finally block below
    store.incRef();
    try {
        // CRATE CHANGE
        if (blobRecoveryHandler != null) {
            blobRecoveryHandler.phase1();
        }
        final StopWatch stopWatch = new StopWatch().start();
        final Store.MetadataSnapshot recoverySourceMetadata;
        try {
            recoverySourceMetadata = store.getMetadata(snapshot);
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // the local copy cannot be read: fail the engine before propagating
            shard.engine().failEngine("recovery", ex);
            throw ex;
        }
        // every file of the commit must be present in the metadata snapshot
        for (String name : snapshot.getFiles()) {
            final StoreFileMetaData md = recoverySourceMetadata.get(name);
            if (md == null) {
                logger.info("Snapshot differs from actual index for file: {} meta: {}", name, recoverySourceMetadata.asMap());
                throw new CorruptIndexException("Snapshot differs from actual index - maybe index was removed metadata has " + recoverySourceMetadata.asMap().size() + " files", name);
            }
        }
        // Generate a "diff" of all the identical, different, and missing
        // segment files on the target node, using the existing files on
        // the source node
        String recoverySourceSyncId = recoverySourceMetadata.getSyncId();
        String recoveryTargetSyncId = request.metadataSnapshot().getSyncId();
        final boolean recoverWithSyncId = recoverySourceSyncId != null && recoverySourceSyncId.equals(recoveryTargetSyncId);
        if (recoverWithSyncId) {
            final long numDocsTarget = request.metadataSnapshot().getNumDocs();
            final long numDocsSource = recoverySourceMetadata.getNumDocs();
            if (numDocsTarget != numDocsSource) {
                // report the source count next to the source (primary) node and the target count next to the target node
                throw new IllegalStateException("try to recover " + request.shardId() + " from primary shard with sync id but number of docs differ: " + numDocsSource + " (" + request.sourceNode().getName() + ", primary) vs " + numDocsTarget + "(" + request.targetNode().getName() + ")");
            }
            // we shortcut recovery here because we have nothing to copy. but we must still start the engine on the target.
            // so we don't return here
            logger.trace("[{}][{}] skipping [phase1] to {} - identical sync id [{}] found on both source and target", indexName, shardId, request.targetNode(), recoverySourceSyncId);
        } else {
            final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
            // identical files are only reported to the target, not copied
            for (StoreFileMetaData md : diff.identical) {
                response.phase1ExistingFileNames.add(md.name());
                response.phase1ExistingFileSizes.add(md.length());
                existingTotalSize += md.length();
                if (logger.isTraceEnabled()) {
                    logger.trace("[{}][{}] recovery [phase1] to {}: not recovering [{}], exists in local store and has checksum [{}], size [{}]", indexName, shardId, request.targetNode(), md.name(), md.checksum(), md.length());
                }
                totalSize += md.length();
            }
            // different and missing files are the ones that get copied
            for (StoreFileMetaData md : Iterables.concat(diff.different, diff.missing)) {
                if (request.metadataSnapshot().asMap().containsKey(md.name())) {
                    logger.trace("[{}][{}] recovery [phase1] to {}: recovering [{}], exists in local store, but is different: remote [{}], local [{}]", indexName, shardId, request.targetNode(), md.name(), request.metadataSnapshot().asMap().get(md.name()), md);
                } else {
                    logger.trace("[{}][{}] recovery [phase1] to {}: recovering [{}], does not exists in remote", indexName, shardId, request.targetNode(), md.name());
                }
                response.phase1FileNames.add(md.name());
                response.phase1FileSizes.add(md.length());
                totalSize += md.length();
            }
            response.phase1TotalSize = totalSize;
            response.phase1ExistingTotalSize = existingTotalSize;
            logger.trace("[{}][{}] recovery [phase1] to {}: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]", indexName, shardId, request.targetNode(), response.phase1FileNames.size(), new ByteSizeValue(totalSize), response.phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSize));
            // tell the target which files it will receive and which ones it can keep
            cancellableThreads.execute(new Interruptable() {

                @Override
                public void run() throws InterruptedException {
                    RecoveryFilesInfoRequest recoveryInfoFilesRequest = new RecoveryFilesInfoRequest(request.recoveryId(), request.shardId(), response.phase1FileNames, response.phase1FileSizes, response.phase1ExistingFileNames, response.phase1ExistingFileSizes, translogView.totalOperations());
                    transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FILES_INFO, recoveryInfoFilesRequest, TransportRequestOptions.builder().withTimeout(recoverySettings.internalActionTimeout()).build(), EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                }
            });
            // This latch will be used to wait until all files have been transferred to the target node
            final CountDownLatch latch = new CountDownLatch(response.phase1FileNames.size());
            final CopyOnWriteArrayList<Throwable> exceptions = new CopyOnWriteArrayList<>();
            final AtomicReference<Throwable> corruptedEngine = new AtomicReference<>();
            int fileIndex = 0;
            ThreadPoolExecutor pool;
            // How many bytes we've copied since we last called RateLimiter.pause
            final AtomicLong bytesSinceLastPause = new AtomicLong();
            for (final String name : response.phase1FileNames) {
                long fileSize = response.phase1FileSizes.get(fileIndex);
                // Pick the pool by file size: files above SMALL_FILE_CUTOFF_BYTES use the
                // regular stream pool, all others use the small-file stream pool.
                if (fileSize > RecoverySettings.SMALL_FILE_CUTOFF_BYTES) {
                    pool = recoverySettings.concurrentStreamPool();
                } else {
                    pool = recoverySettings.concurrentSmallFileStreamPool();
                }
                pool.execute(new AbstractRunnable() {

                    @Override
                    public void onFailure(Throwable t) {
                        // we either got rejected or the store can't be incremented / we are canceled
                        // keep the cause in the log so the reason for the failure is not lost
                        logger.debug("Failed to transfer file [" + name + "] on recovery", t);
                    }

                    @Override
                    public void onAfter() {
                        // Signify this file has completed by decrementing the latch
                        latch.countDown();
                    }

                    @Override
                    protected void doRun() {
                        cancellableThreads.checkForCancel();
                        store.incRef();
                        final StoreFileMetaData md = recoverySourceMetadata.get(name);
                        try (final IndexInput indexInput = store.directory().openInput(name, IOContext.READONCE)) {
                            // at least one!
                            final int BUFFER_SIZE = (int) Math.max(1, recoverySettings.fileChunkSize().getBytes());
                            final byte[] buf = new byte[BUFFER_SIZE];
                            boolean shouldCompressRequest = recoverySettings.compress();
                            // do not compress the request again if the file content is already compressed
                            if (CompressorFactory.isCompressed(indexInput)) {
                                shouldCompressRequest = false;
                            }
                            final long len = indexInput.length();
                            long readCount = 0;
                            final TransportRequestOptions requestOptions = TransportRequestOptions.builder().withCompress(shouldCompressRequest).withType(TransportRequestOptions.Type.RECOVERY).withTimeout(recoverySettings.internalActionTimeout()).build();
                            while (readCount < len) {
                                if (shard.state() == IndexShardState.CLOSED) {
                                    // check if the shard got closed on us
                                    throw new IndexShardClosedException(shard.shardId());
                                }
                                // the last chunk may be shorter than the buffer
                                int toRead = readCount + BUFFER_SIZE > len ? (int) (len - readCount) : BUFFER_SIZE;
                                final long position = indexInput.getFilePointer();
                                // Pause using the rate limiter, if desired, to throttle the recovery
                                RateLimiter rl = recoverySettings.rateLimiter();
                                long throttleTimeInNanos = 0;
                                if (rl != null) {
                                    long bytes = bytesSinceLastPause.addAndGet(toRead);
                                    if (bytes > rl.getMinPauseCheckBytes()) {
                                        // Time to pause
                                        bytesSinceLastPause.addAndGet(-bytes);
                                        throttleTimeInNanos = rl.pause(bytes);
                                        shard.recoveryStats().addThrottleTime(throttleTimeInNanos);
                                    }
                                }
                                indexInput.readBytes(buf, 0, toRead, false);
                                final BytesArray content = new BytesArray(buf, 0, toRead);
                                readCount += toRead;
                                final boolean lastChunk = readCount == len;
                                final RecoveryFileChunkRequest fileChunkRequest = new RecoveryFileChunkRequest(request.recoveryId(), request.shardId(), md, position, content, lastChunk, translogView.totalOperations(), throttleTimeInNanos);
                                cancellableThreads.execute(new Interruptable() {

                                    @Override
                                    public void run() throws InterruptedException {
                                        // Actually send the file chunk to the target node, waiting for it to complete
                                        transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FILE_CHUNK, fileChunkRequest, requestOptions, EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                                    }
                                });
                            }
                        } catch (Throwable e) {
                            final Throwable corruptIndexException;
                            if ((corruptIndexException = ExceptionsHelper.unwrapCorruption(e)) != null) {
                                if (store.checkIntegrityNoException(md) == false) {
                                    // we are corrupted on the primary -- fail!
                                    logger.warn("{} Corrupted file detected {} checksum mismatch", shard.shardId(), md);
                                    if (corruptedEngine.compareAndSet(null, corruptIndexException) == false) {
                                        // if we are not the first exception, add ourselves as suppressed to the main one:
                                        corruptedEngine.get().addSuppressed(e);
                                    }
                                } else {
                                    // corruption has happened on the way to replica
                                    RemoteTransportException exception = new RemoteTransportException("File corruption occurred on recovery but checksums are ok", null);
                                    exception.addSuppressed(e);
                                    // last exception first
                                    exceptions.add(0, exception);
                                    logger.warn("{} Remote file corruption on node {}, recovering {}. local checksum OK", corruptIndexException, shard.shardId(), request.targetNode(), md);
                                }
                            } else {
                                // last exceptions first
                                exceptions.add(0, e);
                            }
                        } finally {
                            store.decRef();
                        }
                    }
                });
                fileIndex++;
            }
            cancellableThreads.execute(new Interruptable() {

                @Override
                public void run() throws InterruptedException {
                    // Wait for all files that need to be transferred to finish transferring
                    latch.await();
                }
            });
            // local corruption takes precedence over any other transfer failure
            if (corruptedEngine.get() != null) {
                shard.engine().failEngine("recovery", corruptedEngine.get());
                throw corruptedEngine.get();
            } else {
                ExceptionsHelper.rethrowAndSuppress(exceptions);
            }
            cancellableThreads.execute(new Interruptable() {

                @Override
                public void run() throws InterruptedException {
                    // Send the CLEAN_FILES request to the target now that all files have been transferred
                    try {
                        transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES, new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(), recoverySourceMetadata, translogView.totalOperations()), TransportRequestOptions.builder().withTimeout(recoverySettings.internalActionTimeout()).build(), EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                    } catch (RemoteTransportException remoteException) {
                        final IOException corruptIndexException;
                        // the target reported a failure while cleaning files; if it is a corruption,
                        // find out whether the local files are corrupted or the damage happened in transit
                        //   - maybe due to old segments without checksums or length only checks
                        if ((corruptIndexException = ExceptionsHelper.unwrapCorruption(remoteException)) != null) {
                            try {
                                final Store.MetadataSnapshot recoverySourceMetadata = store.getMetadata(snapshot);
                                StoreFileMetaData[] metadata = Iterables.toArray(recoverySourceMetadata, StoreFileMetaData.class);
                                ArrayUtil.timSort(metadata, new Comparator<StoreFileMetaData>() {

                                    @Override
                                    public int compare(StoreFileMetaData o1, StoreFileMetaData o2) {
                                        // check small files first
                                        return Long.compare(o1.length(), o2.length());
                                    }
                                });
                                for (StoreFileMetaData md : metadata) {
                                    logger.debug("{} checking integrity for file {} after remove corruption exception", shard.shardId(), md);
                                    if (store.checkIntegrityNoException(md) == false) {
                                        // we are corrupted on the primary -- fail!
                                        shard.engine().failEngine("recovery", corruptIndexException);
                                        logger.warn("{} Corrupted file detected {} checksum mismatch", shard.shardId(), md);
                                        throw corruptIndexException;
                                    }
                                }
                            } catch (IOException ex) {
                                remoteException.addSuppressed(ex);
                                throw remoteException;
                            }
                            // corruption has happened on the way to replica
                            RemoteTransportException exception = new RemoteTransportException("File corruption occurred on recovery but checksums are ok", null);
                            exception.addSuppressed(remoteException);
                            logger.warn("{} Remote file corruption during finalization on node {}, recovering {}. local checksum OK", corruptIndexException, shard.shardId(), request.targetNode());
                            throw exception;
                        } else {
                            throw remoteException;
                        }
                    }
                }
            });
        }
        prepareTargetForTranslog(translogView);
        // stop the watch before reading it, as finalizeRecovery and prepareTargetForTranslog do;
        // otherwise the reported phase1 time does not reflect the elapsed time
        stopWatch.stop();
        logger.trace("[{}][{}] recovery [phase1] to {}: took [{}]", indexName, shardId, request.targetNode(), stopWatch.totalTime());
        response.phase1Time = stopWatch.totalTime().millis();
    } catch (Throwable e) {
        throw new RecoverFilesRecoveryException(request.shardId(), response.phase1FileNames.size(), new ByteSizeValue(totalSize), e);
    } finally {
        store.decRef();
    }
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) Store(org.elasticsearch.index.store.Store) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData) IndexInput(org.apache.lucene.store.IndexInput) TransportRequestOptions(org.elasticsearch.transport.TransportRequestOptions) RemoteTransportException(org.elasticsearch.transport.RemoteTransportException) BytesArray(org.elasticsearch.common.bytes.BytesArray) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) Interruptable(org.elasticsearch.common.util.CancellableThreads.Interruptable) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) RateLimiter(org.apache.lucene.store.RateLimiter) StopWatch(org.elasticsearch.common.StopWatch) AtomicLong(java.util.concurrent.atomic.AtomicLong) IndexShardClosedException(org.elasticsearch.index.shard.IndexShardClosedException) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)

Example 5 with StopWatch

use of org.elasticsearch.common.StopWatch in project crate by crate.

the class BlobRecoverySourceHandler method prepareTargetForTranslog.

/**
 * Sends a {@code PREPARE_TRANSLOG} request to the recovery target node and stores the measured
 * duration of that step in {@code response.startTime}.
 *
 * @param translogView translog view whose {@code totalOperations()} value is included in the
 *                     request sent to the target
 */
protected void prepareTargetForTranslog(final Translog.View translogView) {
    final StopWatch timer = new StopWatch().start();
    logger.trace("{} recovery [phase1] to {}: prepare remote engine for translog", request.shardId(), request.targetNode());
    // Stopwatch reading taken before the request is issued; it is subtracted from the
    // reading taken after the stopwatch is stopped.
    final long millisBeforeRequest = timer.totalTime().millis();
    final Interruptable prepareCall = new Interruptable() {

        @Override
        public void run() throws InterruptedException {
            // Ask the target to get the new shard's translog ready to receive
            // operations. This ensures the shard engine is started and disables
            // garbage collection (not the JVM's GC!) of tombstone deletes
            final RecoveryPrepareForTranslogOperationsRequest prepareRequest =
                new RecoveryPrepareForTranslogOperationsRequest(request.recoveryId(), request.shardId(), translogView.totalOperations());
            // The request is bounded by the configured internal action timeout.
            final TransportRequestOptions timeoutOptions =
                TransportRequestOptions.builder().withTimeout(recoverySettings.internalActionTimeout()).build();
            transportService.submitRequest(
                request.targetNode(),
                RecoveryTarget.Actions.PREPARE_TRANSLOG,
                prepareRequest,
                timeoutOptions,
                EmptyTransportResponseHandler.INSTANCE_SAME
            ).txGet();
        }
    };
    // Run the request through cancellableThreads, as the rest of this handler does.
    cancellableThreads.execute(prepareCall);
    timer.stop();
    final long elapsedMillis = timer.totalTime().millis() - millisBeforeRequest;
    response.startTime = elapsedMillis;
    logger.trace("{} recovery [phase1] to {}: remote engine start took [{}]", request.shardId(), request.targetNode(), timer.totalTime());
}
Also used : Interruptable(org.elasticsearch.common.util.CancellableThreads.Interruptable) StopWatch(org.elasticsearch.common.StopWatch)

Aggregations

StopWatch (org.elasticsearch.common.StopWatch): 21; IndexShardClosedException (org.elasticsearch.index.shard.IndexShardClosedException): 11; TimeValue (io.crate.common.unit.TimeValue): 4; IOException (java.io.IOException): 4; ArrayList (java.util.ArrayList): 4; AtomicReference (java.util.concurrent.atomic.AtomicReference): 3; Interruptable (org.elasticsearch.common.util.CancellableThreads.Interruptable): 3; Closeable (java.io.Closeable): 2; CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 2; IndexFormatTooNewException (org.apache.lucene.index.IndexFormatTooNewException): 2; IndexFormatTooOldException (org.apache.lucene.index.IndexFormatTooOldException): 2; IndexInput (org.apache.lucene.store.IndexInput): 2; RateLimiter (org.apache.lucene.store.RateLimiter): 2; ElasticsearchException (org.elasticsearch.ElasticsearchException): 2; StepListener (org.elasticsearch.action.StepListener): 2; Client (org.elasticsearch.client.Client): 2; NodeClient (org.elasticsearch.client.node.NodeClient): 2; NodeConnectionsService (org.elasticsearch.cluster.NodeConnectionsService): 2; ClusterService (org.elasticsearch.cluster.service.ClusterService): 2; BytesArray (org.elasticsearch.common.bytes.BytesArray): 2