Search in sources :

Example 31 with TimeValue

use of io.crate.common.unit.TimeValue in project crate by crate.

the class RecoverySourceHandler method recoverToTarget.

/**
 * Performs the recovery from the local engine to the target.
 * <p>
 * The work is organised as a chain of step listeners: sending files (skipped for a sequence-number-based recovery),
 * preparing the target for translog operations, replaying history operations via {@code phase2}, and finalizing the
 * recovery. Failures reported by any step, thrown synchronously, or triggered by cancellation close the tracked
 * {@code resources} and are forwarded to the listener.
 *
 * @param listener notified at most once (it is wrapped with {@code ActionListener.notifyOnce}) with the
 *                 {@link RecoveryResponse} on success or with the exception on failure
 */
public void recoverToTarget(ActionListener<RecoveryResponse> listener) {
    // closing this handle closes everything that has been registered in `resources` so far
    final Closeable releaseResources = () -> IOUtils.close(resources);
    final ActionListener<RecoveryResponse> wrappedListener = ActionListener.notifyOnce(listener);
    try {
        // on cancellation: build an exception describing why, release resources, notify the listener and rethrow
        cancellableThreads.setOnCancel((reason, beforeCancelEx) -> {
            final RuntimeException e;
            if (shard.state() == IndexShardState.CLOSED) {
                // check if the shard got closed on us
                e = new IndexShardClosedException(shard.shardId(), "shard is closed and recovery was canceled reason [" + reason + "]");
            } else {
                e = new CancellableThreads.ExecutionCancelledException("recovery was canceled reason [" + reason + "]");
            }
            if (beforeCancelEx != null) {
                e.addSuppressed(beforeCancelEx);
            }
            IOUtils.closeWhileHandlingException(releaseResources, () -> wrappedListener.onFailure(e));
            throw e;
        });
        // failure handler shared by the asynchronous steps below: releases resources, then notifies the listener
        final Consumer<Exception> onFailure = e -> {
            assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[onFailure]");
            IOUtils.closeWhileHandlingException(releaseResources, () -> wrappedListener.onFailure(e));
        };
        final boolean softDeletesEnabled = shard.indexSettings().isSoftDeleteEnabled();
        final SetOnce<RetentionLease> retentionLeaseRef = new SetOnce<>();
        // resolve the target's routing entry and remember its peer-recovery retention lease; the lease may be
        // absent, which is why later code checks retentionLeaseRef.get() for null
        runUnderPrimaryPermit(() -> {
            final IndexShardRoutingTable routingTable = shard.getReplicationGroup().getRoutingTable();
            ShardRouting targetShardRouting = routingTable.getByAllocationId(request.targetAllocationId());
            if (targetShardRouting == null) {
                logger.debug("delaying recovery of {} as it is not listed as assigned to target node {}", request.shardId(), request.targetNode());
                throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node");
            }
            assert targetShardRouting.initializing() : "expected recovery target to be initializing but was " + targetShardRouting;
            retentionLeaseRef.set(shard.getRetentionLeases().get(ReplicationTracker.getPeerRecoveryRetentionLeaseId(targetShardRouting)));
        }, shardId + " validating recovery target [" + request.targetAllocationId() + "] registered ", shard, cancellableThreads, logger);
        // read history from the index when soft deletes are enabled and either the shard uses retention leases in
        // peer recovery or the target already has a lease; otherwise read it from the translog
        final Engine.HistorySource historySource;
        if (softDeletesEnabled && (shard.useRetentionLeasesInPeerRecovery() || retentionLeaseRef.get() != null)) {
            historySource = Engine.HistorySource.INDEX;
        } else {
            historySource = Engine.HistorySource.TRANSLOG;
        }
        final Closeable retentionLock = shard.acquireHistoryRetentionLock(historySource);
        resources.add(retentionLock);
        final long startingSeqNo;
        // an operations-only recovery requires: a starting seq no supplied by the target, matching history
        // (isTargetSameHistory), complete history from that seq no onwards, and - when reading from the index -
        // an existing lease whose retaining seq no does not exceed the requested starting seq no
        final boolean isSequenceNumberBasedRecovery = request.startingSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO && isTargetSameHistory() && shard.hasCompleteHistoryOperations("peer-recovery", historySource, request.startingSeqNo()) && (historySource == Engine.HistorySource.TRANSLOG || (retentionLeaseRef.get() != null && retentionLeaseRef.get().retainingSequenceNumber() <= request.startingSeqNo()));
        if (isSequenceNumberBasedRecovery && softDeletesEnabled && retentionLeaseRef.get() != null) {
            // all the history we need is retained by an existing retention lease, so we do not need a separate retention lock
            retentionLock.close();
            logger.trace("history is retained by {}", retentionLeaseRef.get());
        } else {
            // all the history we need is retained by the retention lock, obtained before calling shard.hasCompleteHistoryOperations()
            // and before acquiring the safe commit we'll be using, so we can be certain that all operations after the safe commit's
            // local checkpoint will be retained for the duration of this recovery.
            logger.trace("history is retained by retention lock");
        }
        // the asynchronous steps; each one is wired to the next via whenComplete further down
        final StepListener<SendFileResult> sendFileStep = new StepListener<>();
        final StepListener<TimeValue> prepareEngineStep = new StepListener<>();
        final StepListener<SendSnapshotResult> sendSnapshotStep = new StepListener<>();
        final StepListener<Void> finalizeStep = new StepListener<>();
        if (isSequenceNumberBasedRecovery) {
            logger.trace("performing sequence numbers based recovery. starting at [{}]", request.startingSeqNo());
            startingSeqNo = request.startingSeqNo();
            // no files are sent on this path; a retention lease is created first if the target does not have one yet
            if (retentionLeaseRef.get() == null) {
                createRetentionLease(startingSeqNo, ActionListener.map(sendFileStep, ignored -> SendFileResult.EMPTY));
            } else {
                sendFileStep.onResponse(SendFileResult.EMPTY);
            }
        } else {
            final Engine.IndexCommitRef safeCommitRef;
            try {
                safeCommitRef = shard.acquireSafeIndexCommit();
                resources.add(safeCommitRef);
            } catch (final Exception e) {
                throw new RecoveryEngineException(shard.shardId(), 1, "snapshot failed", e);
            }
            // Try and copy enough operations to the recovering peer so that if it is promoted to primary then it has a chance of being
            // able to recover other replicas using operations-based recoveries. If we are not using retention leases then we
            // conservatively copy all available operations. If we are using retention leases then "enough operations" is just the
            // operations from the local checkpoint of the safe commit onwards, because when using soft deletes the safe commit retains
            // at least as much history as anything else. The safe commit will often contain all the history retained by the current set
            // of retention leases, but this is not guaranteed: an earlier peer recovery from a different primary might have created a
            // retention lease for some history that this primary already discarded, since we discard history when the global checkpoint
            // advances and not when creating a new safe commit. In any case this is a best-effort thing since future recoveries can
            // always fall back to file-based ones, and only really presents a problem if this primary fails before things have settled
            // down.
            startingSeqNo = softDeletesEnabled ? Long.parseLong(safeCommitRef.getIndexCommit().getUserData().get(SequenceNumbers.LOCAL_CHECKPOINT_KEY)) + 1L : 0;
            logger.trace("performing file-based recovery followed by history replay starting at [{}]", startingSeqNo);
            try {
                final int estimateNumOps = shard.estimateNumberOfHistoryOperations("peer-recovery", historySource, startingSeqNo);
                final Releasable releaseStore = acquireStore(shard.store());
                resources.add(releaseStore);
                // release the safe commit and the store as soon as the file step finishes, whether it succeeded or failed
                sendFileStep.whenComplete(r -> IOUtils.close(safeCommitRef, releaseStore), e -> {
                    try {
                        IOUtils.close(safeCommitRef, releaseStore);
                    } catch (final IOException ex) {
                        logger.warn("releasing snapshot caused exception", ex);
                    }
                });
                final StepListener<ReplicationResponse> deleteRetentionLeaseStep = new StepListener<>();
                runUnderPrimaryPermit(() -> {
                    try {
                        // If the target previously had a copy of this shard then a file-based recovery might move its global
                        // checkpoint backwards. We must therefore remove any existing retention lease so that we can create a
                        // new one later on in the recovery.
                        shard.removePeerRecoveryRetentionLease(request.targetNode().getId(), new ThreadedActionListener<>(logger, shard.getThreadPool(), ThreadPool.Names.GENERIC, deleteRetentionLeaseStep, false));
                    } catch (RetentionLeaseNotFoundException e) {
                        // nothing to remove: treat the missing lease as a completed removal
                        logger.debug("no peer-recovery retention lease for " + request.targetAllocationId());
                        deleteRetentionLeaseStep.onResponse(null);
                    }
                }, shardId + " removing retention lease for [" + request.targetAllocationId() + "]", shard, cancellableThreads, logger);
                // phase1 (file copy) only starts once the old retention lease has been dealt with
                deleteRetentionLeaseStep.whenComplete(ignored -> {
                    assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[phase1]");
                    phase1(safeCommitRef.getIndexCommit(), startingSeqNo, () -> estimateNumOps, sendFileStep);
                }, onFailure);
            } catch (final Exception e) {
                throw new RecoveryEngineException(shard.shardId(), 1, "sendFileStep failed", e);
            }
        }
        assert startingSeqNo >= 0 : "startingSeqNo must be non negative. got: " + startingSeqNo;
        sendFileStep.whenComplete(r -> {
            assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[prepareTargetForTranslog]");
            // For a sequence based recovery, the target can keep its local translog
            prepareTargetForTranslog(shard.estimateNumberOfHistoryOperations("peer-recovery", historySource, startingSeqNo), prepareEngineStep);
        }, onFailure);
        prepareEngineStep.whenComplete(prepareEngineTime -> {
            assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[phase2]");
            /*
                 * add shard to replication group (shard will receive replication requests from this point on)
                 * now that engine is open. This means that any document indexed into the primary after
                 * this will be replicated to this replica as well make sure to do this before sampling
                 * the max sequence number in the next step, to ensure that we send all documents up to
                 * maxSeqNo in phase2.
                 */
            runUnderPrimaryPermit(() -> shard.initiateTracking(request.targetAllocationId()), shardId + " initiating tracking of " + request.targetAllocationId(), shard, cancellableThreads, logger);
            final long endingSeqNo = shard.seqNoStats().getMaxSeqNo();
            // CRATE_PATCH
            try {
                blobRecoveryHook();
            } catch (Exception e) {
                throw new RecoveryEngineException(shard.shardId(), 1, "blobRecoveryHook failed", e);
            }
            if (logger.isTraceEnabled()) {
                logger.trace("snapshot translog for recovery; current size is [{}]", shard.estimateNumberOfHistoryOperations("peer-recovery", historySource, startingSeqNo));
            }
            final Translog.Snapshot phase2Snapshot = shard.getHistoryOperations("peer-recovery", historySource, startingSeqNo);
            resources.add(phase2Snapshot);
            // the history snapshot has been taken, so the retention lock is released here
            retentionLock.close();
            // we have to capture the max_seen_auto_id_timestamp and the max_seq_no_of_updates to make sure that these values
            // are at least as high as the corresponding values on the primary when any of these operations were executed on it.
            final long maxSeenAutoIdTimestamp = shard.getMaxSeenAutoIdTimestamp();
            final long maxSeqNoOfUpdatesOrDeletes = shard.getMaxSeqNoOfUpdatesOrDeletes();
            final RetentionLeases retentionLeases = shard.getRetentionLeases();
            final long mappingVersionOnPrimary = shard.indexSettings().getIndexMetadata().getMappingVersion();
            phase2(startingSeqNo, endingSeqNo, phase2Snapshot, maxSeenAutoIdTimestamp, maxSeqNoOfUpdatesOrDeletes, retentionLeases, mappingVersionOnPrimary, sendSnapshotStep);
        }, onFailure);
        // Recovery target can trim all operations >= startingSeqNo as we have sent all these operations in the phase 2
        final long trimAboveSeqNo = startingSeqNo - 1;
        sendSnapshotStep.whenComplete(r -> finalizeRecovery(r.targetLocalCheckpoint, trimAboveSeqNo, finalizeStep), onFailure);
        // all steps succeeded: assemble the response from the recorded step results and release resources
        finalizeStep.whenComplete(r -> {
            // TODO: return the actual throttle time
            final long phase1ThrottlingWaitTime = 0L;
            final SendSnapshotResult sendSnapshotResult = sendSnapshotStep.result();
            final SendFileResult sendFileResult = sendFileStep.result();
            final RecoveryResponse response = new RecoveryResponse(sendFileResult.phase1FileNames, sendFileResult.phase1FileSizes, sendFileResult.phase1ExistingFileNames, sendFileResult.phase1ExistingFileSizes, sendFileResult.totalSize, sendFileResult.existingTotalSize, sendFileResult.took.millis(), phase1ThrottlingWaitTime, prepareEngineStep.result().millis(), sendSnapshotResult.sentOperations, sendSnapshotResult.tookTime.millis());
            try {
                wrappedListener.onResponse(response);
            } finally {
                IOUtils.close(resources);
            }
        }, onFailure);
    } catch (Exception e) {
        // synchronous failures take the same path as asynchronous ones: release resources, then notify the listener
        IOUtils.closeWhileHandlingException(releaseResources, () -> wrappedListener.onFailure(e));
    }
}
Also used : CancellableThreads(org.elasticsearch.common.util.CancellableThreads) Arrays(java.util.Arrays) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) Releasables(org.elasticsearch.common.lease.Releasables) RecoveryEngineException(org.elasticsearch.index.engine.RecoveryEngineException) RetentionLeaseNotFoundException(org.elasticsearch.index.seqno.RetentionLeaseNotFoundException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) StoreFileMetadata(org.elasticsearch.index.store.StoreFileMetadata) Transports(org.elasticsearch.transport.Transports) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Locale(java.util.Locale) ThreadPool(org.elasticsearch.threadpool.ThreadPool) ActionRunnable(org.elasticsearch.action.ActionRunnable) IOContext(org.apache.lucene.store.IOContext) StepListener(org.elasticsearch.action.StepListener) Releasable(org.elasticsearch.common.lease.Releasable) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) PlainActionFuture(org.elasticsearch.action.support.PlainActionFuture) IndexShardRoutingTable(org.elasticsearch.cluster.routing.IndexShardRoutingTable) BytesReference(org.elasticsearch.common.bytes.BytesReference) Engine(org.elasticsearch.index.engine.Engine) RemoteTransportException(org.elasticsearch.transport.RemoteTransportException) List(java.util.List) Logger(org.apache.logging.log4j.Logger) Version(org.elasticsearch.Version) InputStreamIndexInput(org.elasticsearch.common.lucene.store.InputStreamIndexInput) TimeValue(io.crate.common.unit.TimeValue) ReplicationTracker(org.elasticsearch.index.seqno.ReplicationTracker) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) IndexCommit(org.apache.lucene.index.IndexCommit) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) IndexShardClosedException(org.elasticsearch.index.shard.IndexShardClosedException) IndexShardRelocatedException(org.elasticsearch.index.shard.IndexShardRelocatedException) 
CompletableFuture(java.util.concurrent.CompletableFuture) Deque(java.util.Deque) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ArrayList(java.util.ArrayList) BytesArray(org.elasticsearch.common.bytes.BytesArray) RetentionLease(org.elasticsearch.index.seqno.RetentionLease) RetentionLeases(org.elasticsearch.index.seqno.RetentionLeases) Store(org.elasticsearch.index.store.Store) StreamSupport(java.util.stream.StreamSupport) IntSupplier(java.util.function.IntSupplier) Loggers(org.elasticsearch.common.logging.Loggers) ArrayUtil(org.apache.lucene.util.ArrayUtil) FutureUtils(org.elasticsearch.common.util.concurrent.FutureUtils) SequenceNumbers(org.elasticsearch.index.seqno.SequenceNumbers) IndexShardState(org.elasticsearch.index.shard.IndexShardState) IndexInput(org.apache.lucene.store.IndexInput) SetOnce(org.apache.lucene.util.SetOnce) IOUtils(io.crate.common.io.IOUtils) IndexShard(org.elasticsearch.index.shard.IndexShard) IOException(java.io.IOException) StopWatch(org.elasticsearch.common.StopWatch) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) ConcurrentLinkedDeque(java.util.concurrent.ConcurrentLinkedDeque) Consumer(java.util.function.Consumer) ExceptionsHelper(org.elasticsearch.ExceptionsHelper) AtomicLong(java.util.concurrent.atomic.AtomicLong) ReplicationResponse(org.elasticsearch.action.support.replication.ReplicationResponse) Closeable(java.io.Closeable) Translog(org.elasticsearch.index.translog.Translog) ThreadedActionListener(org.elasticsearch.action.support.ThreadedActionListener) Comparator(java.util.Comparator) Collections(java.util.Collections) RateLimiter(org.apache.lucene.store.RateLimiter) ActionListener(org.elasticsearch.action.ActionListener) IndexShardRoutingTable(org.elasticsearch.cluster.routing.IndexShardRoutingTable) Closeable(java.io.Closeable) ReplicationResponse(org.elasticsearch.action.support.replication.ReplicationResponse) Translog(org.elasticsearch.index.translog.Translog) 
RecoveryEngineException(org.elasticsearch.index.engine.RecoveryEngineException) Engine(org.elasticsearch.index.engine.Engine) TimeValue(io.crate.common.unit.TimeValue) CancellableThreads(org.elasticsearch.common.util.CancellableThreads) SetOnce(org.apache.lucene.util.SetOnce) IOException(java.io.IOException) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) RecoveryEngineException(org.elasticsearch.index.engine.RecoveryEngineException) RetentionLeaseNotFoundException(org.elasticsearch.index.seqno.RetentionLeaseNotFoundException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) RemoteTransportException(org.elasticsearch.transport.RemoteTransportException) IndexShardClosedException(org.elasticsearch.index.shard.IndexShardClosedException) IndexShardRelocatedException(org.elasticsearch.index.shard.IndexShardRelocatedException) IOException(java.io.IOException) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) RetentionLeases(org.elasticsearch.index.seqno.RetentionLeases) RetentionLeaseNotFoundException(org.elasticsearch.index.seqno.RetentionLeaseNotFoundException) IndexShardClosedException(org.elasticsearch.index.shard.IndexShardClosedException) RetentionLease(org.elasticsearch.index.seqno.RetentionLease) StepListener(org.elasticsearch.action.StepListener) Releasable(org.elasticsearch.common.lease.Releasable) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting)

Example 32 with TimeValue

use of io.crate.common.unit.TimeValue in project crate by crate.

the class RecoverySourceHandler method phase2.

/**
 * Perform phase two of the recovery process.
 * <p>
 * Phase two uses a snapshot of the current translog *without* acquiring the write lock (however, the translog snapshot is
 * a point-in-time view of the translog). It then sends each translog operation to the target node so it can be replayed into
 * the new shard.
 *
 * @param startingSeqNo              the sequence number to start recovery from, or {@link SequenceNumbers#UNASSIGNED_SEQ_NO} if all
 *                                   ops should be sent
 * @param endingSeqNo                the highest sequence number that should be sent
 * @param snapshot                   a snapshot of the translog
 * @param maxSeenAutoIdTimestamp     the max auto_id_timestamp of append-only requests on the primary
 * @param maxSeqNoOfUpdatesOrDeletes the max seq_no of updates or deletes on the primary after these operations were executed on it.
 * @param retentionLeases            the retention leases handed to the operation batch sender
 * @param mappingVersion             the mapping version handed to the operation batch sender
 * @param listener                   a listener which will be notified with a {@code SendSnapshotResult} carrying the local
 *                                   checkpoint on the target, the number of operations sent and the time taken, or with the
 *                                   failure reported by the sender
 * @throws IndexShardClosedException if the shard is already closed when this method is invoked
 * @throws IOException               declared by this method; the statements visible here do not throw it directly
 */
void phase2(long startingSeqNo, long endingSeqNo, Translog.Snapshot snapshot, long maxSeenAutoIdTimestamp, long maxSeqNoOfUpdatesOrDeletes, RetentionLeases retentionLeases, long mappingVersion, ActionListener<SendSnapshotResult> listener) throws IOException {
    if (shard.state() == IndexShardState.CLOSED) {
        throw new IndexShardClosedException(request.shardId());
    }
    // parameterized so the message is only formatted when trace logging is enabled
    logger.trace("recovery [phase2]: sending transaction log operations (from [{}] to [{}])", startingSeqNo, endingSeqNo);
    final StopWatch stopWatch = new StopWatch().start();
    final StepListener<Void> sendListener = new StepListener<>();
    final OperationBatchSender sender = new OperationBatchSender(startingSeqNo, endingSeqNo, snapshot, maxSeenAutoIdTimestamp, maxSeqNoOfUpdatesOrDeletes, retentionLeases, mappingVersion, sendListener);
    // the completion handler is registered before the sender starts so that no completion can be missed
    sendListener.whenComplete(ignored -> {
        final long skippedOps = sender.skippedOps.get();
        final int totalSentOps = sender.sentOps.get();
        final long targetLocalCheckpoint = sender.targetLocalCheckpoint.get();
        // every operation in the snapshot must be accounted for: skipped by the snapshot, skipped by the sender, or sent
        assert snapshot.totalOperations() == snapshot.skippedOperations() + skippedOps + totalSentOps : String.format(Locale.ROOT, "expected total [%d], overridden [%d], skipped [%d], total sent [%d]", snapshot.totalOperations(), snapshot.skippedOperations(), skippedOps, totalSentOps);
        stopWatch.stop();
        final TimeValue tookTime = stopWatch.totalTime();
        logger.trace("recovery [phase2]: took [{}]", tookTime);
        listener.onResponse(new SendSnapshotResult(targetLocalCheckpoint, totalSentOps, tookTime));
    }, listener::onFailure);
    sender.start();
}
Also used : IndexShardClosedException(org.elasticsearch.index.shard.IndexShardClosedException) StepListener(org.elasticsearch.action.StepListener) TimeValue(io.crate.common.unit.TimeValue) StopWatch(org.elasticsearch.common.StopWatch)

Example 33 with TimeValue

use of io.crate.common.unit.TimeValue in project crate by crate.

the class RecoverySourceHandler method prepareTargetForTranslog.

/**
 * Asks the recovery target to get ready for translog operations and reports how long that took.
 *
 * @param totalTranslogOps forwarded unchanged to the target's {@code prepareForTranslogOperations} call
 * @param listener         receives the elapsed time on success, or a {@link RecoveryEngineException} wrapping the
 *                         original failure
 */
void prepareTargetForTranslog(int totalTranslogOps, ActionListener<TimeValue> listener) {
    // timing starts before the request is issued and stops when the target answers
    final StopWatch timer = new StopWatch().start();
    final ActionListener<Void> timedListener = ActionListener.wrap(
        ignored -> {
            timer.stop();
            final TimeValue elapsed = timer.totalTime();
            logger.trace("recovery [phase1]: remote engine start took [{}]", elapsed);
            listener.onResponse(elapsed);
        },
        failure -> {
            final RecoveryEngineException wrapped = new RecoveryEngineException(shard.shardId(), 1, "prepare target for translog failed", failure);
            listener.onFailure(wrapped);
        });
    // Send a request preparing the new shard's translog to receive operations. This ensures the shard engine is started and disables
    // garbage collection (not the JVM's GC!) of tombstone deletes.
    logger.trace("recovery [phase1]: prepare remote engine for translog");
    cancellableThreads.checkForCancel();
    recoveryTarget.prepareForTranslogOperations(totalTranslogOps, timedListener);
}
Also used : RecoveryEngineException(org.elasticsearch.index.engine.RecoveryEngineException) TimeValue(io.crate.common.unit.TimeValue) StopWatch(org.elasticsearch.common.StopWatch)

Example 34 with TimeValue

use of io.crate.common.unit.TimeValue in project crate by crate.

the class JvmGcMonitorService method logSlowGc.

/**
 * Writes the slow-GC message at the log level that corresponds to the given threshold, provided that level is
 * enabled on the logger. Thresholds other than WARN, INFO and DEBUG produce no output.
 *
 * @param logger      the logger to write to
 * @param threshold   selects the log level (WARN, INFO or DEBUG)
 * @param seq         sequence value placed into the log message
 * @param slowGcEvent the event whose collector name, counts, times and heap figures are logged
 * @param pools       renders the per-pool summary from the previous and current JVM stats; only invoked when the
 *                    selected level is enabled
 */
static void logSlowGc(final Logger logger, final JvmMonitor.Threshold threshold, final long seq, final JvmMonitor.SlowGcEvent slowGcEvent, BiFunction<JvmStats, JvmStats, String> pools) {
    // snapshot everything from the event up front, in the same order regardless of the threshold
    final String gcName = slowGcEvent.currentGc.getName();
    final long elapsedMillis = slowGcEvent.elapsed;
    final long totalCount = slowGcEvent.currentGc.getCollectionCount();
    final long currentCount = slowGcEvent.collectionCount;
    final TimeValue totalTime = slowGcEvent.currentGc.getCollectionTime();
    final TimeValue currentTime = slowGcEvent.collectionTime;
    final JvmStats statsBefore = slowGcEvent.lastJvmStats;
    final JvmStats statsAfter = slowGcEvent.currentJvmStats;
    final ByteSizeValue heapMax = slowGcEvent.maxHeapUsed;
    switch (threshold) {
        case WARN:
            if (!logger.isWarnEnabled()) {
                break;
            }
            logger.warn(SLOW_GC_LOG_MESSAGE, gcName, seq, totalCount, currentTime, currentCount, TimeValue.timeValueMillis(elapsedMillis), currentTime, totalTime, statsBefore.getMem().getHeapUsed(), statsAfter.getMem().getHeapUsed(), heapMax, pools.apply(statsBefore, statsAfter));
            break;
        case INFO:
            if (!logger.isInfoEnabled()) {
                break;
            }
            logger.info(SLOW_GC_LOG_MESSAGE, gcName, seq, totalCount, currentTime, currentCount, TimeValue.timeValueMillis(elapsedMillis), currentTime, totalTime, statsBefore.getMem().getHeapUsed(), statsAfter.getMem().getHeapUsed(), heapMax, pools.apply(statsBefore, statsAfter));
            break;
        case DEBUG:
            if (!logger.isDebugEnabled()) {
                break;
            }
            logger.debug(SLOW_GC_LOG_MESSAGE, gcName, seq, totalCount, currentTime, currentCount, TimeValue.timeValueMillis(elapsedMillis), currentTime, totalTime, statsBefore.getMem().getHeapUsed(), statsAfter.getMem().getHeapUsed(), heapMax, pools.apply(statsBefore, statsAfter));
            break;
        default:
            // any other threshold is not logged
            break;
    }
}
Also used : ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) TimeValue(io.crate.common.unit.TimeValue)

Example 35 with TimeValue

use of io.crate.common.unit.TimeValue in project crate by crate.

the class Node method start.

/**
 * Start the node. If the node is already started, this method is no-op.
 * <p>
 * Services are started in a fixed order: plugin lifecycle components and Crate services first, then indices,
 * snapshot and monitoring services, HTTP and transport, the gateway metadata, and finally discovery and the
 * cluster service. If an initial state timeout is configured and no master is known yet, the call blocks until a
 * master is discovered, the cluster service closes, or the timeout elapses.
 *
 * @return this node
 * @throws NodeValidationException declared by this method; presumably raised by
 *                                 {@code validateNodeBeforeAcceptingRequests} - confirm against that method
 */
public Node start() throws NodeValidationException {
    // moveToStarted() returning false means there is nothing to do (see the no-op case above)
    if (!lifecycle.moveToStarted()) {
        return this;
    }
    logger.info("starting ...");
    pluginLifecycleComponents.forEach(LifecycleComponent::start);
    injector.getInstance(BlobService.class).start();
    injector.getInstance(DecommissioningService.class).start();
    injector.getInstance(NodeDisconnectJobMonitorService.class).start();
    injector.getInstance(JobsLogService.class).start();
    injector.getInstance(PostgresNetty.class).start();
    injector.getInstance(TasksService.class).start();
    injector.getInstance(Schemas.class).start();
    injector.getInstance(ArrayMapperService.class).start();
    injector.getInstance(DanglingArtifactsService.class).start();
    injector.getInstance(SslContextProviderService.class).start();
    injector.getInstance(MappingUpdatedAction.class).setClient(client);
    injector.getInstance(IndicesService.class).start();
    injector.getInstance(IndicesClusterStateService.class).start();
    injector.getInstance(SnapshotsService.class).start();
    injector.getInstance(SnapshotShardsService.class).start();
    nodeService.getMonitorService().start();
    final ClusterService clusterService = injector.getInstance(ClusterService.class);
    final NodeConnectionsService nodeConnectionsService = injector.getInstance(NodeConnectionsService.class);
    nodeConnectionsService.start();
    clusterService.setNodeConnectionsService(nodeConnectionsService);
    injector.getInstance(GatewayService.class).start();
    Discovery discovery = injector.getInstance(Discovery.class);
    clusterService.getMasterService().setClusterStatePublisher(discovery::publish);
    // HTTP is started before the transport service because its publish address is needed just below
    HttpServerTransport httpServerTransport = injector.getInstance(HttpServerTransport.class);
    httpServerTransport.start();
    // CRATE_PATCH: add http publish address to the discovery node
    TransportAddress publishAddress = httpServerTransport.info().address().publishAddress();
    localNodeFactory.httpPublishAddress = publishAddress.getAddress() + ':' + publishAddress.getPort();
    // Start the transport service now so the publish address will be added to the local disco node in ClusterService
    TransportService transportService = injector.getInstance(TransportService.class);
    transportService.start();
    assert localNodeFactory.getNode() != null;
    assert transportService.getLocalNode().equals(localNodeFactory.getNode()) : "transportService has a different local node than the factory provided";
    injector.getInstance(PeerRecoverySourceService.class).start();
    // Load (and maybe upgrade) the metadata stored on disk
    final GatewayMetaState gatewayMetaState = injector.getInstance(GatewayMetaState.class);
    gatewayMetaState.start(settings(), transportService, clusterService, injector.getInstance(MetaStateService.class), injector.getInstance(MetadataIndexUpgradeService.class), injector.getInstance(MetadataUpgrader.class), injector.getInstance(PersistedClusterStateService.class));
    // extra consistency checks on the on-disk state, only executed when Assertions.ENABLED is true
    if (Assertions.ENABLED) {
        try {
            assert injector.getInstance(MetaStateService.class).loadFullState().v1().isEmpty();
            final NodeMetadata nodeMetaData = NodeMetadata.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, nodeEnvironment.nodeDataPaths());
            assert nodeMetaData != null;
            assert nodeMetaData.nodeVersion().equals(Version.CURRENT);
            assert nodeMetaData.nodeId().equals(localNodeFactory.getNode().getId());
        } catch (IOException e) {
            assert false : e;
        }
    }
    // we load the global state here (the persistent part of the cluster state stored on disk) to
    // pass it to the bootstrap checks to allow plugins to enforce certain preconditions based on the recovered state.
    final Metadata onDiskMetadata = gatewayMetaState.getPersistedState().getLastAcceptedState().metadata();
    // this is never null
    assert onDiskMetadata != null : "metadata is null but shouldn't";
    // validation receives the bound transport address and the bootstrap checks contributed by all plugins
    validateNodeBeforeAcceptingRequests(transportService.boundAddress(), pluginsService.filterPlugins(Plugin.class).stream().flatMap(p -> p.getBootstrapChecks().stream()).collect(Collectors.toList()));
    // start after transport service so the local disco is known
    // start before cluster service so that it can set initial state on ClusterApplierService
    discovery.start();
    clusterService.start();
    assert clusterService.localNode().equals(localNodeFactory.getNode()) : "clusterService has a different local node than the factory provided";
    transportService.acceptIncomingRequests();
    discovery.startInitialJoin();
    final TimeValue initialStateTimeout = INITIAL_STATE_TIMEOUT_SETTING.get(settings);
    configureNodeAndClusterIdStateListener(clusterService);
    // a timeout of zero (or less) skips waiting for a master altogether
    if (initialStateTimeout.millis() > 0) {
        // NOTE(review): 'thread' is not referenced again in this method
        final ThreadPool thread = injector.getInstance(ThreadPool.class);
        ClusterState clusterState = clusterService.state();
        ClusterStateObserver observer = new ClusterStateObserver(clusterState, clusterService, null, logger);
        if (clusterState.nodes().getMasterNodeId() == null) {
            logger.debug("waiting to join the cluster. timeout [{}]", initialStateTimeout);
            // released by whichever observer callback fires first: new state with a master, service close, or timeout
            final CountDownLatch latch = new CountDownLatch(1);
            observer.waitForNextChange(new ClusterStateObserver.Listener() {

                @Override
                public void onNewClusterState(ClusterState state) {
                    latch.countDown();
                }

                @Override
                public void onClusterServiceClose() {
                    latch.countDown();
                }

                @Override
                public void onTimeout(TimeValue timeout) {
                    // a timeout is only logged; startup continues once the latch is released
                    logger.warn("timed out while waiting for initial discovery state - timeout: {}", initialStateTimeout);
                    latch.countDown();
                }
            }, state -> state.nodes().getMasterNodeId() != null, initialStateTimeout);
            try {
                latch.await();
            } catch (InterruptedException e) {
                // NOTE(review): the thread's interrupt status is not restored and the InterruptedException is not
                // attached as the cause of the exception thrown here - confirm whether that is intentional
                throw new ElasticsearchTimeoutException("Interrupted while waiting for initial discovery state");
            }
        }
    }
    // optionally record the bound transport and HTTP addresses via writePortsFile
    if (WRITE_PORTS_FILE_SETTING.get(settings)) {
        TransportService transport = injector.getInstance(TransportService.class);
        writePortsFile("transport", transport.boundAddress());
        HttpServerTransport http = injector.getInstance(HttpServerTransport.class);
        writePortsFile("http", http.boundAddress());
    }
    logger.info("started");
    // cluster plugins are told about the started node only after everything above has completed
    pluginsService.filterPlugins(ClusterPlugin.class).forEach(ClusterPlugin::onNodeStarted);
    return this;
}
Also used : SnapshotsService(org.elasticsearch.snapshots.SnapshotsService) SnapshotShardsService(org.elasticsearch.snapshots.SnapshotShardsService) NodeConnectionsService(org.elasticsearch.cluster.NodeConnectionsService) BoundTransportAddress(org.elasticsearch.common.transport.BoundTransportAddress) TransportAddress(org.elasticsearch.common.transport.TransportAddress) SslContextProviderService(io.crate.protocols.ssl.SslContextProviderService) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata) Metadata(org.elasticsearch.cluster.metadata.Metadata) IndexTemplateMetadata(org.elasticsearch.cluster.metadata.IndexTemplateMetadata) NodeMetadata(org.elasticsearch.env.NodeMetadata) ThreadPool(org.elasticsearch.threadpool.ThreadPool) MetadataUpgrader(org.elasticsearch.plugins.MetadataUpgrader) TasksService(io.crate.execution.jobs.TasksService) HttpServerTransport(org.elasticsearch.http.HttpServerTransport) DecommissioningService(io.crate.cluster.gracefulstop.DecommissioningService) GatewayMetaState(org.elasticsearch.gateway.GatewayMetaState) PostgresNetty(io.crate.protocols.postgres.PostgresNetty) MetaStateService(org.elasticsearch.gateway.MetaStateService) IndicesClusterStateService(org.elasticsearch.indices.cluster.IndicesClusterStateService) LifecycleComponent(org.elasticsearch.common.component.LifecycleComponent) PeerRecoverySourceService(org.elasticsearch.indices.recovery.PeerRecoverySourceService) DanglingArtifactsService(io.crate.metadata.DanglingArtifactsService) TimeValue(io.crate.common.unit.TimeValue) JobsLogService(io.crate.execution.engine.collect.stats.JobsLogService) ClusterState(org.elasticsearch.cluster.ClusterState) ClusterStateObserver(org.elasticsearch.cluster.ClusterStateObserver) ClusterPlugin(org.elasticsearch.plugins.ClusterPlugin) Discovery(org.elasticsearch.discovery.Discovery) IndicesService(org.elasticsearch.indices.IndicesService) MetadataIndexUpgradeService(org.elasticsearch.cluster.metadata.MetadataIndexUpgradeService) 
IOException(java.io.IOException) Schemas(io.crate.metadata.Schemas) CountDownLatch(java.util.concurrent.CountDownLatch) GatewayService(org.elasticsearch.gateway.GatewayService) NodeMetadata(org.elasticsearch.env.NodeMetadata) ClusterService(org.elasticsearch.cluster.service.ClusterService) ElasticsearchTimeoutException(org.elasticsearch.ElasticsearchTimeoutException) BlobService(io.crate.blob.BlobService) NodeDisconnectJobMonitorService(io.crate.execution.jobs.transport.NodeDisconnectJobMonitorService) TransportService(org.elasticsearch.transport.TransportService) MappingUpdatedAction(org.elasticsearch.cluster.action.index.MappingUpdatedAction) PersistedClusterStateService(org.elasticsearch.gateway.PersistedClusterStateService) ArrayMapperService(io.crate.lucene.ArrayMapperService)

Aggregations

TimeValue (io.crate.common.unit.TimeValue)75 Test (org.junit.Test)23 ClusterState (org.elasticsearch.cluster.ClusterState)20 IOException (java.io.IOException)17 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)12 ActionListener (org.elasticsearch.action.ActionListener)12 IndexMetadata (org.elasticsearch.cluster.metadata.IndexMetadata)11 ArrayList (java.util.ArrayList)10 ThreadPool (org.elasticsearch.threadpool.ThreadPool)10 ElasticsearchException (org.elasticsearch.ElasticsearchException)9 Settings (org.elasticsearch.common.settings.Settings)9 Logger (org.apache.logging.log4j.Logger)8 ClusterStateUpdateTask (org.elasticsearch.cluster.ClusterStateUpdateTask)8 ClusterService (org.elasticsearch.cluster.service.ClusterService)8 List (java.util.List)7 LogManager (org.apache.logging.log4j.LogManager)7 Version (org.elasticsearch.Version)7 ElasticsearchTimeoutException (org.elasticsearch.ElasticsearchTimeoutException)6 ClusterStateObserver (org.elasticsearch.cluster.ClusterStateObserver)6 StreamInput (org.elasticsearch.common.io.stream.StreamInput)6