Use of AlreadyClosedException in project crate by crate.
The class TransportIndicesStatsAction, method shardOperation.
/**
 * Builds the {@link ShardStats} for the shard addressed by {@code shardRouting}.
 *
 * The shard is looked up through {@code indicesService}; its commit, sequence-number and
 * retention-lease statistics are read, and all of them are left {@code null} when the shard
 * reports {@link AlreadyClosedException}.
 *
 * NOTE(review): this excerpt has lost statements and closing braces (see the two empty
 * "if (" lines below). The comments describe only what is still visible.
 *
 * @param request      the stats request; not referenced by any intact statement in this excerpt,
 *                     presumably consumed by the lost "if" conditions - TODO confirm
 * @param shardRouting routing whose shard id selects the index service and the shard
 * @return stats assembled from the shard's routing entry, shard path, common stats and the
 *         three optional statistics
 * @throws ShardNotFoundException when the shard has no routing entry
 */
protected ShardStats shardOperation(IndicesStatsRequest request, ShardRouting shardRouting) {
// Resolve the index service from the shard id's index, then the shard from the shard id's number.
IndexService indexService = indicesService.indexServiceSafe(shardRouting.shardId().getIndex());
IndexShard indexShard = indexService.getShard(shardRouting.shardId().id());
// if we don't have the routing entry yet, we need it stats wise, we treat it as if the shard is not ready yet
if (indexShard.routingEntry() == null) {
throw new ShardNotFoundException(indexShard.shardId());
// A new flags object is created and clear() is called on it before it is handed to CommonStats below.
CommonStatsFlags flags = new CommonStatsFlags().clear();
// NOTE(review): the conditions and bodies of the next two "if" statements are missing from this excerpt.
if ( {
if ( {
CommitStats commitStats;
SeqNoStats seqNoStats;
RetentionLeaseStats retentionLeaseStats;
// All three statistics are read together; a closed shard yields null for every one of them.
try {
commitStats = indexShard.commitStats();
seqNoStats = indexShard.seqNoStats();
retentionLeaseStats = indexShard.getRetentionLeaseStats();
} catch (AlreadyClosedException e) {
// shard is closed - no stats is fine
commitStats = null;
seqNoStats = null;
retentionLeaseStats = null;
return new ShardStats(indexShard.routingEntry(), indexShard.shardPath(), new CommonStats(indexShard, flags), commitStats, seqNoStats, retentionLeaseStats);
Use of AlreadyClosedException in project crate by crate.
The class PeerRecoveryTargetService, method doRecovery.
/**
 * Drives the target side of the peer recovery registered under {@code recoveryId}.
 *
 * Visible steps: look the recovery up in {@code onGoingRecoveries}, recover the shard locally up
 * to the global checkpoint, build a {@link StartRecoveryRequest}, and send it to the source node
 * with a response handler that logs the recovery timings. A failure during preparation is
 * reported through {@code onGoingRecoveries.failRecovery}. The {@code handleException} consumer
 * maps failure causes to either a retry or a failed recovery.
 *
 * NOTE(review): this excerpt has lost statements and closing braces (for example the
 * initializer of {@code recoveryTarget}, the body of the handler's {@code handleException} and
 * the body of the last catch clause). The comments describe only what is still visible.
 *
 * @param recoveryId id used to look up the recovery in {@code onGoingRecoveries}
 */
private void doRecovery(final long recoveryId) {
final StartRecoveryRequest request;
final RecoveryState.Timer timer;
CancellableThreads cancellableThreads;
// The recovery reference is held only for the preparation phase (try-with-resources).
try (RecoveryRef recoveryRef = onGoingRecoveries.getRecovery(recoveryId)) {
if (recoveryRef == null) {
// NOTE(review): no return is visible after this log line - presumably lost in extraction.
LOGGER.trace("not running recovery with id [{}] - can not find it (probably finished)", recoveryId);
// NOTE(review): the initializer expression of recoveryTarget is missing from this excerpt.
final RecoveryTarget recoveryTarget =;
timer = recoveryTarget.state().getTimer();
cancellableThreads = recoveryTarget.cancellableThreads();
try {
assert recoveryTarget.sourceNode() != null : "can not do a recovery without a source node";
LOGGER.trace("{} preparing shard for peer recovery", recoveryTarget.shardId());
// The sequence number returned by the local recovery step is passed on to the start request.
final long startingSeqNo = recoveryTarget.indexShard().recoverLocallyUpToGlobalCheckpoint();
assert startingSeqNo == UNASSIGNED_SEQ_NO || recoveryTarget.state().getStage() == RecoveryState.Stage.TRANSLOG : "unexpected recovery stage [" + recoveryTarget.state().getStage() + "] starting seqno [ " + startingSeqNo + "]";
request = getStartRecoveryRequest(LOGGER, clusterService.localNode(), recoveryTarget, startingSeqNo);
} catch (final Exception e) {
// this will be logged as warning later on...
LOGGER.trace("unexpected error while preparing shard for peer recovery, failing recovery", e);
onGoingRecoveries.failRecovery(recoveryId, new RecoveryFailedException(recoveryTarget.state(), "failed to prepare shard for recovery", e), true);
// Classifies a recovery failure by its unwrapped cause and either retries or fails the recovery.
Consumer<Exception> handleException = e -> {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace(() -> new ParameterizedMessage("[{}][{}] Got exception on recovery", request.shardId().getIndex().getName(), request.shardId().id()), e);
Throwable cause = SQLExceptions.unwrap(e);
if (cause instanceof CancellableThreads.ExecutionCancelledException) {
// this can also come from the source wrapped in a RemoteTransportException
onGoingRecoveries.failRecovery(recoveryId, new RecoveryFailedException(request, "source has canceled the recovery", cause), false);
if (cause instanceof RecoveryEngineException) {
// unwrap an exception that was thrown as part of the recovery
cause = cause.getCause();
// do it twice, in case we have double transport exception
cause = SQLExceptions.unwrap(cause);
if (cause instanceof RecoveryEngineException) {
// unwrap an exception that was thrown as part of the recovery
cause = cause.getCause();
// Index or shard missing or in an illegal state on the remote side: retry after the state-sync delay.
if (cause instanceof IllegalIndexShardStateException || cause instanceof IndexNotFoundException || cause instanceof ShardNotFoundException) {
// if the target is not ready yet, retry
retryRecovery(recoveryId, "remote shard not ready", recoverySettings.retryDelayStateSync(), recoverySettings.activityTimeout());
if (cause instanceof DelayRecoveryException) {
retryRecovery(recoveryId, cause, recoverySettings.retryDelayStateSync(), recoverySettings.activityTimeout());
// Connection problems use the network retry delay instead of the state-sync delay.
if (cause instanceof ConnectTransportException) {
LOGGER.debug("delaying recovery of {} for [{}] due to networking error [{}]", request.shardId(), recoverySettings.retryDelayNetwork(), cause.getMessage());
retryRecovery(recoveryId, cause.getMessage(), recoverySettings.retryDelayNetwork(), recoverySettings.activityTimeout());
if (cause instanceof AlreadyClosedException) {
onGoingRecoveries.failRecovery(recoveryId, new RecoveryFailedException(request, "source shard is closed", cause), false);
// Last visible statement of the consumer: fail the recovery with the original exception.
onGoingRecoveries.failRecovery(recoveryId, new RecoveryFailedException(request, e), true);
try {
LOGGER.trace("{} starting recovery from {}", request.shardId(), request.sourceNode());
// The start-recovery request is sent to the source node from inside cancellableThreads.executeIO.
cancellableThreads.executeIO(() -> transportService.sendRequest(request.sourceNode(), PeerRecoverySourceService.Actions.START_RECOVERY, request, new TransportResponseHandler<RecoveryResponse>() {
public void handleResponse(RecoveryResponse recoveryResponse) {
final TimeValue recoveryTime = new TimeValue(timer.time());
// do this through ongoing recoveries to remove it from the collection
// NOTE(review): the call the comment above refers to is not visible in this excerpt.
if (LOGGER.isTraceEnabled()) {
// At trace level a multi-line summary of both recovery phases is assembled and logged.
StringBuilder sb = new StringBuilder();
sb.append('[').append(request.shardId().getIndex().getName()).append(']').append('[').append(request.shardId().id()).append("] ");
sb.append("recovery completed from ").append(request.sourceNode()).append(", took[").append(recoveryTime).append("]\n");
sb.append(" phase1: recovered_files [").append(recoveryResponse.phase1FileNames.size()).append("]").append(" with total_size of [").append(new ByteSizeValue(recoveryResponse.phase1TotalSize)).append("]").append(", took [").append(timeValueMillis(recoveryResponse.phase1Time)).append("], throttling_wait [").append(timeValueMillis(recoveryResponse.phase1ThrottlingWaitTime)).append(']').append("\n");
sb.append(" : reusing_files [").append(recoveryResponse.phase1ExistingFileNames.size()).append("] with total_size of [").append(new ByteSizeValue(recoveryResponse.phase1ExistingTotalSize)).append("]\n");
sb.append(" phase2: start took [").append(timeValueMillis(recoveryResponse.startTime)).append("]\n");
sb.append(" : recovered [").append(recoveryResponse.phase2Operations).append("]").append(" transaction log operations").append(", took [").append(timeValueMillis(recoveryResponse.phase2Time)).append("]").append("\n");
LOGGER.trace("{}", sb);
} else {
LOGGER.debug("{} recovery done from [{}], took [{}]", request.shardId(), request.sourceNode(), recoveryTime);
// NOTE(review): the body of handleException is missing from this excerpt.
public void handleException(TransportException e) {
public String executor() {
// we do some heavy work like refreshes in the response so fork off to the generic threadpool
return ThreadPool.Names.GENERIC;
// Deserializes the response from the transport stream.
public RecoveryResponse read(StreamInput in) throws IOException {
return new RecoveryResponse(in);
} catch (CancellableThreads.ExecutionCancelledException e) {
// A cancelled send is only traced here.
LOGGER.trace("recovery cancelled", e);
// NOTE(review): the body of this catch clause is missing from this excerpt.
} catch (Exception e) {
Use of AlreadyClosedException in project crate by crate.
The class InternalEngine, method rollTranslogGeneration.
public void rollTranslogGeneration() throws EngineException {
try (ReleasableLock ignored = readLock.acquire()) {
} catch (AlreadyClosedException e) {
throw e;
} catch (Exception e) {
try {
failEngine("translog trimming failed", e);
} catch (Exception inner) {
throw new EngineException(shardId, "failed to roll translog", e);
Use of AlreadyClosedException in project crate by crate.
The class InternalEngine, method trimOperationsFromTranslog.
public void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException {
try (ReleasableLock lock = readLock.acquire()) {
translog.trimOperations(belowTerm, aboveSeqNo);
} catch (AlreadyClosedException e) {
throw e;
} catch (Exception e) {
try {
failEngine("translog operations trimming failed", e);
} catch (Exception inner) {
throw new EngineException(shardId, "failed to trim translog operations", e);
Use of AlreadyClosedException in project crate by crate.
The class InternalEngine, method flush.
/**
 * Flushes the engine and returns the id of the last committed segment infos.
 *
 * Under the read lock the method tries to take {@code flushLock}. When the lock is busy and
 * {@code waitIfOngoing} is false it returns the current commit id without flushing. Otherwise
 * it commits the index writer if any of the visible conditions holds (uncommitted changes,
 * {@code force}, a periodic flush being due, or a processed local checkpoint ahead of the one
 * stored in the last commit) and then refreshes.
 *
 * NOTE(review): this excerpt has lost statements and closing braces. The comments describe only
 * what is still visible.
 *
 * @param force         commit even when none of the other conditions holds
 * @param waitIfOngoing whether to wait when {@code flushLock} cannot be taken immediately;
 *                      must be true when {@code force} is true
 * @return a {@link CommitId} wrapping {@code lastCommittedSegmentInfos.getId()}
 * @throws IllegalArgumentException   if {@code force} is true and {@code waitIfOngoing} is false
 * @throws FlushFailedEngineException if the commit or refresh fails with an exception other
 *                                    than {@link AlreadyClosedException}
 * @throws EngineException            declared by the signature
 */
public CommitId flush(boolean force, boolean waitIfOngoing) throws EngineException {
// A forced flush that does not wait is rejected: the assert trips when assertions are on, the throw otherwise.
if (force && waitIfOngoing == false) {
assert false : "wait_if_ongoing must be true for a force flush: force=" + force + " wait_if_ongoing=" + waitIfOngoing;
throw new IllegalArgumentException("wait_if_ongoing must be true for a force flush: force=" + force + " wait_if_ongoing=" + waitIfOngoing);
final byte[] newCommitId;
try (ReleasableLock lock = readLock.acquire()) {
if (flushLock.tryLock() == false) {
// if we can't get the lock right away we block if needed otherwise barf
if (waitIfOngoing) {
logger.trace("waiting for in-flight flush to finish");
// NOTE(review): no blocking lock call is visible between these two trace messages -
// presumably lost in extraction; confirm against the full source.
logger.trace("acquired flush lock after blocking");
} else {
// Another flush is in flight and the caller does not want to wait: report the current commit id.
return new CommitId(lastCommittedSegmentInfos.getId());
} else {
logger.trace("acquired flush lock immediately");
try {
// Only flush if (1) Lucene has uncommitted docs, or (2) forced by caller, or (3) the
// newly created commit points to a different translog generation (can free translog),
// or (4) the local checkpoint information in the last commit is stale, which slows down future recoveries.
boolean hasUncommittedChanges = indexWriter.hasUncommittedChanges();
boolean shouldPeriodicallyFlush = shouldPeriodicallyFlush();
// The last condition compares against the checkpoint parsed from the last commit's user data.
if (hasUncommittedChanges || force || shouldPeriodicallyFlush || getProcessedLocalCheckpoint() > Long.parseLong(lastCommittedSegmentInfos.userData.get(SequenceNumbers.LOCAL_CHECKPOINT_KEY))) {
try {
logger.trace("starting commit for flush; commitTranslog=true");
commitIndexWriter(indexWriter, translog, null);
logger.trace("finished commit for flush");
// we need to refresh in order to clear older version values
refresh("version_table_flush", SearcherScope.INTERNAL, true);
} catch (AlreadyClosedException e) {
throw e;
} catch (Exception e) {
// Every other failure is wrapped so the outer catch can hand it to maybeFailEngine.
throw new FlushFailedEngineException(shardId, e);
newCommitId = lastCommittedSegmentInfos.getId();
} catch (FlushFailedEngineException ex) {
maybeFailEngine("flush", ex);
throw ex;
} finally {
// NOTE(review): no release of flushLock is visible in this finally block, and the first
// line(s) of the comment below are missing - confirm against the full source.
// (e.g., moves backwards) we will at least still sometimes prune deleted tombstones:
// NOTE(review): the body of this "if" is missing from this excerpt.
if (engineConfig.isEnableGcDeletes()) {
return new CommitId(newCommitId);