use of io.crate.common.unit.TimeValue in project crate by crate.
the class DeleteById method createExecutor.
private ShardRequestExecutor<ShardDeleteRequest> createExecutor(DependencyCarrier dependencies, PlannerContext plannerContext) {
ClusterService clusterService = dependencies.clusterService();
TimeValue requestTimeout = ShardingUpsertExecutor.BULK_REQUEST_TIMEOUT_SETTING.get(clusterService.state().metadata().settings());
return new ShardRequestExecutor<>(clusterService, plannerContext.transactionContext(), dependencies.nodeContext(), table, new DeleteRequests(plannerContext.jobId(), requestTimeout), dependencies.transportActionProvider().transportShardDeleteAction()::execute, docKeys);
}
use of io.crate.common.unit.TimeValue in project crate by crate.
the class ClusterApplierService method runTask.
private void runTask(UpdateTask task) {
if (!lifecycle.started()) {
LOGGER.debug("processing [{}]: ignoring, cluster applier service not started", task.source);
return;
}
LOGGER.debug("processing [{}]: execute", task.source);
final ClusterState previousClusterState = state.get();
long startTimeMS = currentTimeInMillis();
final StopWatch stopWatch = new StopWatch();
final ClusterState newClusterState;
try {
try (Releasable ignored = stopWatch.timing("running task [" + task.source + ']')) {
newClusterState = task.apply(previousClusterState);
}
} catch (Exception e) {
TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
LOGGER.trace(() -> new ParameterizedMessage("failed to execute cluster state applier in [{}], state:\nversion [{}], source [{}]\n{}", executionTime, previousClusterState.version(), task.source, previousClusterState), e);
warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch);
task.listener.onFailure(task.source, e);
return;
}
if (previousClusterState == newClusterState) {
TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
LOGGER.debug("processing [{}]: took [{}] no change in cluster state", task.source, executionTime);
warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch);
task.listener.onSuccess(task.source);
} else {
if (LOGGER.isTraceEnabled()) {
LOGGER.debug("cluster state updated, version [{}], source [{}]\n{}", newClusterState.version(), task.source, newClusterState);
} else {
LOGGER.debug("cluster state updated, version [{}], source [{}]", newClusterState.version(), task.source);
}
try {
applyChanges(task, previousClusterState, newClusterState, stopWatch);
TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
LOGGER.debug("processing [{}]: took [{}] done applying updated cluster state (version: {}, uuid: {})", task.source, executionTime, newClusterState.version(), newClusterState.stateUUID());
warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch);
task.listener.onSuccess(task.source);
} catch (Exception e) {
TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
if (LOGGER.isTraceEnabled()) {
LOGGER.warn(new ParameterizedMessage("failed to apply updated cluster state in [{}]:\nversion [{}], uuid [{}], source [{}]\n{}", executionTime, newClusterState.version(), newClusterState.stateUUID(), task.source, newClusterState), e);
} else {
LOGGER.warn(new ParameterizedMessage("failed to apply updated cluster state in [{}]:\nversion [{}], uuid [{}], source [{}]", executionTime, newClusterState.version(), newClusterState.stateUUID(), task.source), e);
}
// continue we will retry with the same cluster state but that might not help.
assert applicationMayFail();
task.listener.onFailure(task.source, e);
}
}
}
use of io.crate.common.unit.TimeValue in project crate by crate.
the class MasterService method runTasks.
private void runTasks(TaskInputs taskInputs) {
final String summary = taskInputs.summary;
if (!lifecycle.started()) {
LOGGER.debug("processing [{}]: ignoring, master service not started", summary);
return;
}
LOGGER.debug("executing cluster state update for [{}]", summary);
final ClusterState previousClusterState = state();
if (!previousClusterState.nodes().isLocalNodeElectedMaster() && taskInputs.runOnlyWhenMaster()) {
LOGGER.debug("failing [{}]: local node is no longer master", summary);
taskInputs.onNoLongerMaster();
return;
}
final long computationStartTime = threadPool.relativeTimeInMillis();
final TaskOutputs taskOutputs = calculateTaskOutputs(taskInputs, previousClusterState);
taskOutputs.notifyFailedTasks();
final TimeValue computationTime = getTimeSince(computationStartTime);
logExecutionTime(computationTime, "compute cluster state update", summary);
if (taskOutputs.clusterStateUnchanged()) {
final long notificationStartTime = threadPool.relativeTimeInMillis();
taskOutputs.notifySuccessfulTasksOnUnchangedClusterState();
final TimeValue executionTime = getTimeSince(notificationStartTime);
logExecutionTime(executionTime, "notify listeners on unchanged cluster state", summary);
} else {
final ClusterState newClusterState = taskOutputs.newClusterState;
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("cluster state updated, source [{}]\n{}", summary, newClusterState);
} else {
LOGGER.debug("cluster state updated, version [{}], source [{}]", newClusterState.version(), summary);
}
final long publicationStartTime = threadPool.relativeTimeInMillis();
try {
ClusterChangedEvent clusterChangedEvent = new ClusterChangedEvent(summary, newClusterState, previousClusterState);
// new cluster state, notify all listeners
final DiscoveryNodes.Delta nodesDelta = clusterChangedEvent.nodesDelta();
if (nodesDelta.hasChanges() && LOGGER.isInfoEnabled()) {
String nodeSummary = nodesDelta.shortSummary();
if (nodeSummary.length() > 0) {
LOGGER.info("{}, term: {}, version: {}, reason: {}", summary, newClusterState.term(), newClusterState.version(), nodeSummary);
}
}
LOGGER.debug("publishing cluster state version [{}]", newClusterState.version());
publish(clusterChangedEvent, taskOutputs, publicationStartTime);
} catch (Exception e) {
handleException(summary, publicationStartTime, newClusterState, e);
}
}
}
use of io.crate.common.unit.TimeValue in project crate by crate.
the class MasterService method handleException.
private void handleException(String summary, long startTimeMillis, ClusterState newClusterState, Exception e) {
final TimeValue executionTime = getTimeSince(startTimeMillis);
final long version = newClusterState.version();
final String stateUUID = newClusterState.stateUUID();
final String fullState = newClusterState.toString();
LOGGER.warn(new ParameterizedMessage("took [{}] and then failed to publish updated cluster state (version: {}, uuid: {}) for [{}]:\n{}", executionTime, version, stateUUID, summary, fullState), e);
// TODO: do we want to call updateTask.onFailure here?
}
use of io.crate.common.unit.TimeValue in project crate by crate.
the class RecoverySourceHandler method phase1.
/**
* Perform phase1 of the recovery operations. Once this {@link IndexCommit}
* snapshot has been performed no commit operations (files being fsync'd)
* are effectively allowed on this index until all recovery phases are done
* <p>
* Phase1 examines the segment files on the target node and copies over the
* segments that are missing. Only segments that have the same size and
* checksum can be reused
*/
void phase1(IndexCommit snapshot, long startingSeqNo, IntSupplier translogOps, ActionListener<SendFileResult> listener) {
cancellableThreads.checkForCancel();
final Store store = shard.store();
try {
final StopWatch stopWatch = new StopWatch().start();
final Store.MetadataSnapshot recoverySourceMetadata;
try {
recoverySourceMetadata = store.getMetadata(snapshot);
} catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
shard.failShard("recovery", ex);
throw ex;
}
for (String name : snapshot.getFileNames()) {
final StoreFileMetadata md = recoverySourceMetadata.get(name);
if (md == null) {
logger.info("Snapshot differs from actual index for file: {} meta: {}", name, recoverySourceMetadata.asMap());
throw new CorruptIndexException("Snapshot differs from actual index - maybe index was removed metadata has " + recoverySourceMetadata.asMap().size() + " files", name);
}
}
if (canSkipPhase1(recoverySourceMetadata, request.metadataSnapshot()) == false) {
final List<String> phase1FileNames = new ArrayList<>();
final List<Long> phase1FileSizes = new ArrayList<>();
final List<String> phase1ExistingFileNames = new ArrayList<>();
final List<Long> phase1ExistingFileSizes = new ArrayList<>();
// Total size of segment files that are recovered
long totalSizeInBytes = 0;
// Total size of segment files that were able to be re-used
long existingTotalSizeInBytes = 0;
// Generate a "diff" of all the identical, different, and missing
// segment files on the target node, using the existing files on
// the source node
final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
for (StoreFileMetadata md : diff.identical) {
phase1ExistingFileNames.add(md.name());
phase1ExistingFileSizes.add(md.length());
existingTotalSizeInBytes += md.length();
if (logger.isTraceEnabled()) {
logger.trace("recovery [phase1]: not recovering [{}], exist in local store and has checksum [{}]," + " size [{}]", md.name(), md.checksum(), md.length());
}
totalSizeInBytes += md.length();
}
List<StoreFileMetadata> phase1Files = new ArrayList<>(diff.different.size() + diff.missing.size());
phase1Files.addAll(diff.different);
phase1Files.addAll(diff.missing);
for (StoreFileMetadata md : phase1Files) {
if (request.metadataSnapshot().asMap().containsKey(md.name())) {
logger.trace("recovery [phase1]: recovering [{}], exists in local store, but is different: remote [{}], local [{}]", md.name(), request.metadataSnapshot().asMap().get(md.name()), md);
} else {
logger.trace("recovery [phase1]: recovering [{}], does not exist in remote", md.name());
}
phase1FileNames.add(md.name());
phase1FileSizes.add(md.length());
totalSizeInBytes += md.length();
}
logger.trace("recovery [phase1]: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]", phase1FileNames.size(), new ByteSizeValue(totalSizeInBytes), phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSizeInBytes));
final StepListener<Void> sendFileInfoStep = new StepListener<>();
final StepListener<Void> sendFilesStep = new StepListener<>();
final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
final StepListener<Void> cleanFilesStep = new StepListener<>();
cancellableThreads.checkForCancel();
recoveryTarget.receiveFileInfo(phase1FileNames, phase1FileSizes, phase1ExistingFileNames, phase1ExistingFileSizes, translogOps.getAsInt(), sendFileInfoStep);
sendFileInfoStep.whenComplete(r -> sendFiles(store, phase1Files.toArray(new StoreFileMetadata[0]), translogOps, sendFilesStep), listener::onFailure);
sendFilesStep.whenComplete(r -> createRetentionLease(startingSeqNo, createRetentionLeaseStep), listener::onFailure);
createRetentionLeaseStep.whenComplete(retentionLease -> {
final long lastKnownGlobalCheckpoint = shard.getLastKnownGlobalCheckpoint();
assert retentionLease == null || retentionLease.retainingSequenceNumber() - 1 <= lastKnownGlobalCheckpoint : retentionLease + " vs " + lastKnownGlobalCheckpoint;
// Establishes new empty translog on the replica with global checkpoint set to lastKnownGlobalCheckpoint. We want
// the commit we just copied to be a safe commit on the replica, so why not set the global checkpoint on the replica
// to the max seqno of this commit? Because (in rare corner cases) this commit might not be a safe commit here on
// the primary, and in these cases the max seqno would be too high to be valid as a global checkpoint.
cleanFiles(store, recoverySourceMetadata, translogOps, lastKnownGlobalCheckpoint, cleanFilesStep);
}, listener::onFailure);
final long totalSize = totalSizeInBytes;
final long existingTotalSize = existingTotalSizeInBytes;
cleanFilesStep.whenComplete(r -> {
final TimeValue took = stopWatch.totalTime();
logger.trace("recovery [phase1]: took [{}]", took);
listener.onResponse(new SendFileResult(phase1FileNames, phase1FileSizes, totalSize, phase1ExistingFileNames, phase1ExistingFileSizes, existingTotalSize, took));
}, listener::onFailure);
} else {
logger.trace("skipping [phase1] since source and target have identical sync id [{}]", recoverySourceMetadata.getSyncId());
// but we must still create a retention lease
final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
createRetentionLease(startingSeqNo, createRetentionLeaseStep);
createRetentionLeaseStep.whenComplete(retentionLease -> {
final TimeValue took = stopWatch.totalTime();
logger.trace("recovery [phase1]: took [{}]", took);
listener.onResponse(new SendFileResult(Collections.emptyList(), Collections.emptyList(), 0L, Collections.emptyList(), Collections.emptyList(), 0L, took));
}, listener::onFailure);
}
} catch (Exception e) {
throw new RecoverFilesRecoveryException(request.shardId(), 0, new ByteSizeValue(0L), e);
}
}
Aggregations