Use of org.apache.cassandra.exceptions.RepairException in the Apache Cassandra project.
Class RepairSession, method start.
/**
 * Starts this repair session by submitting one RepairJob per column family.
 *
 * <p>Returns immediately if the session is already terminated. Otherwise the
 * start of the session is logged and traced and, unless this is a preview,
 * recorded through {@code SystemDistributedKeyspace.startRepairs}. Two
 * conditions end the session before any job is created:
 * <ul>
 *   <li>the common range has no endpoints: {@code trySuccess} is called with
 *       an empty result list;</li>
 *   <li>the failure detector reports an endpoint as not alive and the range
 *       has no skipped replicas: {@code tryFailure} is called with an
 *       {@link IOException}.</li>
 * </ul>
 * In both cases a non-preview session also reports the message through
 * {@code SystemDistributedKeyspace.failRepairs}.
 *
 * <p>Otherwise a RepairJob is created for every column family, handed to the
 * executor, and a callback on the combined future either publishes the
 * session result or forces a shutdown with the failure.
 *
 * @param executor Executor the RepairJobs are submitted to
 */
public void start(ExecutorPlus executor) {
    if (terminated) {
        return;
    }

    logger.info("{} parentSessionId = {}: new session: will sync {} on range {} for {}.{}",
                previewKind.logPrefix(getId()),
                parentRepairSession,
                repairedNodes(),
                commonRange,
                keyspace,
                Arrays.toString(cfnames));
    Tracing.traceRepair("Syncing range {}", commonRange);
    if (!previewKind.isPreview()) {
        SystemDistributedKeyspace.startRepairs(getId(), parentRepairSession, keyspace, cfnames, commonRange);
    }

    // Nothing to sync with: report an empty result and stop here.
    if (commonRange.endpoints.isEmpty()) {
        String prefix = previewKind.logPrefix(getId());
        String notice = String.format("No neighbors to repair with on range %s: session completed", commonRange);
        logger.info("{} {}", prefix, notice);
        Tracing.traceRepair(notice);
        trySuccess(new RepairSessionResult(id, keyspace, commonRange.ranges, Lists.<RepairResult>newArrayList(), commonRange.hasSkippedReplicas));
        if (!previewKind.isPreview()) {
            SystemDistributedKeyspace.failRepairs(getId(), keyspace, cfnames, new RuntimeException(notice));
        }
        return;
    }

    // Every neighbor must be alive unless the range already has skipped replicas.
    for (InetAddressAndPort neighbor : commonRange.endpoints) {
        if (FailureDetector.instance.isAlive(neighbor) || commonRange.hasSkippedReplicas) {
            continue;
        }
        String reason = String.format("Cannot proceed on repair because a neighbor (%s) is dead: session failed", neighbor);
        logger.error("{} {}", previewKind.logPrefix(getId()), reason);
        Exception failure = new IOException(reason);
        tryFailure(failure);
        if (!previewKind.isPreview()) {
            SystemDistributedKeyspace.failRepairs(getId(), keyspace, cfnames, failure);
        }
        return;
    }

    // One RepairJob per column family, each submitted to the given executor.
    List<Future<RepairResult>> submitted = new ArrayList<>(cfnames.length);
    for (String table : cfnames) {
        RepairJob repairJob = new RepairJob(this, table);
        executor.execute(repairJob);
        submitted.add(repairJob);
    }

    // Publish the session outcome once the combined future of all jobs settles.
    FBUtilities.allOf(submitted).addCallback(new FutureCallback<List<RepairResult>>() {
        @Override
        public void onSuccess(List<RepairResult> results) {
            logger.info("{} {}", previewKind.logPrefix(getId()), "Session completed successfully");
            Tracing.traceRepair("Completed sync of range {}", commonRange);
            trySuccess(new RepairSessionResult(id, keyspace, commonRange.ranges, results, commonRange.hasSkippedReplicas));
            taskExecutor.shutdown();
            // mark this session as terminated
            terminate();
        }

        @Override
        public void onFailure(Throwable t) {
            String template = "{} Session completed with the following error";
            boolean warnOnly = Throwables.anyCauseMatches(t, RepairException::shouldWarn);
            if (warnOnly) {
                // Warn-level failures log only the message, without the stack trace.
                logger.warn(template + ": {}", previewKind.logPrefix(getId()), t.getMessage());
            } else {
                logger.error(template, previewKind.logPrefix(getId()), t);
            }
            Tracing.traceRepair("Session completed with the following error: {}", t);
            forceShutdown(t);
        }
    });
}
Use of org.apache.cassandra.exceptions.RepairException in the Apache Cassandra project.
Class RepairRunnable, method notifyError.
/**
 * Reports a repair failure.
 *
 * <p>A {@code SomeRepairFailedException} is ignored entirely. Any other error
 * is logged (as a warning when some cause matches
 * {@code RepairException::shouldWarn}, as an error otherwise), counted in
 * {@code StorageMetrics.repairExceptions}, published as an ERROR progress
 * event, remembered as the first error if none was recorded yet, and finally
 * passed to {@code maybeStoreParentRepairFailure}.
 *
 * @param error the failure to report
 */
@Override
public void notifyError(Throwable error) {
    // exception should be ignored
    if (error instanceof SomeRepairFailedException) {
        return;
    }

    boolean warnOnly = Throwables.anyCauseMatches(error, RepairException::shouldWarn);
    if (!warnOnly) {
        logger.error("Repair {} failed:", parentSession, error);
    } else {
        logger.warn("Repair {} aborted: {}", parentSession, error.getMessage());
        // The full stack trace is only emitted when debug logging is on.
        if (logger.isDebugEnabled()) {
            logger.debug("Repair {} aborted: ", parentSession, error);
        }
    }

    StorageMetrics.repairExceptions.inc();

    String description = String.format("Repair command #%d failed with error %s", cmd, error.getMessage());
    ProgressEvent failureEvent = new ProgressEvent(ProgressEventType.ERROR, progressCounter.get(), totalProgress, description);
    fireProgressEvent(failureEvent);

    firstError.compareAndSet(null, error);

    // since this can fail, update table only after updating in-memory and notification state
    maybeStoreParentRepairFailure(error);
}
Aggregations