Example usage of org.apache.cassandra.streaming.PreviewKind in the Apache Cassandra project: the RepairJob class, method createOptimisedSyncingSyncTasks.
/**
 * Builds an optimised set of sync tasks from the merkle tree responses: the pairwise
 * differences are first reduced (preferring same-DC sources) so each node fetches every
 * mismatching range only once, then one task per (destination, source) pair is created.
 * Transient replicas are skipped because nothing is ever streamed to them.
 *
 * @param desc          descriptor of the repair job these tasks belong to
 * @param trees         merkle tree responses, one per participating endpoint
 * @param local         address of this node; fetches to it become LocalSyncTasks
 * @param isTransient   predicate identifying transient replicas (excluded as destinations)
 * @param getDC         maps an endpoint to its data center name, used for source preference
 * @param isIncremental whether this is an incremental repair (propagates the parent session id)
 * @param previewKind   preview mode of the repair, forwarded to each task
 * @return the list of sync tasks to run
 */
static List<SyncTask> createOptimisedSyncingSyncTasks(RepairJobDesc desc, List<TreeResponse> trees, InetAddressAndPort local, Predicate<InetAddressAndPort> isTransient, Function<InetAddressAndPort, String> getDC, boolean isIncremental, PreviewKind previewKind) {
    long startedAt = currentTimeMillis();
    List<SyncTask> tasks = new ArrayList<>();
    // We need to difference all trees one against another
    DifferenceHolder differences = new DifferenceHolder(trees);
    logger.trace("diffs = {}", differences);
    // When several candidates hold the same range, prefer ones in the destination's own DC.
    PreferedNodeFilter sameDCPreference = (streaming, candidates) ->
        candidates.stream()
                  .filter(candidate -> getDC.apply(streaming).equals(getDC.apply(candidate)))
                  .collect(Collectors.toSet());
    ImmutableMap<InetAddressAndPort, HostDifferences> reduced = ReduceHelper.reduce(differences, sameDCPreference);
    for (TreeResponse tree : trees) {
        InetAddressAndPort destination = tree.endpoint;
        // we don't stream to transient replicas
        if (isTransient.test(destination))
            continue;
        HostDifferences toStream = reduced.get(destination);
        if (toStream == null) {
            logger.trace("Node {} has nothing to stream", destination);
            continue;
        }
        Preconditions.checkArgument(toStream.get(destination).isEmpty(), "We should not fetch ranges from ourselves");
        for (InetAddressAndPort source : toStream.hosts()) {
            List<Range<Token>> ranges = new ArrayList<>(toStream.get(source));
            assert !ranges.isEmpty();
            logger.trace("{} is about to fetch {} from {}", destination, ranges, source);
            // Local destination streams directly; remote destinations get a forwarded request.
            SyncTask task = destination.equals(local)
                            ? new LocalSyncTask(desc, destination, source, ranges, isIncremental ? desc.parentSessionId : null, true, false, previewKind)
                            : new AsymmetricRemoteSyncTask(desc, destination, source, ranges, previewKind);
            tasks.add(task);
        }
    }
    logger.info("Created {} optimised sync tasks based on {} merkle tree responses for {} (took: {}ms)", tasks.size(), trees.size(), desc.parentSessionId, currentTimeMillis() - startedAt);
    logger.trace("Optimised sync tasks for {}: {}", desc.parentSessionId, tasks);
    return tasks;
}
Example usage of org.apache.cassandra.streaming.PreviewKind in the Apache Cassandra project: the RepairMessageVerbHandler class, method doVerb.
/**
 * Dispatches an incoming repair message to the appropriate handler based on its verb.
 * Prepare/snapshot/cleanup verbs acknowledge with an empty response on success; validation
 * failures are reported by sending a ValidationResponse carrying only the descriptor.
 * Any exception removes the parent repair session (when known) and is rethrown wrapped
 * in a RuntimeException.
 *
 * @param message incoming repair message; the concrete payload type depends on the verb
 */
public void doVerb(final Message<RepairMessage> message) {
    // TODO add cancel/interrupt message
    RepairJobDesc desc = message.payload.desc;
    try {
        switch(message.verb()) {
            case PREPARE_MSG:
                PrepareMessage prepareMessage = (PrepareMessage) message.payload;
                logger.debug("Preparing, {}", prepareMessage);
                // Refuse to start when too many compactions are already pending.
                if (!ActiveRepairService.verifyCompactionsPendingThreshold(prepareMessage.parentRepairSession, prepareMessage.previewKind)) {
                    // error is logged in verifyCompactionsPendingThreshold
                    sendFailureResponse(message);
                    return;
                }
                // Resolve every table referenced by the message; abort if any was dropped meanwhile.
                List<ColumnFamilyStore> columnFamilyStores = new ArrayList<>(prepareMessage.tableIds.size());
                for (TableId tableId : prepareMessage.tableIds) {
                    ColumnFamilyStore columnFamilyStore = ColumnFamilyStore.getIfExists(tableId);
                    if (columnFamilyStore == null) {
                        logErrorAndSendFailureResponse(String.format("Table with id %s was dropped during prepare phase of repair", tableId), message);
                        return;
                    }
                    columnFamilyStores.add(columnFamilyStore);
                }
                // Register the parent session locally, then acknowledge to the coordinator.
                ActiveRepairService.instance.registerParentRepairSession(prepareMessage.parentRepairSession, message.from(), columnFamilyStores, prepareMessage.ranges, prepareMessage.isIncremental, prepareMessage.timestamp, prepareMessage.isGlobal, prepareMessage.previewKind);
                MessagingService.instance().send(message.emptyResponse(), message.from());
                break;
            case SNAPSHOT_MSG:
                logger.debug("Snapshotting {}", desc);
                final ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(desc.keyspace, desc.columnFamily);
                if (cfs == null) {
                    logErrorAndSendFailureResponse(String.format("Table %s.%s was dropped during snapshot phase of repair %s", desc.keyspace, desc.columnFamily, desc.parentSessionId), message);
                    return;
                }
                ActiveRepairService.ParentRepairSession prs = ActiveRepairService.instance.getParentRepairSession(desc.parentSessionId);
                prs.setHasSnapshots();
                TableRepairManager repairManager = cfs.getRepairManager();
                // Global sessions snapshot the whole parent-session range set once; otherwise
                // snapshot only this descriptor's ranges (second arg 'true' presumably forces
                // a fresh snapshot — confirm against TableRepairManager.snapshot).
                if (prs.isGlobal) {
                    repairManager.snapshot(desc.parentSessionId.toString(), prs.getRanges(), false);
                } else {
                    repairManager.snapshot(desc.parentSessionId.toString(), desc.ranges, true);
                }
                logger.debug("Enqueuing response to snapshot request {} to {}", desc.sessionId, message.from());
                MessagingService.instance().send(message.emptyResponse(), message.from());
                break;
            case VALIDATION_REQ:
                ValidationRequest validationRequest = (ValidationRequest) message.payload;
                logger.debug("Validating {}", validationRequest);
                // trigger read-only compaction
                ColumnFamilyStore store = ColumnFamilyStore.getIfExists(desc.keyspace, desc.columnFamily);
                if (store == null) {
                    logger.error("Table {}.{} was dropped during snapshot phase of repair {}", desc.keyspace, desc.columnFamily, desc.parentSessionId);
                    // A ValidationResponse with only the descriptor signals validation failure.
                    MessagingService.instance().send(Message.out(VALIDATION_RSP, new ValidationResponse(desc)), message.from());
                    return;
                }
                ActiveRepairService.instance.consistent.local.maybeSetRepairing(desc.parentSessionId);
                PreviewKind previewKind;
                try {
                    previewKind = previewKind(desc.parentSessionId);
                } catch (NoSuchRepairSessionException e) {
                    // Parent session vanished between dispatch and handling: fail this validation.
                    logger.warn("Parent repair session {} has been removed, failing repair", desc.parentSessionId);
                    MessagingService.instance().send(Message.out(VALIDATION_RSP, new ValidationResponse(desc)), message.from());
                    return;
                }
                Validator validator = new Validator(desc, message.from(), validationRequest.nowInSec, isIncremental(desc.parentSessionId), previewKind);
                ValidationManager.instance.submitValidation(store, validator);
                break;
            case SYNC_REQ:
                // forwarded sync request
                SyncRequest request = (SyncRequest) message.payload;
                logger.debug("Syncing {}", request);
                StreamingRepairTask task = new StreamingRepairTask(desc, request.initiator, request.src, request.dst, request.ranges, isIncremental(desc.parentSessionId) ? desc.parentSessionId : null, request.previewKind, request.asymmetric);
                // NOTE(review): run() executes on the messaging thread here — confirm the task
                // itself hands the streaming work off asynchronously.
                task.run();
                break;
            case CLEANUP_MSG:
                logger.debug("cleaning up repair");
                CleanupMessage cleanup = (CleanupMessage) message.payload;
                ActiveRepairService.instance.removeParentRepairSession(cleanup.parentRepairSession);
                MessagingService.instance().send(message.emptyResponse(), message.from());
                break;
            // The consistent-repair verbs below are delegated to the local/coordinated
            // session state machines; they send their own responses as needed.
            case PREPARE_CONSISTENT_REQ:
                ActiveRepairService.instance.consistent.local.handlePrepareMessage(message.from(), (PrepareConsistentRequest) message.payload);
                break;
            case PREPARE_CONSISTENT_RSP:
                ActiveRepairService.instance.consistent.coordinated.handlePrepareResponse((PrepareConsistentResponse) message.payload);
                break;
            case FINALIZE_PROPOSE_MSG:
                ActiveRepairService.instance.consistent.local.handleFinalizeProposeMessage(message.from(), (FinalizePropose) message.payload);
                break;
            case FINALIZE_PROMISE_MSG:
                ActiveRepairService.instance.consistent.coordinated.handleFinalizePromiseMessage((FinalizePromise) message.payload);
                break;
            case FINALIZE_COMMIT_MSG:
                ActiveRepairService.instance.consistent.local.handleFinalizeCommitMessage(message.from(), (FinalizeCommit) message.payload);
                break;
            case FAILED_SESSION_MSG:
                // Failure is propagated to both coordinator-side and participant-side state.
                FailSession failure = (FailSession) message.payload;
                ActiveRepairService.instance.consistent.coordinated.handleFailSessionMessage(failure);
                ActiveRepairService.instance.consistent.local.handleFailSessionMessage(message.from(), failure);
                break;
            case STATUS_REQ:
                ActiveRepairService.instance.consistent.local.handleStatusRequest(message.from(), (StatusRequest) message.payload);
                break;
            case STATUS_RSP:
                ActiveRepairService.instance.consistent.local.handleStatusResponse(message.from(), (StatusResponse) message.payload);
                break;
            default:
                // Unknown repair verbs fall through to the generic handler.
                ActiveRepairService.instance.handleMessage(message);
                break;
        }
    } catch (Exception e) {
        // Clean up the parent session so it does not linger, then let the caller see the error.
        logger.error("Got error, removing parent repair session");
        if (desc != null && desc.parentSessionId != null)
            ActiveRepairService.instance.removeParentRepairSession(desc.parentSessionId);
        throw new RuntimeException(e);
    }
}
Example usage of org.apache.cassandra.streaming.PreviewKind in the Apache Cassandra project: the RepairOption class, method parse.
/**
* Construct RepairOptions object from given map of Strings.
* <p>
* Available options are:
*
* <table>
* <caption>Repair Options</caption>
* <thead>
* <tr>
* <th>key</th>
* <th>value</th>
* <th>default (when key not given)</th>
* </tr>
* </thead>
* <tbody>
* <tr>
* <td>parallelism</td>
* <td>"sequential", "parallel" or "dc_parallel"</td>
* <td>"sequential"</td>
* </tr>
* <tr>
* <td>primaryRange</td>
* <td>"true" if perform repair only on primary range.</td>
* <td>false</td>
* </tr>
* <tr>
* <td>incremental</td>
* <td>"true" if perform incremental repair.</td>
* <td>false</td>
* </tr>
* <tr>
* <td>trace</td>
* <td>"true" if repair is traced.</td>
* <td>false</td>
* </tr>
* <tr>
* <td>jobThreads</td>
* <td>Number of threads to use to run repair job.</td>
* <td>1</td>
* </tr>
* <tr>
* <td>ranges</td>
* <td>Ranges to repair. A range is expressed as <start token>:<end token>
* and multiple ranges can be given as comma separated ranges(e.g. aaa:bbb,ccc:ddd).</td>
* <td></td>
* </tr>
* <tr>
* <td>columnFamilies</td>
* <td>Specify names of ColumnFamilies to repair.
* Multiple ColumnFamilies can be given as comma separated values(e.g. cf1,cf2,cf3).</td>
* <td></td>
* </tr>
* <tr>
* <td>dataCenters</td>
* <td>Specify names of data centers who participate in this repair.
* Multiple data centers can be given as comma separated values(e.g. dc1,dc2,dc3).</td>
* <td></td>
* </tr>
* <tr>
* <td>hosts</td>
* <td>Specify names of hosts who participate in this repair.
* Multiple hosts can be given as comma separated values(e.g. cass1,cass2).</td>
* <td></td>
* </tr>
* <tr>
* <td>pullRepair</td>
* <td>"true" if the repair should only stream data one way from a remote host to this host.
* This is only allowed if exactly 2 hosts are specified along with a token range that they share.</td>
* <td>false</td>
* </tr>
* <tr>
* <td>forceRepair</td>
* <td>"true" if the repair should continue, even if one of the replicas involved is down.
* <td>false</td>
* </tr>
* <tr>
 * <td>optimiseStreams</td>
 * <td>"true" if we should try to optimise the syncing to avoid transferring identical
 * ranges to the same host multiple times</td>
 * <td>false</td>
* </tr>
* </tbody>
* </table>
*
* @param options options to parse
* @param partitioner partitioner is used to construct token ranges
* @return RepairOptions object
*/
public static RepairOption parse(Map<String, String> options, IPartitioner partitioner) {
    // if no parallel option is given, then this will be "sequential" by default.
    RepairParallelism parallelism = RepairParallelism.fromName(options.get(PARALLELISM_KEY));
    boolean primaryRange = Boolean.parseBoolean(options.get(PRIMARY_RANGE_KEY));
    boolean incremental = Boolean.parseBoolean(options.get(INCREMENTAL_KEY));
    PreviewKind previewKind = PreviewKind.valueOf(options.getOrDefault(PREVIEW, PreviewKind.NONE.toString()));
    boolean trace = Boolean.parseBoolean(options.get(TRACE_KEY));
    boolean force = Boolean.parseBoolean(options.get(FORCE_REPAIR_KEY));
    boolean pullRepair = Boolean.parseBoolean(options.get(PULL_REPAIR_KEY));
    boolean ignoreUnreplicatedKeyspaces = Boolean.parseBoolean(options.get(IGNORE_UNREPLICATED_KS));
    int jobThreads = 1;
    if (options.containsKey(JOB_THREADS_KEY)) {
        try {
            jobThreads = Integer.parseInt(options.get(JOB_THREADS_KEY));
        } catch (NumberFormatException ignore) {
            // an unparsable value silently keeps the default of 1 job thread
        }
    }
    // ranges
    Set<Range<Token>> ranges = parseRanges(options.get(RANGES_KEY), partitioner);
    boolean asymmetricSyncing = Boolean.parseBoolean(options.get(OPTIMISE_STREAMS_KEY));
    RepairOption option = new RepairOption(parallelism, primaryRange, incremental, trace, jobThreads, ranges, !ranges.isEmpty(), pullRepair, force, previewKind, asymmetricSyncing, ignoreUnreplicatedKeyspaces);
    // data centers
    Collection<String> dataCenters = splitCommaDelimited(options.get(DATACENTERS_KEY));
    option.getDataCenters().addAll(dataCenters);
    // hosts
    Collection<String> hosts = splitCommaDelimited(options.get(HOSTS_KEY));
    option.getHosts().addAll(hosts);
    // columnfamilies
    option.getColumnFamilies().addAll(splitCommaDelimited(options.get(COLUMNFAMILIES_KEY)));
    // validate options
    if (jobThreads > MAX_JOB_THREADS) {
        throw new IllegalArgumentException("Too many job threads. Max is " + MAX_JOB_THREADS);
    }
    if (!dataCenters.isEmpty() && !hosts.isEmpty()) {
        throw new IllegalArgumentException("Cannot combine -dc and -hosts options.");
    }
    if (primaryRange && ((!dataCenters.isEmpty() && !option.isInLocalDCOnly()) || !hosts.isEmpty())) {
        throw new IllegalArgumentException("You need to run primary range repair on all nodes in the cluster.");
    }
    if (pullRepair) {
        if (hosts.size() != 2) {
            throw new IllegalArgumentException("Pull repair can only be performed between two hosts. Please specify two hosts, one of which must be this host.");
        } else if (ranges.isEmpty()) {
            throw new IllegalArgumentException("Token ranges must be specified when performing pull repair. Please specify at least one token range which both hosts have in common.");
        }
    }
    return option;
}

/**
 * Splits a comma-delimited option value into a set of trimmed, de-duplicated tokens.
 * A null input yields an empty set; StringTokenizer semantics mean empty segments
 * (e.g. "a,,b") are skipped, matching the original per-option parsing loops.
 *
 * @param value raw option string, possibly null
 * @return mutable set of trimmed tokens (empty when value is null)
 */
private static Collection<String> splitCommaDelimited(String value) {
    Collection<String> tokens = new HashSet<>();
    if (value != null) {
        StringTokenizer tokenizer = new StringTokenizer(value, ",");
        while (tokenizer.hasMoreTokens()) {
            tokens.add(tokenizer.nextToken().trim());
        }
    }
    return tokens;
}
Example usage of org.apache.cassandra.streaming.PreviewKind in the Apache Cassandra project: the PreviewRepairTask class, method performUnsafe.
@Override
public Future<CoordinatedRepairResult> performUnsafe(ExecutorPlus executor) {
    // Run the underlying repair, then turn its per-session results into a preview report.
    return runRepair(parentSession, false, executor, commonRanges, cfnames).map(result -> {
        if (result.hasFailed())
            return result;
        PreviewKind previewKind = options.getPreviewKind();
        Preconditions.checkState(previewKind != PreviewKind.NONE, "Preview is NONE");
        boolean repairedPreview = previewKind == PreviewKind.REPAIRED;
        SyncStatSummary summary = new SyncStatSummary(true);
        summary.consumeSessionResults(result.results);
        final String message;
        if (summary.isEmpty()) {
            // No differences were found between the replicas.
            message = repairedPreview ? "Repaired data is in sync" : "Previewed data was in sync";
        } else {
            message = (repairedPreview ? "Repaired data is inconsistent\n" : "Preview complete\n") + summary;
            RepairMetrics.previewFailures.inc();
            if (repairedPreview)
                // results is known to be present because the summary consumed it above
                maybeSnapshotReplicas(parentSession, keyspace, result.results.get());
        }
        notifier.notification(message);
        return result;
    });
}
Example usage of org.apache.cassandra.streaming.PreviewKind in the Apache Cassandra project: the CassandraStreamManager class, method createOutgoingStreams.
/**
 * Selects the sstables relevant to the requested ranges (filtered by preview kind or
 * pending-repair session), takes references on them, and wraps each into an outgoing
 * stream. References on sstables with no matching sections — and all references on any
 * failure — are released so nothing leaks.
 *
 * @param session       the stream session the outgoing streams belong to
 * @param replicas      ranges (full and transient) requested by the peer
 * @param pendingRepair pending repair session id, or ActiveRepairService.NO_PENDING_REPAIR
 * @param previewKind   preview mode; when previewing, its predicate picks the sstables
 * @return outgoing streams covering the requested ranges
 */
@Override
public Collection<OutgoingStream> createOutgoingStreams(StreamSession session, RangesAtEndpoint replicas, UUID pendingRepair, PreviewKind previewKind) {
    Refs<SSTableReader> refs = new Refs<>();
    try {
        final List<Range<PartitionPosition>> keyRanges = new ArrayList<>(replicas.size());
        for (Replica replica : replicas) keyRanges.add(Range.makeRowRange(replica.range()));
        // Acquire references on all canonical sstables that intersect the requested
        // ranges and pass the repair/preview filter.
        refs.addAll(cfs.selectAndReference(view -> {
            Set<SSTableReader> sstables = Sets.newHashSet();
            SSTableIntervalTree intervalTree = SSTableIntervalTree.build(view.select(SSTableSet.CANONICAL));
            // Choose which sstables qualify: preview repairs use the preview's own
            // predicate; otherwise filter by the pending repair session (or take all).
            Predicate<SSTableReader> predicate;
            if (previewKind.isPreview()) {
                predicate = previewKind.predicate();
            } else if (pendingRepair == ActiveRepairService.NO_PENDING_REPAIR) {
                // NOTE(review): reference comparison against the sentinel — assumes
                // NO_PENDING_REPAIR is the canonical instance (null in practice); confirm.
                predicate = Predicates.alwaysTrue();
            } else {
                predicate = s -> s.isPendingRepair() && s.getSSTableMetadata().pendingRepair.equals(pendingRepair);
            }
            for (Range<PartitionPosition> keyRange : keyRanges) {
                // still actually selecting what we wanted.
                for (SSTableReader sstable : Iterables.filter(View.sstablesInBounds(keyRange.left, keyRange.right, intervalTree), predicate)) {
                    sstables.add(sstable);
                }
            }
            if (logger.isDebugEnabled())
                logger.debug("ViewFilter for {}/{} sstables", sstables.size(), Iterables.size(view.select(SSTableSet.CANONICAL)));
            return sstables;
        }).refs);
        // Repaired sstables only stream the full ranges; unrepaired ones stream everything.
        List<Range<Token>> normalizedFullRanges = Range.normalize(replicas.onlyFull().ranges());
        List<Range<Token>> normalizedAllRanges = Range.normalize(replicas.ranges());
        // Create outgoing file streams for ranges possibly skipping repaired ranges in sstables
        List<OutgoingStream> streams = new ArrayList<>(refs.size());
        for (SSTableReader sstable : refs) {
            List<Range<Token>> ranges = sstable.isRepaired() ? normalizedFullRanges : normalizedAllRanges;
            List<SSTableReader.PartitionPositionBounds> sections = sstable.getPositionsForRanges(ranges);
            Ref<SSTableReader> ref = refs.get(sstable);
            // Nothing to stream from this sstable: drop its reference instead of leaking it.
            if (sections.isEmpty()) {
                ref.release();
                continue;
            }
            streams.add(new CassandraOutgoingFile(session.getStreamOperation(), ref, sections, ranges, sstable.estimatedKeysForRanges(ranges)));
        }
        return streams;
    } catch (Throwable t) {
        // Release every acquired reference before propagating the failure.
        refs.release();
        throw t;
    }
}
Aggregations