Use of org.apache.cassandra.distributed.shared.RepairResult in project cassandra by apache.
The class PreviewRepairTest, method repair.
/**
 * Returns a RepairResult pair of [repair success, was inconsistent].
 */
private static IIsolatedExecutor.SerializableCallable<RepairResult> repair(Map<String, String> options) {
    return () -> {
        Condition await = newOneTimeCondition();
        AtomicBoolean success = new AtomicBoolean(true);
        AtomicBoolean wasInconsistent = new AtomicBoolean(false);
        // run the repair with a progress listener that records failures and
        // "Repaired data is inconsistent" notifications, and signals on completion
        instance.repair(KEYSPACE, options, of((tag, event) -> {
            if (event.getType() == ERROR) {
                success.set(false);
                await.signalAll();
            } else if (event.getType() == NOTIFICATION && event.getMessage().contains("Repaired data is inconsistent")) {
                wasInconsistent.set(true);
            } else if (event.getType() == COMPLETE) {
                await.signalAll();
            }
        }));
        try {
            await.await(1, MINUTES);
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        return new RepairResult(success.get(), wasInconsistent.get());
    };
}
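All of the snippets on this page build their repair configuration through an options(...) helper defined elsewhere in PreviewRepairTest and not shown here. The following is only a minimal sketch of what such a helper might look like, assuming option keys that follow org.apache.cassandra.repair.messages.RepairOption ("previewKind", "incremental", "ranges") and a REPAIRED preview kind; the real helper may differ.

// Hypothetical sketch only -- the actual options(...) helper lives in PreviewRepairTest.
// Key names ("previewKind", "incremental", "ranges") are assumed to follow RepairOption.
private static Map<String, String> options(boolean preview, boolean full) {
    Map<String, String> opts = new HashMap<>();
    // preview of repaired data vs. a normal repair
    opts.put("previewKind", preview ? "REPAIRED" : "NONE");
    // incremental unless a full repair was requested
    opts.put("incremental", Boolean.toString(!full));
    return opts;
}

// Assumed overload used by testFinishingNonIntersectingIncRepairDuringPreview below,
// restricting the repair to an explicit "<leftToken>:<rightToken>" range string.
private static Map<String, String> options(boolean preview, boolean full, String range) {
    Map<String, String> opts = options(preview, full);
    opts.put("ranges", range);
    return opts;
}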
Use of org.apache.cassandra.distributed.shared.RepairResult in project cassandra by apache.
The class PreviewRepairTest, method testFinishingIncRepairDuringPreview.
/**
 * Another case where the repaired datasets could mismatch: an incremental repair finishes just as the preview
 * repair is starting up.
 *
 * The test exercises this case:
 * 1. start a preview repair
 * 2. pause the validation requests from node1 -> node2
 * 3. node1 starts its validation
 * 4. run an incremental repair, which completes fine
 * 5. node2 resumes its validation
 *
 * node2 would now include sstables from the second incremental repair that node1 does not.
 * The preview should fail, since any preview repair that is still ongoing when an incremental repair
 * finishes (step 4 above) is failed.
 */
@Test
public void testFinishingIncRepairDuringPreview() throws IOException, InterruptedException, ExecutionException {
    ExecutorService es = Executors.newSingleThreadExecutor();
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        cluster.get(1).callOnInstance(repair(options(false, false)));
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));

        Condition previewRepairStarted = newOneTimeCondition();
        Condition continuePreviewRepair = newOneTimeCondition();
        DelayFirstRepairTypeMessageFilter filter = validationRequest(previewRepairStarted, continuePreviewRepair);
        // this pauses the validation request sent from node1 to node2 until we have run a full inc repair below
        cluster.filters().outbound().verbs(VALIDATION_REQ.id).from(1).to(2).messagesMatching(filter).drop();

        Future<RepairResult> rsFuture = es.submit(() -> cluster.get(1).callOnInstance(repair(options(true, false))));
        previewRepairStarted.await();
        // this needs to finish before the preview repair is unpaused on node2
        cluster.get(1).callOnInstance(repair(options(false, false)));
        continuePreviewRepair.signalAll();

        RepairResult rs = rsFuture.get();
        // preview repair should have failed
        assertFalse(rs.success);
        // and no mismatches should have been reported
        assertFalse(rs.wasInconsistent);
    } finally {
        es.shutdown();
    }
}
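validationRequest(...) above returns a DelayFirstRepairTypeMessageFilter, which is defined elsewhere in PreviewRepairTest. A plausible sketch of the idea, with an assumed class shape and field names, is below: the matcher signals the "started" condition when the first matching message goes out, blocks until the test signals "continue", and then returns false so the message is delivered rather than dropped.

// Sketch (assumed shape) of the filter behind validationRequest(...), also reused by
// finalizePropose(...) in the next test: delay the FIRST matching message between the
// two conditions, never actually drop it.
static class DelayFirstRepairTypeMessageFilter implements IMessageFilters.Matcher {
    private final Condition started;
    private final Condition continueCondition;
    private final AtomicBoolean delayedOnce = new AtomicBoolean(false);

    DelayFirstRepairTypeMessageFilter(Condition started, Condition continueCondition) {
        this.started = started;
        this.continueCondition = continueCondition;
    }

    @Override
    public boolean matches(int from, int to, IMessage message) {
        // verb filtering is done at the call site via .verbs(VALIDATION_REQ.id), so only
        // the first matching message needs to be held here
        if (delayedOnce.compareAndSet(false, true)) {
            started.signalAll();
            try {
                continueCondition.await();
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }
        return false; // returning false means the message is not dropped, only delayed
    }
}

// Hypothetical factory matching the call sites in these tests.
static DelayFirstRepairTypeMessageFilter validationRequest(Condition started, Condition continueCondition) {
    return new DelayFirstRepairTypeMessageFilter(started, continueCondition);
}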
Use of org.apache.cassandra.distributed.shared.RepairResult in project cassandra by apache.
The class PreviewRepairTest, method testConcurrentIncRepairDuringPreview.
/**
 * Tests the case where an incremental repair is running, but has not yet completed, when the preview repair's
 * validation compaction starts.
 */
@Test
public void testConcurrentIncRepairDuringPreview() throws IOException, InterruptedException, ExecutionException {
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        cluster.get(1).callOnInstance(repair(options(false, false)));
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));

        Condition previewRepairStarted = newOneTimeCondition();
        Condition continuePreviewRepair = newOneTimeCondition();
        // this pauses the validation request sent from node1 to node2 until the inc repair below has run
        cluster.filters().outbound().verbs(VALIDATION_REQ.id).from(1).to(2).messagesMatching(validationRequest(previewRepairStarted, continuePreviewRepair)).drop();

        Condition irRepairStarted = newOneTimeCondition();
        Condition continueIrRepair = newOneTimeCondition();
        // this blocks the IR from committing, so we can re-enable the preview
        cluster.filters().outbound().verbs(FINALIZE_PROPOSE_MSG.id).from(1).to(2).messagesMatching(finalizePropose(irRepairStarted, continueIrRepair)).drop();

        Future<RepairResult> previewResult = cluster.get(1).asyncCallsOnInstance(repair(options(true, false))).call();
        previewRepairStarted.await();

        // trigger the IR and wait until it is ready to commit
        Future<RepairResult> irResult = cluster.get(1).asyncCallsOnInstance(repair(options(false, false))).call();
        irRepairStarted.await();

        // unblock the preview repair and wait for it to complete
        continuePreviewRepair.signalAll();
        RepairResult rs = previewResult.get();
        // preview repair should have failed
        assertFalse(rs.success);
        // and no mismatches should have been reported
        assertFalse(rs.wasInconsistent);

        continueIrRepair.signalAll();
        RepairResult ir = irResult.get();
        assertTrue(ir.success);
        // not a preview, so we don't care about preview notifications
        assertFalse(ir.wasInconsistent);
    }
}
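finalizePropose(...) is assumed to be the sibling factory of validationRequest(...): the same delay-the-first-message filter, installed on the FINALIZE_PROPOSE_MSG verb so the incremental repair is held just before its session commits. A hedged one-liner matching that assumption:

// Hypothetical factory, mirroring validationRequest(...): the FINALIZE_PROPOSE_MSG verb
// selection happens at the call site, so the same delaying filter can be reused.
static DelayFirstRepairTypeMessageFilter finalizePropose(Condition started, Condition continueCondition) {
    return new DelayFirstRepairTypeMessageFilter(started, continueCondition);
}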
Use of org.apache.cassandra.distributed.shared.RepairResult in project cassandra by apache.
The class PreviewRepairTest, method testFinishingNonIntersectingIncRepairDuringPreview.
/**
 * Same as testFinishingIncRepairDuringPreview, but the previewed range does not intersect the incremental repair,
 * so both the preview and the incremental repair should finish cleanly (without any mismatches).
 */
@Test
public void testFinishingNonIntersectingIncRepairDuringPreview() throws IOException, InterruptedException, ExecutionException {
    ExecutorService es = Executors.newSingleThreadExecutor();
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        assertTrue(cluster.get(1).callOnInstance(repair(options(false, false))).success);
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));

        // pause preview repair validation messages on node2 until node1 has finished
        Condition previewRepairStarted = newOneTimeCondition();
        Condition continuePreviewRepair = newOneTimeCondition();
        DelayFirstRepairTypeMessageFilter filter = validationRequest(previewRepairStarted, continuePreviewRepair);
        cluster.filters().outbound().verbs(VALIDATION_REQ.id).from(1).to(2).messagesMatching(filter).drop();

        // get local ranges to repair two separate ranges:
        List<String> localRanges = cluster.get(1).callOnInstance(() -> {
            List<String> res = new ArrayList<>();
            for (Range<Token> r : instance.getLocalReplicas(KEYSPACE).ranges())
                res.add(r.left.getTokenValue() + ":" + r.right.getTokenValue());
            return res;
        });
        assertEquals(2, localRanges.size());

        Future<RepairResult> repairStatusFuture = es.submit(() -> cluster.get(1).callOnInstance(repair(options(true, false, localRanges.get(0)))));
        // wait for node1 to start validation compaction
        previewRepairStarted.await();
        // this needs to finish before the preview repair is unpaused on node2
        assertTrue(cluster.get(1).callOnInstance(repair(options(false, false, localRanges.get(1)))).success);
        continuePreviewRepair.signalAll();

        RepairResult rs = repairStatusFuture.get();
        // repair should succeed
        assertTrue(rs.success);
        // and no mismatches
        assertFalse(rs.wasInconsistent);
    } finally {
        es.shutdown();
    }
}
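The insert(...) helper used throughout these tests is also defined elsewhere in PreviewRepairTest. Below is a minimal sketch under the assumption that it simply writes `count` consecutive rows through the given coordinator; the method shape and the consistency level are guesses, not the real implementation.

// Hypothetical sketch of the insert(...) helper used by every test above:
// writes rows with ids [start, start + count) through the given coordinator.
private static void insert(ICoordinator coordinator, int start, int count) {
    for (int i = start; i < start + count; i++)
        coordinator.execute("insert into " + KEYSPACE + ".tbl (id, t) values (?, ?)", ConsistencyLevel.ALL, i, i);
}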
Use of org.apache.cassandra.distributed.shared.RepairResult in project cassandra by apache.
The class PreviewRepairTest, method testWithMismatchingPending.
/**
 * Makes sure that the repaired sstables do not match on the two nodes, by disabling autocompaction on node2
 * and then running an incremental repair.
 */
@Test
public void testWithMismatchingPending() throws Throwable {
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        cluster.get(1).callOnInstance(repair(options(false, false)));
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));

        // make sure that all sstables have moved to repaired by triggering a compaction
        // also disables autocompaction on the nodes
        cluster.forEach((node) -> node.runOnInstance(() -> {
            ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore("tbl");
            FBUtilities.waitOnFutures(CompactionManager.instance.submitBackground(cfs));
            cfs.disableAutoCompaction();
        }));

        long[] marks = logMark(cluster);
        cluster.get(1).callOnInstance(repair(options(false, false)));

        // now re-enable autocompaction on node1, this moves the sstables for the new repair to repaired
        cluster.get(1).runOnInstance(() -> {
            ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore("tbl");
            cfs.enableAutoCompaction();
            FBUtilities.waitOnFutures(CompactionManager.instance.submitBackground(cfs));
        });
        waitLogsRepairFullyFinished(cluster, marks);

        RepairResult rs = cluster.get(1).callOnInstance(repair(options(true, false)));
        // preview repair should succeed
        assertTrue(rs.success);
        // and we should see no mismatches
        assertFalse(rs.wasInconsistent);
    }
}
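logMark(...) and waitLogsRepairFullyFinished(...) come from the shared repair test utilities. The sketch below only illustrates the assumed mechanism using the in-jvm dtest LogAction API (logs().mark() to remember the current log position per node, then watching from that mark for a line indicating the incremental repair session has finished). The log pattern is a placeholder, not the real message.

// Hypothetical sketches of the log helpers used above, assuming the dtest LogAction API.
static long[] logMark(Cluster cluster) {
    long[] marks = new long[cluster.size()];
    for (int i = 0; i < cluster.size(); i++)
        marks[i] = cluster.get(i + 1).logs().mark(); // remember the current log position per node
    return marks;
}

static void waitLogsRepairFullyFinished(Cluster cluster, long[] marks) {
    for (int i = 0; i < cluster.size(); i++) {
        try {
            // placeholder pattern -- the real helper waits for the actual "repair finished" log line
            cluster.get(i + 1).logs().watchFor(marks[i], "<incremental repair session finalized>");
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}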