Use of org.apache.cassandra.spark.data.partitioner.TokenPartitioner in project spark-cassandra-bulkreader by jberragan.
The class KryoSerializationTests, method testTokenPartitioner:
@Test
public void testTokenPartitioner() {
    qt().forAll(TestUtils.partitioners(),
                arbitrary().pick(Arrays.asList(3, 16, 128)),
                arbitrary().pick(Arrays.asList(1, 4, 16)),
                arbitrary().pick(Arrays.asList(4, 16, 64)))
        .checkAssert((partitioner, numInstances, defaultParallelism, numCores) -> {
            final CassandraRing ring = TestUtils.createRing(partitioner, numInstances);
            final TokenPartitioner tokenPartitioner = new TokenPartitioner(ring, defaultParallelism, numCores);

            // round-trip the TokenPartitioner through Kryo
            final Output out = KryoSerializationTests.serialize(tokenPartitioner);
            final TokenPartitioner deserialized = KryoSerializationTests.deserialize(out, TokenPartitioner.class);

            // the deserialized partitioner must preserve the partition count, sub-ranges and ring
            assertNotNull(deserialized);
            assertEquals(tokenPartitioner.numPartitions(), deserialized.numPartitions());
            assertEquals(tokenPartitioner.subRanges().size(), deserialized.subRanges().size());
            for (int i = 0; i < tokenPartitioner.subRanges().size(); i++) {
                assertEquals(tokenPartitioner.subRanges().get(i), deserialized.subRanges().get(i));
            }
            assertEquals(tokenPartitioner.ring(), deserialized.ring());
        });
}
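The serialize and deserialize helpers referenced above are not shown on this page. A minimal sketch of what they might look like with the Kryo API, assuming an unregistered Kryo instance (the buffer sizes and the setRegistrationRequired(false) call are assumptions for illustration, not taken from the project):

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;

public class KryoRoundTrip {
    private static final Kryo KRYO = new Kryo();
    static {
        // assumption: skip class registration to keep the sketch short
        KRYO.setRegistrationRequired(false);
    }

    static Output serialize(final Object obj) {
        // write the object into a growable in-memory buffer
        final Output out = new Output(1024, -1);
        KRYO.writeObject(out, obj);
        return out;
    }

    static <T> T deserialize(final Output out, final Class<T> type) {
        // read back only the bytes written so far
        final Input in = new Input(out.getBuffer(), 0, out.position());
        return KRYO.readObject(in, type);
    }
}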
Use of org.apache.cassandra.spark.data.partitioner.TokenPartitioner in project spark-cassandra-bulkreader by jberragan.
The class PartitionKeyFilterTests, method testTokenRing:
@Test
public void testTokenRing() {
    qt().forAll(TestUtils.partitioners(),
                arbitrary().pick(Arrays.asList(1, 3, 6, 12, 128)))
        .checkAssert((partitioner, numInstances) -> {
            final CassandraRing ring = TestUtils.createRing(partitioner, numInstances);
            final TokenPartitioner tokenPartitioner = new TokenPartitioner(ring, 24, 24);

            // collect the lower bound, mid-point and upper bound of every Spark token range
            final List<BigInteger> boundaryTokens = IntStream.range(0, tokenPartitioner.numPartitions())
                                                             .mapToObj(tokenPartitioner::getTokenRange)
                                                             .map(r -> Arrays.asList(r.lowerEndpoint(), midPoint(r), r.upperEndpoint()))
                                                             .flatMap(Collection::stream)
                                                             .collect(Collectors.toList());
            for (final BigInteger token : boundaryTokens) {
                // check boundary tokens only match 1 Spark token range
                final PartitionKeyFilter filter = PartitionKeyFilter.create(Int32Type.instance.fromString("11"), token);
                assertEquals(1, tokenPartitioner.subRanges().stream().filter(filter::overlaps).count());
            }
        });
}
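The midPoint helper used above is also not shown. A plausible sketch, assuming it simply bisects the Guava Range (rounding toward the lower endpoint); the implementation is a guess, not the project's code:

import com.google.common.collect.Range;
import java.math.BigInteger;

static BigInteger midPoint(final Range<BigInteger> range) {
    // midway between the two endpoints, rounded down
    return range.lowerEndpoint().add(range.upperEndpoint()).divide(BigInteger.valueOf(2));
}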
Use of org.apache.cassandra.spark.data.partitioner.TokenPartitioner in project spark-cassandra-bulkreader by jberragan.
The class PartitionedDataLayerTests, method runSplitTests:
@SuppressWarnings("UnstableApiUsage")
private static void runSplitTests(final int minReplicas, final PartitionedDataLayer.AvailabilityHint... availabilityHint) {
    final int numInstances = availabilityHint.length;
    TestUtils.runTest((partitioner, dir, bridge) -> {
        final CassandraRing ring = TestUtils.createRing(partitioner, numInstances);
        final List<CassandraInstance> instances = new ArrayList<>(ring.instances());
        instances.sort(Comparator.comparing(CassandraInstance::nodeName));
        final TokenPartitioner tokenPartitioner = new TokenPartitioner(ring, 1, 32);

        // assign one availability hint per instance, in node-name order
        final Map<CassandraInstance, PartitionedDataLayer.AvailabilityHint> availableMap = new HashMap<>(numInstances);
        for (int i = 0; i < numInstances; i++) {
            availableMap.put(instances.get(i), availabilityHint[i]);
        }

        final Map<Range<BigInteger>, List<CassandraInstance>> ranges = ring.getSubRanges(tokenPartitioner.getTokenRange(0)).asMapOfRanges();
        final PartitionedDataLayer.ReplicaSet replicaSet = PartitionedDataLayer.splitReplicas(instances, ranges, availableMap::get, minReplicas, 0);

        // exactly minReplicas primaries; the rest are backups
        assertEquals(minReplicas, replicaSet.primary().size());
        assertEquals(numInstances - minReplicas, replicaSet.backup().size());

        // the instances with the best availability hints must be chosen as primaries
        final List<CassandraInstance> sortedInstances = new ArrayList<>(instances);
        sortedInstances.sort(Comparator.comparing(availableMap::get));
        for (int i = 0; i < sortedInstances.size(); i++) {
            if (i < minReplicas) {
                assertTrue(replicaSet.primary().contains(sortedInstances.get(i)));
            } else {
                assertTrue(replicaSet.backup().contains(sortedInstances.get(i)));
            }
        }
    });
}
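A caller passes one availability hint per Cassandra instance, and the assertions above pin down the contract: exactly minReplicas instances land in the primary set and the remainder in the backup set, with the best-available instances picked first. Hypothetical invocations (only the UP hint is confirmed by the surrounding tests; the call sites are illustrative):

// 3-instance ring, expect 1 primary and 2 backups
runSplitTests(1, UP, UP, UP);
// 3-instance ring, expect 2 primaries and 1 backup
runSplitTests(2, UP, UP, UP);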
Use of org.apache.cassandra.spark.data.partitioner.TokenPartitioner in project spark-cassandra-bulkreader by jberragan.
The class PartitionedDataLayer, method sstables:
@Override
public SSTablesSupplier sstables(final List<CustomFilter> filters) {
    // get token range for Spark partition
    final TokenPartitioner tokenPartitioner = tokenPartitioner();
    final int partitionId = TaskContext.getPartitionId();
    if (partitionId < 0 || partitionId >= tokenPartitioner.numPartitions()) {
        throw new IllegalStateException("PartitionId outside expected range: " + partitionId);
    }

    // get all replicas overlapping partition token range
    final Range<BigInteger> range = tokenPartitioner.getTokenRange(partitionId);
    final CassandraRing ring = ring();
    final ReplicationFactor rf = ring.replicationFactor();
    validateReplicationFactor(rf);

    // if any filter can filter by partition key, restrict the sub-ranges to those the filters overlap
    final Map<Range<BigInteger>, List<CassandraInstance>> instRanges;
    final Map<Range<BigInteger>, List<CassandraInstance>> subRanges = ring().getSubRanges(range).asMapOfRanges();
    if (filters.stream().noneMatch(CustomFilter::canFilterByKey)) {
        instRanges = subRanges;
    } else {
        instRanges = new HashMap<>();
        subRanges.keySet().forEach(instRange -> {
            if (filters.stream().filter(CustomFilter::canFilterByKey).anyMatch(filter -> filter.overlaps(instRange))) {
                instRanges.putIfAbsent(instRange, subRanges.get(instRange));
            }
        });
    }

    final Set<CassandraInstance> replicas = PartitionedDataLayer.rangesToReplicas(consistencyLevel, dc, instRanges);
    LOGGER.info("Creating partitioned SSTablesSupplier for Spark partition partitionId={} rangeLower={} rangeUpper={} numReplicas={}",
                partitionId, range.lowerEndpoint(), range.upperEndpoint(), replicas.size());

    // use the consistency level and replication factor to calculate the minimum number of replicas
    // required to satisfy the consistency level, then split the replicas into 'primary' and 'backup':
    // attempt the primary replicas first and use the backups to retry in the event of a failure
    final int minReplicas = consistencyLevel.blockFor(rf, dc);
    final ReplicaSet replicaSet = PartitionedDataLayer.splitReplicas(consistencyLevel, dc, instRanges, replicas, this::getAvailability, minReplicas, partitionId);
    if (replicaSet.primary().size() < minReplicas) {
        // could not find enough primary replicas to meet the consistency level
        assert replicaSet.backup().isEmpty();
        throw new NotEnoughReplicasException(consistencyLevel, range, minReplicas, replicas.size(), dc);
    }

    final ExecutorService executor = executorService();
    final Stats stats = stats();
    final Set<SingleReplica> primaryReplicas = replicaSet.primary().stream()
                                                         .map(inst -> new SingleReplica(inst, this, range, partitionId, executor, stats, replicaSet.isRepairPrimary(inst)))
                                                         .collect(Collectors.toSet());
    final Set<SingleReplica> backupReplicas = replicaSet.backup().stream()
                                                        .map(inst -> new SingleReplica(inst, this, range, partitionId, executor, stats, true))
                                                        .collect(Collectors.toSet());
    return new MultipleReplicas(primaryReplicas, backupReplicas, stats);
}
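consistencyLevel.blockFor(rf, dc) converts the consistency level into the minimum number of replicas that must respond. A simplified stand-in for the usual Cassandra semantics (the enum and method below are a self-contained sketch, not the project's classes, which also handle datacenter-aware replication factors):

public final class BlockForSketch {
    enum ConsistencyLevel { ONE, TWO, LOCAL_ONE, QUORUM, LOCAL_QUORUM, ALL }

    // minimum replicas required for a consistency level, given replication factor rf
    static int blockFor(final ConsistencyLevel cl, final int rf) {
        switch (cl) {
            case ONE:
            case LOCAL_ONE:
                return 1;
            case TWO:
                return 2;
            case QUORUM:
            case LOCAL_QUORUM:
                return rf / 2 + 1; // a majority of replicas
            case ALL:
                return rf;
            default:
                throw new UnsupportedOperationException("Unsupported consistency level: " + cl);
        }
    }
}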
Use of org.apache.cassandra.spark.data.partitioner.TokenPartitioner in project spark-cassandra-bulkreader by jberragan.
The class PartitionedDataLayerTests, method testSplitReplicas:
@SuppressWarnings("UnstableApiUsage")
private static void testSplitReplicas(final CassandraRing ring, final ConsistencyLevel consistencyLevel, final int defaultParallelism, final int numCores, final ReplicationFactor rf, final String dc) {
    final TokenPartitioner tokenPartitioner = new TokenPartitioner(ring, defaultParallelism, numCores);
    for (int partition = 0; partition < tokenPartitioner.numPartitions(); partition++) {
        final Range<BigInteger> range = tokenPartitioner.getTokenRange(partition);
        final Map<Range<BigInteger>, List<CassandraInstance>> subRanges = ring.getSubRanges(range).asMapOfRanges();
        final Set<CassandraInstance> replicas = PartitionedDataLayer.rangesToReplicas(consistencyLevel, dc, subRanges);

        // treat every instance as available
        final Function<CassandraInstance, PartitionedDataLayer.AvailabilityHint> availability = (instance) -> UP;
        final int minReplicas = consistencyLevel.blockFor(rf, dc);
        final PartitionedDataLayer.ReplicaSet replicaSet = PartitionedDataLayer.splitReplicas(consistencyLevel, dc, subRanges, replicas, availability, minReplicas, 0);

        // primary and backup sets must be disjoint and together cover every replica
        assertNotNull(replicaSet);
        assertTrue(Collections.disjoint(replicaSet.primary(), replicaSet.backup()));
        assertEquals(replicas.size(), replicaSet.primary().size() + replicaSet.backup().size());
    }
}
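rangesToReplicas collapses the per-sub-range replica lists into a deduplicated set of instances; the assertions above then check that splitReplicas partitions exactly that set. A minimal generic sketch of the flattening step, ignoring the consistency-level and datacenter filtering the real method performs:

import com.google.common.collect.Range;
import java.math.BigInteger;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

static <T> Set<T> rangesToReplicas(final Map<Range<BigInteger>, List<T>> subRanges) {
    // flatten the replica lists for every sub-range and deduplicate into a set
    return subRanges.values().stream()
                    .flatMap(List::stream)
                    .collect(Collectors.toSet());
}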